Index: head/secure/lib/libcrypto/amd64/aes-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/aes-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/aes-x86_64.S (revision 299481) @@ -1,2535 +1,2536 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from aes-x86_64.pl. .text .type _x86_64_AES_encrypt,@function .align 16 _x86_64_AES_encrypt: xorl 0(%r15),%eax xorl 4(%r15),%ebx xorl 8(%r15),%ecx xorl 12(%r15),%edx movl 240(%r15),%r13d subl $1,%r13d jmp .Lenc_loop .align 16 .Lenc_loop: movzbl %al,%esi movzbl %bl,%edi movzbl %cl,%ebp movl 0(%r14,%rsi,8),%r10d movl 0(%r14,%rdi,8),%r11d movl 0(%r14,%rbp,8),%r12d movzbl %bh,%esi movzbl %ch,%edi movzbl %dl,%ebp xorl 3(%r14,%rsi,8),%r10d xorl 3(%r14,%rdi,8),%r11d movl 0(%r14,%rbp,8),%r8d movzbl %dh,%esi shrl $16,%ecx movzbl %ah,%ebp xorl 3(%r14,%rsi,8),%r12d shrl $16,%edx xorl 3(%r14,%rbp,8),%r8d shrl $16,%ebx leaq 16(%r15),%r15 shrl $16,%eax movzbl %cl,%esi movzbl %dl,%edi movzbl %al,%ebp xorl 2(%r14,%rsi,8),%r10d xorl 2(%r14,%rdi,8),%r11d xorl 2(%r14,%rbp,8),%r12d movzbl %dh,%esi movzbl %ah,%edi movzbl %bl,%ebp xorl 1(%r14,%rsi,8),%r10d xorl 1(%r14,%rdi,8),%r11d xorl 2(%r14,%rbp,8),%r8d movl 12(%r15),%edx movzbl %bh,%edi movzbl %ch,%ebp movl 0(%r15),%eax xorl 1(%r14,%rdi,8),%r12d xorl 1(%r14,%rbp,8),%r8d movl 4(%r15),%ebx movl 8(%r15),%ecx xorl %r10d,%eax xorl %r11d,%ebx xorl %r12d,%ecx xorl %r8d,%edx subl $1,%r13d jnz .Lenc_loop movzbl %al,%esi movzbl %bl,%edi movzbl %cl,%ebp movzbl 2(%r14,%rsi,8),%r10d movzbl 2(%r14,%rdi,8),%r11d movzbl 2(%r14,%rbp,8),%r12d movzbl %dl,%esi movzbl %bh,%edi movzbl %ch,%ebp movzbl 2(%r14,%rsi,8),%r8d movl 0(%r14,%rdi,8),%edi movl 0(%r14,%rbp,8),%ebp andl $0x0000ff00,%edi andl $0x0000ff00,%ebp xorl %edi,%r10d xorl %ebp,%r11d shrl $16,%ecx movzbl %dh,%esi movzbl %ah,%edi shrl $16,%edx movl 0(%r14,%rsi,8),%esi movl 0(%r14,%rdi,8),%edi andl $0x0000ff00,%esi andl $0x0000ff00,%edi shrl $16,%ebx xorl %esi,%r12d xorl %edi,%r8d shrl $16,%eax movzbl %cl,%esi movzbl %dl,%edi movzbl %al,%ebp movl 0(%r14,%rsi,8),%esi movl 0(%r14,%rdi,8),%edi movl 0(%r14,%rbp,8),%ebp andl $0x00ff0000,%esi andl $0x00ff0000,%edi andl $0x00ff0000,%ebp xorl %esi,%r10d xorl %edi,%r11d xorl %ebp,%r12d movzbl %bl,%esi movzbl %dh,%edi movzbl %ah,%ebp movl 0(%r14,%rsi,8),%esi movl 2(%r14,%rdi,8),%edi movl 2(%r14,%rbp,8),%ebp andl $0x00ff0000,%esi andl $0xff000000,%edi andl $0xff000000,%ebp xorl %esi,%r8d xorl %edi,%r10d xorl %ebp,%r11d movzbl %bh,%esi movzbl %ch,%edi movl 16+12(%r15),%edx movl 2(%r14,%rsi,8),%esi movl 2(%r14,%rdi,8),%edi movl 16+0(%r15),%eax andl $0xff000000,%esi andl $0xff000000,%edi xorl %esi,%r12d xorl %edi,%r8d movl 16+4(%r15),%ebx movl 16+8(%r15),%ecx xorl %r10d,%eax xorl %r11d,%ebx xorl %r12d,%ecx xorl %r8d,%edx .byte 0xf3,0xc3 .size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt .type _x86_64_AES_encrypt_compact,@function .align 16 _x86_64_AES_encrypt_compact: leaq 128(%r14),%r8 movl 0-128(%r8),%edi movl 32-128(%r8),%ebp movl 64-128(%r8),%r10d movl 96-128(%r8),%r11d movl 128-128(%r8),%edi movl 160-128(%r8),%ebp movl 192-128(%r8),%r10d movl 224-128(%r8),%r11d jmp .Lenc_loop_compact .align 16 .Lenc_loop_compact: xorl 0(%r15),%eax xorl 4(%r15),%ebx xorl 8(%r15),%ecx xorl 12(%r15),%edx leaq 16(%r15),%r15 movzbl %al,%r10d movzbl %bl,%r11d movzbl %cl,%r12d movzbl %dl,%r8d movzbl %bh,%esi movzbl %ch,%edi shrl $16,%ecx movzbl %dh,%ebp movzbl (%r14,%r10,1),%r10d movzbl (%r14,%r11,1),%r11d movzbl (%r14,%r12,1),%r12d movzbl (%r14,%r8,1),%r8d movzbl 
(%r14,%rsi,1),%r9d movzbl %ah,%esi movzbl (%r14,%rdi,1),%r13d movzbl %cl,%edi movzbl (%r14,%rbp,1),%ebp movzbl (%r14,%rsi,1),%esi shll $8,%r9d shrl $16,%edx shll $8,%r13d xorl %r9d,%r10d shrl $16,%eax movzbl %dl,%r9d shrl $16,%ebx xorl %r13d,%r11d shll $8,%ebp movzbl %al,%r13d movzbl (%r14,%rdi,1),%edi xorl %ebp,%r12d shll $8,%esi movzbl %bl,%ebp shll $16,%edi xorl %esi,%r8d movzbl (%r14,%r9,1),%r9d movzbl %dh,%esi movzbl (%r14,%r13,1),%r13d xorl %edi,%r10d shrl $8,%ecx movzbl %ah,%edi shll $16,%r9d shrl $8,%ebx shll $16,%r13d xorl %r9d,%r11d movzbl (%r14,%rbp,1),%ebp movzbl (%r14,%rsi,1),%esi movzbl (%r14,%rdi,1),%edi movzbl (%r14,%rcx,1),%edx movzbl (%r14,%rbx,1),%ecx shll $16,%ebp xorl %r13d,%r12d shll $24,%esi xorl %ebp,%r8d shll $24,%edi xorl %esi,%r10d shll $24,%edx xorl %edi,%r11d shll $24,%ecx movl %r10d,%eax movl %r11d,%ebx xorl %r12d,%ecx xorl %r8d,%edx cmpq 16(%rsp),%r15 je .Lenc_compact_done movl $0x80808080,%r10d movl $0x80808080,%r11d andl %eax,%r10d andl %ebx,%r11d movl %r10d,%esi movl %r11d,%edi shrl $7,%r10d leal (%rax,%rax,1),%r8d shrl $7,%r11d leal (%rbx,%rbx,1),%r9d subl %r10d,%esi subl %r11d,%edi andl $0xfefefefe,%r8d andl $0xfefefefe,%r9d andl $0x1b1b1b1b,%esi andl $0x1b1b1b1b,%edi movl %eax,%r10d movl %ebx,%r11d xorl %esi,%r8d xorl %edi,%r9d xorl %r8d,%eax xorl %r9d,%ebx movl $0x80808080,%r12d roll $24,%eax movl $0x80808080,%ebp roll $24,%ebx andl %ecx,%r12d andl %edx,%ebp xorl %r8d,%eax xorl %r9d,%ebx movl %r12d,%esi rorl $16,%r10d movl %ebp,%edi rorl $16,%r11d leal (%rcx,%rcx,1),%r8d shrl $7,%r12d xorl %r10d,%eax shrl $7,%ebp xorl %r11d,%ebx rorl $8,%r10d leal (%rdx,%rdx,1),%r9d rorl $8,%r11d subl %r12d,%esi subl %ebp,%edi xorl %r10d,%eax xorl %r11d,%ebx andl $0xfefefefe,%r8d andl $0xfefefefe,%r9d andl $0x1b1b1b1b,%esi andl $0x1b1b1b1b,%edi movl %ecx,%r12d movl %edx,%ebp xorl %esi,%r8d xorl %edi,%r9d rorl $16,%r12d xorl %r8d,%ecx rorl $16,%ebp xorl %r9d,%edx roll $24,%ecx movl 0(%r14),%esi roll $24,%edx xorl %r8d,%ecx movl 64(%r14),%edi xorl %r9d,%edx movl 128(%r14),%r8d xorl %r12d,%ecx rorl $8,%r12d xorl %ebp,%edx rorl $8,%ebp xorl %r12d,%ecx movl 192(%r14),%r9d xorl %ebp,%edx jmp .Lenc_loop_compact .align 16 .Lenc_compact_done: xorl 0(%r15),%eax xorl 4(%r15),%ebx xorl 8(%r15),%ecx xorl 12(%r15),%edx .byte 0xf3,0xc3 .size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact .globl AES_encrypt .type AES_encrypt,@function .align 16 .globl asm_AES_encrypt .hidden asm_AES_encrypt asm_AES_encrypt: AES_encrypt: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 movq %rsp,%r10 leaq -63(%rdx),%rcx andq $-64,%rsp subq %rsp,%rcx negq %rcx andq $0x3c0,%rcx subq %rcx,%rsp subq $32,%rsp movq %rsi,16(%rsp) movq %r10,24(%rsp) .Lenc_prologue: movq %rdx,%r15 movl 240(%r15),%r13d movl 0(%rdi),%eax movl 4(%rdi),%ebx movl 8(%rdi),%ecx movl 12(%rdi),%edx shll $4,%r13d leaq (%r15,%r13,1),%rbp movq %r15,(%rsp) movq %rbp,8(%rsp) leaq .LAES_Te+2048(%rip),%r14 leaq 768(%rsp),%rbp subq %r14,%rbp andq $0x300,%rbp leaq (%r14,%rbp,1),%r14 call _x86_64_AES_encrypt_compact movq 16(%rsp),%r9 movq 24(%rsp),%rsi movl %eax,0(%r9) movl %ebx,4(%r9) movl %ecx,8(%r9) movl %edx,12(%r9) movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Lenc_epilogue: .byte 0xf3,0xc3 .size AES_encrypt,.-AES_encrypt .type _x86_64_AES_decrypt,@function .align 16 _x86_64_AES_decrypt: xorl 0(%r15),%eax xorl 4(%r15),%ebx xorl 8(%r15),%ecx xorl 12(%r15),%edx movl 240(%r15),%r13d subl $1,%r13d jmp .Ldec_loop .align 16 .Ldec_loop: 
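# .Ldec_loop: one table-driven decryption round.  Each output word gathers
# four byte-indexed lookups from .LAES_Td (the doubled .long entries make
# 8-byte slots, so byte offsets 0-3 of a slot address rotated variants of the
# same value) and is then XORed with the next round key; the schedule pointer
# %r15 advances by 16 each round.  AES_cbc_encrypt uses this fast path for
# large inputs, while AES_decrypt below routes through
# _x86_64_AES_decrypt_compact, which works from the inverse S-box plus the
# arithmetic InvMixColumns (0x80808080/0xfefefefe/0x1b1b1b1b mask trick) for
# a smaller cache footprint.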
movzbl %al,%esi movzbl %bl,%edi movzbl %cl,%ebp movl 0(%r14,%rsi,8),%r10d movl 0(%r14,%rdi,8),%r11d movl 0(%r14,%rbp,8),%r12d movzbl %dh,%esi movzbl %ah,%edi movzbl %dl,%ebp xorl 3(%r14,%rsi,8),%r10d xorl 3(%r14,%rdi,8),%r11d movl 0(%r14,%rbp,8),%r8d movzbl %bh,%esi shrl $16,%eax movzbl %ch,%ebp xorl 3(%r14,%rsi,8),%r12d shrl $16,%edx xorl 3(%r14,%rbp,8),%r8d shrl $16,%ebx leaq 16(%r15),%r15 shrl $16,%ecx movzbl %cl,%esi movzbl %dl,%edi movzbl %al,%ebp xorl 2(%r14,%rsi,8),%r10d xorl 2(%r14,%rdi,8),%r11d xorl 2(%r14,%rbp,8),%r12d movzbl %bh,%esi movzbl %ch,%edi movzbl %bl,%ebp xorl 1(%r14,%rsi,8),%r10d xorl 1(%r14,%rdi,8),%r11d xorl 2(%r14,%rbp,8),%r8d movzbl %dh,%esi movl 12(%r15),%edx movzbl %ah,%ebp xorl 1(%r14,%rsi,8),%r12d movl 0(%r15),%eax xorl 1(%r14,%rbp,8),%r8d xorl %r10d,%eax movl 4(%r15),%ebx movl 8(%r15),%ecx xorl %r12d,%ecx xorl %r11d,%ebx xorl %r8d,%edx subl $1,%r13d jnz .Ldec_loop leaq 2048(%r14),%r14 movzbl %al,%esi movzbl %bl,%edi movzbl %cl,%ebp movzbl (%r14,%rsi,1),%r10d movzbl (%r14,%rdi,1),%r11d movzbl (%r14,%rbp,1),%r12d movzbl %dl,%esi movzbl %dh,%edi movzbl %ah,%ebp movzbl (%r14,%rsi,1),%r8d movzbl (%r14,%rdi,1),%edi movzbl (%r14,%rbp,1),%ebp shll $8,%edi shll $8,%ebp xorl %edi,%r10d xorl %ebp,%r11d shrl $16,%edx movzbl %bh,%esi movzbl %ch,%edi shrl $16,%eax movzbl (%r14,%rsi,1),%esi movzbl (%r14,%rdi,1),%edi shll $8,%esi shll $8,%edi shrl $16,%ebx xorl %esi,%r12d xorl %edi,%r8d shrl $16,%ecx movzbl %cl,%esi movzbl %dl,%edi movzbl %al,%ebp movzbl (%r14,%rsi,1),%esi movzbl (%r14,%rdi,1),%edi movzbl (%r14,%rbp,1),%ebp shll $16,%esi shll $16,%edi shll $16,%ebp xorl %esi,%r10d xorl %edi,%r11d xorl %ebp,%r12d movzbl %bl,%esi movzbl %bh,%edi movzbl %ch,%ebp movzbl (%r14,%rsi,1),%esi movzbl (%r14,%rdi,1),%edi movzbl (%r14,%rbp,1),%ebp shll $16,%esi shll $24,%edi shll $24,%ebp xorl %esi,%r8d xorl %edi,%r10d xorl %ebp,%r11d movzbl %dh,%esi movzbl %ah,%edi movl 16+12(%r15),%edx movzbl (%r14,%rsi,1),%esi movzbl (%r14,%rdi,1),%edi movl 16+0(%r15),%eax shll $24,%esi shll $24,%edi xorl %esi,%r12d xorl %edi,%r8d movl 16+4(%r15),%ebx movl 16+8(%r15),%ecx leaq -2048(%r14),%r14 xorl %r10d,%eax xorl %r11d,%ebx xorl %r12d,%ecx xorl %r8d,%edx .byte 0xf3,0xc3 .size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt .type _x86_64_AES_decrypt_compact,@function .align 16 _x86_64_AES_decrypt_compact: leaq 128(%r14),%r8 movl 0-128(%r8),%edi movl 32-128(%r8),%ebp movl 64-128(%r8),%r10d movl 96-128(%r8),%r11d movl 128-128(%r8),%edi movl 160-128(%r8),%ebp movl 192-128(%r8),%r10d movl 224-128(%r8),%r11d jmp .Ldec_loop_compact .align 16 .Ldec_loop_compact: xorl 0(%r15),%eax xorl 4(%r15),%ebx xorl 8(%r15),%ecx xorl 12(%r15),%edx leaq 16(%r15),%r15 movzbl %al,%r10d movzbl %bl,%r11d movzbl %cl,%r12d movzbl %dl,%r8d movzbl %dh,%esi movzbl %ah,%edi shrl $16,%edx movzbl %bh,%ebp movzbl (%r14,%r10,1),%r10d movzbl (%r14,%r11,1),%r11d movzbl (%r14,%r12,1),%r12d movzbl (%r14,%r8,1),%r8d movzbl (%r14,%rsi,1),%r9d movzbl %ch,%esi movzbl (%r14,%rdi,1),%r13d movzbl (%r14,%rbp,1),%ebp movzbl (%r14,%rsi,1),%esi shrl $16,%ecx shll $8,%r13d shll $8,%r9d movzbl %cl,%edi shrl $16,%eax xorl %r9d,%r10d shrl $16,%ebx movzbl %dl,%r9d shll $8,%ebp xorl %r13d,%r11d shll $8,%esi movzbl %al,%r13d movzbl (%r14,%rdi,1),%edi xorl %ebp,%r12d movzbl %bl,%ebp shll $16,%edi xorl %esi,%r8d movzbl (%r14,%r9,1),%r9d movzbl %bh,%esi movzbl (%r14,%rbp,1),%ebp xorl %edi,%r10d movzbl (%r14,%r13,1),%r13d movzbl %ch,%edi shll $16,%ebp shll $16,%r9d shll $16,%r13d xorl %ebp,%r8d movzbl %dh,%ebp xorl %r9d,%r11d shrl $8,%eax xorl %r13d,%r12d movzbl 
(%r14,%rsi,1),%esi movzbl (%r14,%rdi,1),%ebx movzbl (%r14,%rbp,1),%ecx movzbl (%r14,%rax,1),%edx movl %r10d,%eax shll $24,%esi shll $24,%ebx shll $24,%ecx xorl %esi,%eax shll $24,%edx xorl %r11d,%ebx xorl %r12d,%ecx xorl %r8d,%edx cmpq 16(%rsp),%r15 je .Ldec_compact_done movq 256+0(%r14),%rsi shlq $32,%rbx shlq $32,%rdx movq 256+8(%r14),%rdi orq %rbx,%rax orq %rdx,%rcx movq 256+16(%r14),%rbp movq %rsi,%r9 movq %rsi,%r12 andq %rax,%r9 andq %rcx,%r12 movq %r9,%rbx movq %r12,%rdx shrq $7,%r9 leaq (%rax,%rax,1),%r8 shrq $7,%r12 leaq (%rcx,%rcx,1),%r11 subq %r9,%rbx subq %r12,%rdx andq %rdi,%r8 andq %rdi,%r11 andq %rbp,%rbx andq %rbp,%rdx xorq %rbx,%r8 xorq %rdx,%r11 movq %rsi,%r10 movq %rsi,%r13 andq %r8,%r10 andq %r11,%r13 movq %r10,%rbx movq %r13,%rdx shrq $7,%r10 leaq (%r8,%r8,1),%r9 shrq $7,%r13 leaq (%r11,%r11,1),%r12 subq %r10,%rbx subq %r13,%rdx andq %rdi,%r9 andq %rdi,%r12 andq %rbp,%rbx andq %rbp,%rdx xorq %rbx,%r9 xorq %rdx,%r12 movq %rsi,%r10 movq %rsi,%r13 andq %r9,%r10 andq %r12,%r13 movq %r10,%rbx movq %r13,%rdx shrq $7,%r10 xorq %rax,%r8 shrq $7,%r13 xorq %rcx,%r11 subq %r10,%rbx subq %r13,%rdx leaq (%r9,%r9,1),%r10 leaq (%r12,%r12,1),%r13 xorq %rax,%r9 xorq %rcx,%r12 andq %rdi,%r10 andq %rdi,%r13 andq %rbp,%rbx andq %rbp,%rdx xorq %rbx,%r10 xorq %rdx,%r13 xorq %r10,%rax xorq %r13,%rcx xorq %r10,%r8 xorq %r13,%r11 movq %rax,%rbx movq %rcx,%rdx xorq %r10,%r9 shrq $32,%rbx xorq %r13,%r12 shrq $32,%rdx xorq %r8,%r10 roll $8,%eax xorq %r11,%r13 roll $8,%ecx xorq %r9,%r10 roll $8,%ebx xorq %r12,%r13 roll $8,%edx xorl %r10d,%eax shrq $32,%r10 xorl %r13d,%ecx shrq $32,%r13 xorl %r10d,%ebx xorl %r13d,%edx movq %r8,%r10 roll $24,%r8d movq %r11,%r13 roll $24,%r11d shrq $32,%r10 xorl %r8d,%eax shrq $32,%r13 xorl %r11d,%ecx roll $24,%r10d movq %r9,%r8 roll $24,%r13d movq %r12,%r11 shrq $32,%r8 xorl %r10d,%ebx shrq $32,%r11 xorl %r13d,%edx movq 0(%r14),%rsi roll $16,%r9d movq 64(%r14),%rdi roll $16,%r12d movq 128(%r14),%rbp roll $16,%r8d movq 192(%r14),%r10 xorl %r9d,%eax roll $16,%r11d xorl %r12d,%ecx movq 256(%r14),%r13 xorl %r8d,%ebx xorl %r11d,%edx jmp .Ldec_loop_compact .align 16 .Ldec_compact_done: xorl 0(%r15),%eax xorl 4(%r15),%ebx xorl 8(%r15),%ecx xorl 12(%r15),%edx .byte 0xf3,0xc3 .size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact .globl AES_decrypt .type AES_decrypt,@function .align 16 .globl asm_AES_decrypt .hidden asm_AES_decrypt asm_AES_decrypt: AES_decrypt: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 movq %rsp,%r10 leaq -63(%rdx),%rcx andq $-64,%rsp subq %rsp,%rcx negq %rcx andq $0x3c0,%rcx subq %rcx,%rsp subq $32,%rsp movq %rsi,16(%rsp) movq %r10,24(%rsp) .Ldec_prologue: movq %rdx,%r15 movl 240(%r15),%r13d movl 0(%rdi),%eax movl 4(%rdi),%ebx movl 8(%rdi),%ecx movl 12(%rdi),%edx shll $4,%r13d leaq (%r15,%r13,1),%rbp movq %r15,(%rsp) movq %rbp,8(%rsp) leaq .LAES_Td+2048(%rip),%r14 leaq 768(%rsp),%rbp subq %r14,%rbp andq $0x300,%rbp leaq (%r14,%rbp,1),%r14 shrq $3,%rbp addq %rbp,%r14 call _x86_64_AES_decrypt_compact movq 16(%rsp),%r9 movq 24(%rsp),%rsi movl %eax,0(%r9) movl %ebx,4(%r9) movl %ecx,8(%r9) movl %edx,12(%r9) movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Ldec_epilogue: .byte 0xf3,0xc3 .size AES_decrypt,.-AES_decrypt .globl private_AES_set_encrypt_key .type private_AES_set_encrypt_key,@function .align 16 private_AES_set_encrypt_key: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $8,%rsp .Lenc_key_prologue: call 
_x86_64_AES_set_encrypt_key movq 40(%rsp),%rbp movq 48(%rsp),%rbx addq $56,%rsp .Lenc_key_epilogue: .byte 0xf3,0xc3 .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key .type _x86_64_AES_set_encrypt_key,@function .align 16 _x86_64_AES_set_encrypt_key: movl %esi,%ecx movq %rdi,%rsi movq %rdx,%rdi testq $-1,%rsi jz .Lbadpointer testq $-1,%rdi jz .Lbadpointer leaq .LAES_Te(%rip),%rbp leaq 2048+128(%rbp),%rbp movl 0-128(%rbp),%eax movl 32-128(%rbp),%ebx movl 64-128(%rbp),%r8d movl 96-128(%rbp),%edx movl 128-128(%rbp),%eax movl 160-128(%rbp),%ebx movl 192-128(%rbp),%r8d movl 224-128(%rbp),%edx cmpl $128,%ecx je .L10rounds cmpl $192,%ecx je .L12rounds cmpl $256,%ecx je .L14rounds movq $-2,%rax jmp .Lexit .L10rounds: movq 0(%rsi),%rax movq 8(%rsi),%rdx movq %rax,0(%rdi) movq %rdx,8(%rdi) shrq $32,%rdx xorl %ecx,%ecx jmp .L10shortcut .align 4 .L10loop: movl 0(%rdi),%eax movl 12(%rdi),%edx .L10shortcut: movzbl %dl,%esi movzbl -128(%rbp,%rsi,1),%ebx movzbl %dh,%esi shll $24,%ebx xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx shrl $16,%edx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx movzbl %dh,%esi shll $8,%ebx xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx shll $16,%ebx xorl %ebx,%eax xorl 1024-128(%rbp,%rcx,4),%eax movl %eax,16(%rdi) xorl 4(%rdi),%eax movl %eax,20(%rdi) xorl 8(%rdi),%eax movl %eax,24(%rdi) xorl 12(%rdi),%eax movl %eax,28(%rdi) addl $1,%ecx leaq 16(%rdi),%rdi cmpl $10,%ecx jl .L10loop movl $10,80(%rdi) xorq %rax,%rax jmp .Lexit .L12rounds: movq 0(%rsi),%rax movq 8(%rsi),%rbx movq 16(%rsi),%rdx movq %rax,0(%rdi) movq %rbx,8(%rdi) movq %rdx,16(%rdi) shrq $32,%rdx xorl %ecx,%ecx jmp .L12shortcut .align 4 .L12loop: movl 0(%rdi),%eax movl 20(%rdi),%edx .L12shortcut: movzbl %dl,%esi movzbl -128(%rbp,%rsi,1),%ebx movzbl %dh,%esi shll $24,%ebx xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx shrl $16,%edx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx movzbl %dh,%esi shll $8,%ebx xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx shll $16,%ebx xorl %ebx,%eax xorl 1024-128(%rbp,%rcx,4),%eax movl %eax,24(%rdi) xorl 4(%rdi),%eax movl %eax,28(%rdi) xorl 8(%rdi),%eax movl %eax,32(%rdi) xorl 12(%rdi),%eax movl %eax,36(%rdi) cmpl $7,%ecx je .L12break addl $1,%ecx xorl 16(%rdi),%eax movl %eax,40(%rdi) xorl 20(%rdi),%eax movl %eax,44(%rdi) leaq 24(%rdi),%rdi jmp .L12loop .L12break: movl $12,72(%rdi) xorq %rax,%rax jmp .Lexit .L14rounds: movq 0(%rsi),%rax movq 8(%rsi),%rbx movq 16(%rsi),%rcx movq 24(%rsi),%rdx movq %rax,0(%rdi) movq %rbx,8(%rdi) movq %rcx,16(%rdi) movq %rdx,24(%rdi) shrq $32,%rdx xorl %ecx,%ecx jmp .L14shortcut .align 4 .L14loop: movl 0(%rdi),%eax movl 28(%rdi),%edx .L14shortcut: movzbl %dl,%esi movzbl -128(%rbp,%rsi,1),%ebx movzbl %dh,%esi shll $24,%ebx xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx shrl $16,%edx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx movzbl %dh,%esi shll $8,%ebx xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx shll $16,%ebx xorl %ebx,%eax xorl 1024-128(%rbp,%rcx,4),%eax movl %eax,32(%rdi) xorl 4(%rdi),%eax movl %eax,36(%rdi) xorl 8(%rdi),%eax movl %eax,40(%rdi) xorl 12(%rdi),%eax movl %eax,44(%rdi) cmpl $6,%ecx je .L14break addl $1,%ecx movl %eax,%edx movl 16(%rdi),%eax movzbl %dl,%esi movzbl -128(%rbp,%rsi,1),%ebx movzbl %dh,%esi xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx shrl $16,%edx shll $8,%ebx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx movzbl %dh,%esi shll $16,%ebx xorl %ebx,%eax movzbl -128(%rbp,%rsi,1),%ebx shll $24,%ebx xorl %ebx,%eax movl %eax,48(%rdi) xorl 20(%rdi),%eax movl %eax,52(%rdi) 
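# AES-256 key schedule (.L14loop): the first word of each 8-word group,
# computed above, gets RotWord+SubWord through the S-box at -128(%rbp,%rsi,1)
# plus the round constant loaded from 1024-128(%rbp,%rcx,4); the words being
# stored here at 48-60(%rdi) come from the additional SubWord-only step (no
# rotation, no Rcon) that FIPS-197 prescribes for 256-bit keys.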
xorl 24(%rdi),%eax movl %eax,56(%rdi) xorl 28(%rdi),%eax movl %eax,60(%rdi) leaq 32(%rdi),%rdi jmp .L14loop .L14break: movl $14,48(%rdi) xorq %rax,%rax jmp .Lexit .Lbadpointer: movq $-1,%rax .Lexit: .byte 0xf3,0xc3 .size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key .globl private_AES_set_decrypt_key .type private_AES_set_decrypt_key,@function .align 16 private_AES_set_decrypt_key: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 pushq %rdx .Ldec_key_prologue: call _x86_64_AES_set_encrypt_key movq (%rsp),%r8 cmpl $0,%eax jne .Labort movl 240(%r8),%r14d xorq %rdi,%rdi leaq (%rdi,%r14,4),%rcx movq %r8,%rsi leaq (%r8,%rcx,4),%rdi .align 4 .Linvert: movq 0(%rsi),%rax movq 8(%rsi),%rbx movq 0(%rdi),%rcx movq 8(%rdi),%rdx movq %rax,0(%rdi) movq %rbx,8(%rdi) movq %rcx,0(%rsi) movq %rdx,8(%rsi) leaq 16(%rsi),%rsi leaq -16(%rdi),%rdi cmpq %rsi,%rdi jne .Linvert leaq .LAES_Te+2048+1024(%rip),%rax movq 40(%rax),%rsi movq 48(%rax),%rdi movq 56(%rax),%rbp movq %r8,%r15 subl $1,%r14d .align 4 .Lpermute: leaq 16(%r15),%r15 movq 0(%r15),%rax movq 8(%r15),%rcx movq %rsi,%r9 movq %rsi,%r12 andq %rax,%r9 andq %rcx,%r12 movq %r9,%rbx movq %r12,%rdx shrq $7,%r9 leaq (%rax,%rax,1),%r8 shrq $7,%r12 leaq (%rcx,%rcx,1),%r11 subq %r9,%rbx subq %r12,%rdx andq %rdi,%r8 andq %rdi,%r11 andq %rbp,%rbx andq %rbp,%rdx xorq %rbx,%r8 xorq %rdx,%r11 movq %rsi,%r10 movq %rsi,%r13 andq %r8,%r10 andq %r11,%r13 movq %r10,%rbx movq %r13,%rdx shrq $7,%r10 leaq (%r8,%r8,1),%r9 shrq $7,%r13 leaq (%r11,%r11,1),%r12 subq %r10,%rbx subq %r13,%rdx andq %rdi,%r9 andq %rdi,%r12 andq %rbp,%rbx andq %rbp,%rdx xorq %rbx,%r9 xorq %rdx,%r12 movq %rsi,%r10 movq %rsi,%r13 andq %r9,%r10 andq %r12,%r13 movq %r10,%rbx movq %r13,%rdx shrq $7,%r10 xorq %rax,%r8 shrq $7,%r13 xorq %rcx,%r11 subq %r10,%rbx subq %r13,%rdx leaq (%r9,%r9,1),%r10 leaq (%r12,%r12,1),%r13 xorq %rax,%r9 xorq %rcx,%r12 andq %rdi,%r10 andq %rdi,%r13 andq %rbp,%rbx andq %rbp,%rdx xorq %rbx,%r10 xorq %rdx,%r13 xorq %r10,%rax xorq %r13,%rcx xorq %r10,%r8 xorq %r13,%r11 movq %rax,%rbx movq %rcx,%rdx xorq %r10,%r9 shrq $32,%rbx xorq %r13,%r12 shrq $32,%rdx xorq %r8,%r10 roll $8,%eax xorq %r11,%r13 roll $8,%ecx xorq %r9,%r10 roll $8,%ebx xorq %r12,%r13 roll $8,%edx xorl %r10d,%eax shrq $32,%r10 xorl %r13d,%ecx shrq $32,%r13 xorl %r10d,%ebx xorl %r13d,%edx movq %r8,%r10 roll $24,%r8d movq %r11,%r13 roll $24,%r11d shrq $32,%r10 xorl %r8d,%eax shrq $32,%r13 xorl %r11d,%ecx roll $24,%r10d movq %r9,%r8 roll $24,%r13d movq %r12,%r11 shrq $32,%r8 xorl %r10d,%ebx shrq $32,%r11 xorl %r13d,%edx roll $16,%r9d roll $16,%r12d roll $16,%r8d xorl %r9d,%eax roll $16,%r11d xorl %r12d,%ecx xorl %r8d,%ebx xorl %r11d,%edx movl %eax,0(%r15) movl %ebx,4(%r15) movl %ecx,8(%r15) movl %edx,12(%r15) subl $1,%r14d jnz .Lpermute xorq %rax,%rax .Labort: movq 8(%rsp),%r15 movq 16(%rsp),%r14 movq 24(%rsp),%r13 movq 32(%rsp),%r12 movq 40(%rsp),%rbp movq 48(%rsp),%rbx addq $56,%rsp .Ldec_key_epilogue: .byte 0xf3,0xc3 .size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key .globl AES_cbc_encrypt .type AES_cbc_encrypt,@function .align 16 .globl asm_AES_cbc_encrypt .hidden asm_AES_cbc_encrypt asm_AES_cbc_encrypt: AES_cbc_encrypt: cmpq $0,%rdx je .Lcbc_epilogue pushfq pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 .Lcbc_prologue: cld movl %r9d,%r9d leaq .LAES_Te(%rip),%r14 cmpq $0,%r9 jne .Lcbc_picked_te leaq .LAES_Td(%rip),%r14 .Lcbc_picked_te: movl OPENSSL_ia32cap_P(%rip),%r10d cmpq $512,%rdx jb .Lcbc_slow_prologue testq $15,%rdx jnz .Lcbc_slow_prologue btl 
$28,%r10d jc .Lcbc_slow_prologue leaq -88-248(%rsp),%r15 andq $-64,%r15 movq %r14,%r10 leaq 2304(%r14),%r11 movq %r15,%r12 andq $0xFFF,%r10 andq $0xFFF,%r11 andq $0xFFF,%r12 cmpq %r11,%r12 jb .Lcbc_te_break_out subq %r11,%r12 subq %r12,%r15 jmp .Lcbc_te_ok .Lcbc_te_break_out: subq %r10,%r12 andq $0xFFF,%r12 addq $320,%r12 subq %r12,%r15 .align 4 .Lcbc_te_ok: xchgq %rsp,%r15 movq %r15,16(%rsp) .Lcbc_fast_body: movq %rdi,24(%rsp) movq %rsi,32(%rsp) movq %rdx,40(%rsp) movq %rcx,48(%rsp) movq %r8,56(%rsp) movl $0,80+240(%rsp) movq %r8,%rbp movq %r9,%rbx movq %rsi,%r9 movq %rdi,%r8 movq %rcx,%r15 movl 240(%r15),%eax movq %r15,%r10 subq %r14,%r10 andq $0xfff,%r10 cmpq $2304,%r10 jb .Lcbc_do_ecopy cmpq $4096-248,%r10 jb .Lcbc_skip_ecopy .align 4 .Lcbc_do_ecopy: movq %r15,%rsi leaq 80(%rsp),%rdi leaq 80(%rsp),%r15 movl $30,%ecx .long 0x90A548F3 movl %eax,(%rdi) .Lcbc_skip_ecopy: movq %r15,0(%rsp) movl $18,%ecx .align 4 .Lcbc_prefetch_te: movq 0(%r14),%r10 movq 32(%r14),%r11 movq 64(%r14),%r12 movq 96(%r14),%r13 leaq 128(%r14),%r14 subl $1,%ecx jnz .Lcbc_prefetch_te leaq -2304(%r14),%r14 cmpq $0,%rbx je .LFAST_DECRYPT movl 0(%rbp),%eax movl 4(%rbp),%ebx movl 8(%rbp),%ecx movl 12(%rbp),%edx .align 4 .Lcbc_fast_enc_loop: xorl 0(%r8),%eax xorl 4(%r8),%ebx xorl 8(%r8),%ecx xorl 12(%r8),%edx movq 0(%rsp),%r15 movq %r8,24(%rsp) call _x86_64_AES_encrypt movq 24(%rsp),%r8 movq 40(%rsp),%r10 movl %eax,0(%r9) movl %ebx,4(%r9) movl %ecx,8(%r9) movl %edx,12(%r9) leaq 16(%r8),%r8 leaq 16(%r9),%r9 subq $16,%r10 testq $-16,%r10 movq %r10,40(%rsp) jnz .Lcbc_fast_enc_loop movq 56(%rsp),%rbp movl %eax,0(%rbp) movl %ebx,4(%rbp) movl %ecx,8(%rbp) movl %edx,12(%rbp) jmp .Lcbc_fast_cleanup .align 16 .LFAST_DECRYPT: cmpq %r8,%r9 je .Lcbc_fast_dec_in_place movq %rbp,64(%rsp) .align 4 .Lcbc_fast_dec_loop: movl 0(%r8),%eax movl 4(%r8),%ebx movl 8(%r8),%ecx movl 12(%r8),%edx movq 0(%rsp),%r15 movq %r8,24(%rsp) call _x86_64_AES_decrypt movq 64(%rsp),%rbp movq 24(%rsp),%r8 movq 40(%rsp),%r10 xorl 0(%rbp),%eax xorl 4(%rbp),%ebx xorl 8(%rbp),%ecx xorl 12(%rbp),%edx movq %r8,%rbp subq $16,%r10 movq %r10,40(%rsp) movq %rbp,64(%rsp) movl %eax,0(%r9) movl %ebx,4(%r9) movl %ecx,8(%r9) movl %edx,12(%r9) leaq 16(%r8),%r8 leaq 16(%r9),%r9 jnz .Lcbc_fast_dec_loop movq 56(%rsp),%r12 movq 0(%rbp),%r10 movq 8(%rbp),%r11 movq %r10,0(%r12) movq %r11,8(%r12) jmp .Lcbc_fast_cleanup .align 16 .Lcbc_fast_dec_in_place: movq 0(%rbp),%r10 movq 8(%rbp),%r11 movq %r10,0+64(%rsp) movq %r11,8+64(%rsp) .align 4 .Lcbc_fast_dec_in_place_loop: movl 0(%r8),%eax movl 4(%r8),%ebx movl 8(%r8),%ecx movl 12(%r8),%edx movq 0(%rsp),%r15 movq %r8,24(%rsp) call _x86_64_AES_decrypt movq 24(%rsp),%r8 movq 40(%rsp),%r10 xorl 0+64(%rsp),%eax xorl 4+64(%rsp),%ebx xorl 8+64(%rsp),%ecx xorl 12+64(%rsp),%edx movq 0(%r8),%r11 movq 8(%r8),%r12 subq $16,%r10 jz .Lcbc_fast_dec_in_place_done movq %r11,0+64(%rsp) movq %r12,8+64(%rsp) movl %eax,0(%r9) movl %ebx,4(%r9) movl %ecx,8(%r9) movl %edx,12(%r9) leaq 16(%r8),%r8 leaq 16(%r9),%r9 movq %r10,40(%rsp) jmp .Lcbc_fast_dec_in_place_loop .Lcbc_fast_dec_in_place_done: movq 56(%rsp),%rdi movq %r11,0(%rdi) movq %r12,8(%rdi) movl %eax,0(%r9) movl %ebx,4(%r9) movl %ecx,8(%r9) movl %edx,12(%r9) .align 4 .Lcbc_fast_cleanup: cmpl $0,80+240(%rsp) leaq 80(%rsp),%rdi je .Lcbc_exit movl $30,%ecx xorq %rax,%rax .long 0x90AB48F3 jmp .Lcbc_exit .align 16 .Lcbc_slow_prologue: leaq -88(%rsp),%rbp andq $-64,%rbp leaq -88-63(%rcx),%r10 subq %rbp,%r10 negq %r10 andq $0x3c0,%r10 subq %r10,%rbp xchgq %rsp,%rbp movq %rbp,16(%rsp) .Lcbc_slow_body: movq 
%r8,56(%rsp) movq %r8,%rbp movq %r9,%rbx movq %rsi,%r9 movq %rdi,%r8 movq %rcx,%r15 movq %rdx,%r10 movl 240(%r15),%eax movq %r15,0(%rsp) shll $4,%eax leaq (%r15,%rax,1),%rax movq %rax,8(%rsp) leaq 2048(%r14),%r14 leaq 768-8(%rsp),%rax subq %r14,%rax andq $0x300,%rax leaq (%r14,%rax,1),%r14 cmpq $0,%rbx je .LSLOW_DECRYPT testq $-16,%r10 movl 0(%rbp),%eax movl 4(%rbp),%ebx movl 8(%rbp),%ecx movl 12(%rbp),%edx jz .Lcbc_slow_enc_tail .align 4 .Lcbc_slow_enc_loop: xorl 0(%r8),%eax xorl 4(%r8),%ebx xorl 8(%r8),%ecx xorl 12(%r8),%edx movq 0(%rsp),%r15 movq %r8,24(%rsp) movq %r9,32(%rsp) movq %r10,40(%rsp) call _x86_64_AES_encrypt_compact movq 24(%rsp),%r8 movq 32(%rsp),%r9 movq 40(%rsp),%r10 movl %eax,0(%r9) movl %ebx,4(%r9) movl %ecx,8(%r9) movl %edx,12(%r9) leaq 16(%r8),%r8 leaq 16(%r9),%r9 subq $16,%r10 testq $-16,%r10 jnz .Lcbc_slow_enc_loop testq $15,%r10 jnz .Lcbc_slow_enc_tail movq 56(%rsp),%rbp movl %eax,0(%rbp) movl %ebx,4(%rbp) movl %ecx,8(%rbp) movl %edx,12(%rbp) jmp .Lcbc_exit .align 4 .Lcbc_slow_enc_tail: movq %rax,%r11 movq %rcx,%r12 movq %r10,%rcx movq %r8,%rsi movq %r9,%rdi .long 0x9066A4F3 movq $16,%rcx subq %r10,%rcx xorq %rax,%rax .long 0x9066AAF3 movq %r9,%r8 movq $16,%r10 movq %r11,%rax movq %r12,%rcx jmp .Lcbc_slow_enc_loop .align 16 .LSLOW_DECRYPT: shrq $3,%rax addq %rax,%r14 movq 0(%rbp),%r11 movq 8(%rbp),%r12 movq %r11,0+64(%rsp) movq %r12,8+64(%rsp) .align 4 .Lcbc_slow_dec_loop: movl 0(%r8),%eax movl 4(%r8),%ebx movl 8(%r8),%ecx movl 12(%r8),%edx movq 0(%rsp),%r15 movq %r8,24(%rsp) movq %r9,32(%rsp) movq %r10,40(%rsp) call _x86_64_AES_decrypt_compact movq 24(%rsp),%r8 movq 32(%rsp),%r9 movq 40(%rsp),%r10 xorl 0+64(%rsp),%eax xorl 4+64(%rsp),%ebx xorl 8+64(%rsp),%ecx xorl 12+64(%rsp),%edx movq 0(%r8),%r11 movq 8(%r8),%r12 subq $16,%r10 jc .Lcbc_slow_dec_partial jz .Lcbc_slow_dec_done movq %r11,0+64(%rsp) movq %r12,8+64(%rsp) movl %eax,0(%r9) movl %ebx,4(%r9) movl %ecx,8(%r9) movl %edx,12(%r9) leaq 16(%r8),%r8 leaq 16(%r9),%r9 jmp .Lcbc_slow_dec_loop .Lcbc_slow_dec_done: movq 56(%rsp),%rdi movq %r11,0(%rdi) movq %r12,8(%rdi) movl %eax,0(%r9) movl %ebx,4(%r9) movl %ecx,8(%r9) movl %edx,12(%r9) jmp .Lcbc_exit .align 4 .Lcbc_slow_dec_partial: movq 56(%rsp),%rdi movq %r11,0(%rdi) movq %r12,8(%rdi) movl %eax,0+64(%rsp) movl %ebx,4+64(%rsp) movl %ecx,8+64(%rsp) movl %edx,12+64(%rsp) movq %r9,%rdi leaq 64(%rsp),%rsi leaq 16(%r10),%rcx .long 0x9066A4F3 jmp .Lcbc_exit .align 16 .Lcbc_exit: movq 16(%rsp),%rsi movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Lcbc_popfq: popfq .Lcbc_epilogue: .byte 0xf3,0xc3 .size AES_cbc_encrypt,.-AES_cbc_encrypt .align 64 .LAES_Te: .long 0xa56363c6,0xa56363c6 .long 0x847c7cf8,0x847c7cf8 .long 0x997777ee,0x997777ee .long 0x8d7b7bf6,0x8d7b7bf6 .long 0x0df2f2ff,0x0df2f2ff .long 0xbd6b6bd6,0xbd6b6bd6 .long 0xb16f6fde,0xb16f6fde .long 0x54c5c591,0x54c5c591 .long 0x50303060,0x50303060 .long 0x03010102,0x03010102 .long 0xa96767ce,0xa96767ce .long 0x7d2b2b56,0x7d2b2b56 .long 0x19fefee7,0x19fefee7 .long 0x62d7d7b5,0x62d7d7b5 .long 0xe6abab4d,0xe6abab4d .long 0x9a7676ec,0x9a7676ec .long 0x45caca8f,0x45caca8f .long 0x9d82821f,0x9d82821f .long 0x40c9c989,0x40c9c989 .long 0x877d7dfa,0x877d7dfa .long 0x15fafaef,0x15fafaef .long 0xeb5959b2,0xeb5959b2 .long 0xc947478e,0xc947478e .long 0x0bf0f0fb,0x0bf0f0fb .long 0xecadad41,0xecadad41 .long 0x67d4d4b3,0x67d4d4b3 .long 0xfda2a25f,0xfda2a25f .long 0xeaafaf45,0xeaafaf45 .long 0xbf9c9c23,0xbf9c9c23 .long 0xf7a4a453,0xf7a4a453 .long 
0x967272e4,0x967272e4 .long 0x5bc0c09b,0x5bc0c09b .long 0xc2b7b775,0xc2b7b775 .long 0x1cfdfde1,0x1cfdfde1 .long 0xae93933d,0xae93933d .long 0x6a26264c,0x6a26264c .long 0x5a36366c,0x5a36366c .long 0x413f3f7e,0x413f3f7e .long 0x02f7f7f5,0x02f7f7f5 .long 0x4fcccc83,0x4fcccc83 .long 0x5c343468,0x5c343468 .long 0xf4a5a551,0xf4a5a551 .long 0x34e5e5d1,0x34e5e5d1 .long 0x08f1f1f9,0x08f1f1f9 .long 0x937171e2,0x937171e2 .long 0x73d8d8ab,0x73d8d8ab .long 0x53313162,0x53313162 .long 0x3f15152a,0x3f15152a .long 0x0c040408,0x0c040408 .long 0x52c7c795,0x52c7c795 .long 0x65232346,0x65232346 .long 0x5ec3c39d,0x5ec3c39d .long 0x28181830,0x28181830 .long 0xa1969637,0xa1969637 .long 0x0f05050a,0x0f05050a .long 0xb59a9a2f,0xb59a9a2f .long 0x0907070e,0x0907070e .long 0x36121224,0x36121224 .long 0x9b80801b,0x9b80801b .long 0x3de2e2df,0x3de2e2df .long 0x26ebebcd,0x26ebebcd .long 0x6927274e,0x6927274e .long 0xcdb2b27f,0xcdb2b27f .long 0x9f7575ea,0x9f7575ea .long 0x1b090912,0x1b090912 .long 0x9e83831d,0x9e83831d .long 0x742c2c58,0x742c2c58 .long 0x2e1a1a34,0x2e1a1a34 .long 0x2d1b1b36,0x2d1b1b36 .long 0xb26e6edc,0xb26e6edc .long 0xee5a5ab4,0xee5a5ab4 .long 0xfba0a05b,0xfba0a05b .long 0xf65252a4,0xf65252a4 .long 0x4d3b3b76,0x4d3b3b76 .long 0x61d6d6b7,0x61d6d6b7 .long 0xceb3b37d,0xceb3b37d .long 0x7b292952,0x7b292952 .long 0x3ee3e3dd,0x3ee3e3dd .long 0x712f2f5e,0x712f2f5e .long 0x97848413,0x97848413 .long 0xf55353a6,0xf55353a6 .long 0x68d1d1b9,0x68d1d1b9 .long 0x00000000,0x00000000 .long 0x2cededc1,0x2cededc1 .long 0x60202040,0x60202040 .long 0x1ffcfce3,0x1ffcfce3 .long 0xc8b1b179,0xc8b1b179 .long 0xed5b5bb6,0xed5b5bb6 .long 0xbe6a6ad4,0xbe6a6ad4 .long 0x46cbcb8d,0x46cbcb8d .long 0xd9bebe67,0xd9bebe67 .long 0x4b393972,0x4b393972 .long 0xde4a4a94,0xde4a4a94 .long 0xd44c4c98,0xd44c4c98 .long 0xe85858b0,0xe85858b0 .long 0x4acfcf85,0x4acfcf85 .long 0x6bd0d0bb,0x6bd0d0bb .long 0x2aefefc5,0x2aefefc5 .long 0xe5aaaa4f,0xe5aaaa4f .long 0x16fbfbed,0x16fbfbed .long 0xc5434386,0xc5434386 .long 0xd74d4d9a,0xd74d4d9a .long 0x55333366,0x55333366 .long 0x94858511,0x94858511 .long 0xcf45458a,0xcf45458a .long 0x10f9f9e9,0x10f9f9e9 .long 0x06020204,0x06020204 .long 0x817f7ffe,0x817f7ffe .long 0xf05050a0,0xf05050a0 .long 0x443c3c78,0x443c3c78 .long 0xba9f9f25,0xba9f9f25 .long 0xe3a8a84b,0xe3a8a84b .long 0xf35151a2,0xf35151a2 .long 0xfea3a35d,0xfea3a35d .long 0xc0404080,0xc0404080 .long 0x8a8f8f05,0x8a8f8f05 .long 0xad92923f,0xad92923f .long 0xbc9d9d21,0xbc9d9d21 .long 0x48383870,0x48383870 .long 0x04f5f5f1,0x04f5f5f1 .long 0xdfbcbc63,0xdfbcbc63 .long 0xc1b6b677,0xc1b6b677 .long 0x75dadaaf,0x75dadaaf .long 0x63212142,0x63212142 .long 0x30101020,0x30101020 .long 0x1affffe5,0x1affffe5 .long 0x0ef3f3fd,0x0ef3f3fd .long 0x6dd2d2bf,0x6dd2d2bf .long 0x4ccdcd81,0x4ccdcd81 .long 0x140c0c18,0x140c0c18 .long 0x35131326,0x35131326 .long 0x2fececc3,0x2fececc3 .long 0xe15f5fbe,0xe15f5fbe .long 0xa2979735,0xa2979735 .long 0xcc444488,0xcc444488 .long 0x3917172e,0x3917172e .long 0x57c4c493,0x57c4c493 .long 0xf2a7a755,0xf2a7a755 .long 0x827e7efc,0x827e7efc .long 0x473d3d7a,0x473d3d7a .long 0xac6464c8,0xac6464c8 .long 0xe75d5dba,0xe75d5dba .long 0x2b191932,0x2b191932 .long 0x957373e6,0x957373e6 .long 0xa06060c0,0xa06060c0 .long 0x98818119,0x98818119 .long 0xd14f4f9e,0xd14f4f9e .long 0x7fdcdca3,0x7fdcdca3 .long 0x66222244,0x66222244 .long 0x7e2a2a54,0x7e2a2a54 .long 0xab90903b,0xab90903b .long 0x8388880b,0x8388880b .long 0xca46468c,0xca46468c .long 0x29eeeec7,0x29eeeec7 .long 0xd3b8b86b,0xd3b8b86b .long 0x3c141428,0x3c141428 .long 0x79dedea7,0x79dedea7 
.long 0xe25e5ebc,0xe25e5ebc .long 0x1d0b0b16,0x1d0b0b16 .long 0x76dbdbad,0x76dbdbad .long 0x3be0e0db,0x3be0e0db .long 0x56323264,0x56323264 .long 0x4e3a3a74,0x4e3a3a74 .long 0x1e0a0a14,0x1e0a0a14 .long 0xdb494992,0xdb494992 .long 0x0a06060c,0x0a06060c .long 0x6c242448,0x6c242448 .long 0xe45c5cb8,0xe45c5cb8 .long 0x5dc2c29f,0x5dc2c29f .long 0x6ed3d3bd,0x6ed3d3bd .long 0xefacac43,0xefacac43 .long 0xa66262c4,0xa66262c4 .long 0xa8919139,0xa8919139 .long 0xa4959531,0xa4959531 .long 0x37e4e4d3,0x37e4e4d3 .long 0x8b7979f2,0x8b7979f2 .long 0x32e7e7d5,0x32e7e7d5 .long 0x43c8c88b,0x43c8c88b .long 0x5937376e,0x5937376e .long 0xb76d6dda,0xb76d6dda .long 0x8c8d8d01,0x8c8d8d01 .long 0x64d5d5b1,0x64d5d5b1 .long 0xd24e4e9c,0xd24e4e9c .long 0xe0a9a949,0xe0a9a949 .long 0xb46c6cd8,0xb46c6cd8 .long 0xfa5656ac,0xfa5656ac .long 0x07f4f4f3,0x07f4f4f3 .long 0x25eaeacf,0x25eaeacf .long 0xaf6565ca,0xaf6565ca .long 0x8e7a7af4,0x8e7a7af4 .long 0xe9aeae47,0xe9aeae47 .long 0x18080810,0x18080810 .long 0xd5baba6f,0xd5baba6f .long 0x887878f0,0x887878f0 .long 0x6f25254a,0x6f25254a .long 0x722e2e5c,0x722e2e5c .long 0x241c1c38,0x241c1c38 .long 0xf1a6a657,0xf1a6a657 .long 0xc7b4b473,0xc7b4b473 .long 0x51c6c697,0x51c6c697 .long 0x23e8e8cb,0x23e8e8cb .long 0x7cdddda1,0x7cdddda1 .long 0x9c7474e8,0x9c7474e8 .long 0x211f1f3e,0x211f1f3e .long 0xdd4b4b96,0xdd4b4b96 .long 0xdcbdbd61,0xdcbdbd61 .long 0x868b8b0d,0x868b8b0d .long 0x858a8a0f,0x858a8a0f .long 0x907070e0,0x907070e0 .long 0x423e3e7c,0x423e3e7c .long 0xc4b5b571,0xc4b5b571 .long 0xaa6666cc,0xaa6666cc .long 0xd8484890,0xd8484890 .long 0x05030306,0x05030306 .long 0x01f6f6f7,0x01f6f6f7 .long 0x120e0e1c,0x120e0e1c .long 0xa36161c2,0xa36161c2 .long 0x5f35356a,0x5f35356a .long 0xf95757ae,0xf95757ae .long 0xd0b9b969,0xd0b9b969 .long 0x91868617,0x91868617 .long 0x58c1c199,0x58c1c199 .long 0x271d1d3a,0x271d1d3a .long 0xb99e9e27,0xb99e9e27 .long 0x38e1e1d9,0x38e1e1d9 .long 0x13f8f8eb,0x13f8f8eb .long 0xb398982b,0xb398982b .long 0x33111122,0x33111122 .long 0xbb6969d2,0xbb6969d2 .long 0x70d9d9a9,0x70d9d9a9 .long 0x898e8e07,0x898e8e07 .long 0xa7949433,0xa7949433 .long 0xb69b9b2d,0xb69b9b2d .long 0x221e1e3c,0x221e1e3c .long 0x92878715,0x92878715 .long 0x20e9e9c9,0x20e9e9c9 .long 0x49cece87,0x49cece87 .long 0xff5555aa,0xff5555aa .long 0x78282850,0x78282850 .long 0x7adfdfa5,0x7adfdfa5 .long 0x8f8c8c03,0x8f8c8c03 .long 0xf8a1a159,0xf8a1a159 .long 0x80898909,0x80898909 .long 0x170d0d1a,0x170d0d1a .long 0xdabfbf65,0xdabfbf65 .long 0x31e6e6d7,0x31e6e6d7 .long 0xc6424284,0xc6424284 .long 0xb86868d0,0xb86868d0 .long 0xc3414182,0xc3414182 .long 0xb0999929,0xb0999929 .long 0x772d2d5a,0x772d2d5a .long 0x110f0f1e,0x110f0f1e .long 0xcbb0b07b,0xcbb0b07b .long 0xfc5454a8,0xfc5454a8 .long 0xd6bbbb6d,0xd6bbbb6d .long 0x3a16162c,0x3a16162c .byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 .byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 .byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 .byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 .byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc .byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 .byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a .byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 .byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 .byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 .byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b .byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf .byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 .byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 .byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 .byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 .byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 
.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 .byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 .byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb .byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c .byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 .byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 .byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 .byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 .byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a .byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e .byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e .byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 .byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf .byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 .byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 .byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 .byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 .byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 .byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 .byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc .byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 .byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a .byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 .byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 .byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 .byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b .byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf .byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 .byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 .byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 .byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 .byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 .byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 .byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 .byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb .byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c .byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 .byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 .byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 .byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 .byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a .byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e .byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e .byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 .byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf .byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 .byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 .byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 .byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 .byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 .byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 .byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc .byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 .byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a .byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 .byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 .byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 .byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b .byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf .byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 .byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 .byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 .byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 .byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 .byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 .byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 .byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb .byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c .byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 .byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 .byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 .byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 .byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a .byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e .byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e .byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 .byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf .byte 
0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 .byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 .byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 .byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 .byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 .byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 .byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc .byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 .byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a .byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 .byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 .byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 .byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b .byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf .byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 .byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 .byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 .byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 .byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 .byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 .byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 .byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb .byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c .byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 .byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 .byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 .byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 .byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a .byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e .byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e .byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 .byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf .byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 .byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 .long 0x00000001, 0x00000002, 0x00000004, 0x00000008 .long 0x00000010, 0x00000020, 0x00000040, 0x00000080 .long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 .long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b .align 64 .LAES_Td: .long 0x50a7f451,0x50a7f451 .long 0x5365417e,0x5365417e .long 0xc3a4171a,0xc3a4171a .long 0x965e273a,0x965e273a .long 0xcb6bab3b,0xcb6bab3b .long 0xf1459d1f,0xf1459d1f .long 0xab58faac,0xab58faac .long 0x9303e34b,0x9303e34b .long 0x55fa3020,0x55fa3020 .long 0xf66d76ad,0xf66d76ad .long 0x9176cc88,0x9176cc88 .long 0x254c02f5,0x254c02f5 .long 0xfcd7e54f,0xfcd7e54f .long 0xd7cb2ac5,0xd7cb2ac5 .long 0x80443526,0x80443526 .long 0x8fa362b5,0x8fa362b5 .long 0x495ab1de,0x495ab1de .long 0x671bba25,0x671bba25 .long 0x980eea45,0x980eea45 .long 0xe1c0fe5d,0xe1c0fe5d .long 0x02752fc3,0x02752fc3 .long 0x12f04c81,0x12f04c81 .long 0xa397468d,0xa397468d .long 0xc6f9d36b,0xc6f9d36b .long 0xe75f8f03,0xe75f8f03 .long 0x959c9215,0x959c9215 .long 0xeb7a6dbf,0xeb7a6dbf .long 0xda595295,0xda595295 .long 0x2d83bed4,0x2d83bed4 .long 0xd3217458,0xd3217458 .long 0x2969e049,0x2969e049 .long 0x44c8c98e,0x44c8c98e .long 0x6a89c275,0x6a89c275 .long 0x78798ef4,0x78798ef4 .long 0x6b3e5899,0x6b3e5899 .long 0xdd71b927,0xdd71b927 .long 0xb64fe1be,0xb64fe1be .long 0x17ad88f0,0x17ad88f0 .long 0x66ac20c9,0x66ac20c9 .long 0xb43ace7d,0xb43ace7d .long 0x184adf63,0x184adf63 .long 0x82311ae5,0x82311ae5 .long 0x60335197,0x60335197 .long 0x457f5362,0x457f5362 .long 0xe07764b1,0xe07764b1 .long 0x84ae6bbb,0x84ae6bbb .long 0x1ca081fe,0x1ca081fe .long 0x942b08f9,0x942b08f9 .long 0x58684870,0x58684870 .long 0x19fd458f,0x19fd458f .long 0x876cde94,0x876cde94 .long 0xb7f87b52,0xb7f87b52 .long 0x23d373ab,0x23d373ab .long 0xe2024b72,0xe2024b72 .long 0x578f1fe3,0x578f1fe3 .long 0x2aab5566,0x2aab5566 .long 0x0728ebb2,0x0728ebb2 .long 0x03c2b52f,0x03c2b52f .long 0x9a7bc586,0x9a7bc586 .long 0xa50837d3,0xa50837d3 .long 0xf2872830,0xf2872830 .long 0xb2a5bf23,0xb2a5bf23 .long 0xba6a0302,0xba6a0302 
.long 0x5c8216ed,0x5c8216ed .long 0x2b1ccf8a,0x2b1ccf8a .long 0x92b479a7,0x92b479a7 .long 0xf0f207f3,0xf0f207f3 .long 0xa1e2694e,0xa1e2694e .long 0xcdf4da65,0xcdf4da65 .long 0xd5be0506,0xd5be0506 .long 0x1f6234d1,0x1f6234d1 .long 0x8afea6c4,0x8afea6c4 .long 0x9d532e34,0x9d532e34 .long 0xa055f3a2,0xa055f3a2 .long 0x32e18a05,0x32e18a05 .long 0x75ebf6a4,0x75ebf6a4 .long 0x39ec830b,0x39ec830b .long 0xaaef6040,0xaaef6040 .long 0x069f715e,0x069f715e .long 0x51106ebd,0x51106ebd .long 0xf98a213e,0xf98a213e .long 0x3d06dd96,0x3d06dd96 .long 0xae053edd,0xae053edd .long 0x46bde64d,0x46bde64d .long 0xb58d5491,0xb58d5491 .long 0x055dc471,0x055dc471 .long 0x6fd40604,0x6fd40604 .long 0xff155060,0xff155060 .long 0x24fb9819,0x24fb9819 .long 0x97e9bdd6,0x97e9bdd6 .long 0xcc434089,0xcc434089 .long 0x779ed967,0x779ed967 .long 0xbd42e8b0,0xbd42e8b0 .long 0x888b8907,0x888b8907 .long 0x385b19e7,0x385b19e7 .long 0xdbeec879,0xdbeec879 .long 0x470a7ca1,0x470a7ca1 .long 0xe90f427c,0xe90f427c .long 0xc91e84f8,0xc91e84f8 .long 0x00000000,0x00000000 .long 0x83868009,0x83868009 .long 0x48ed2b32,0x48ed2b32 .long 0xac70111e,0xac70111e .long 0x4e725a6c,0x4e725a6c .long 0xfbff0efd,0xfbff0efd .long 0x5638850f,0x5638850f .long 0x1ed5ae3d,0x1ed5ae3d .long 0x27392d36,0x27392d36 .long 0x64d90f0a,0x64d90f0a .long 0x21a65c68,0x21a65c68 .long 0xd1545b9b,0xd1545b9b .long 0x3a2e3624,0x3a2e3624 .long 0xb1670a0c,0xb1670a0c .long 0x0fe75793,0x0fe75793 .long 0xd296eeb4,0xd296eeb4 .long 0x9e919b1b,0x9e919b1b .long 0x4fc5c080,0x4fc5c080 .long 0xa220dc61,0xa220dc61 .long 0x694b775a,0x694b775a .long 0x161a121c,0x161a121c .long 0x0aba93e2,0x0aba93e2 .long 0xe52aa0c0,0xe52aa0c0 .long 0x43e0223c,0x43e0223c .long 0x1d171b12,0x1d171b12 .long 0x0b0d090e,0x0b0d090e .long 0xadc78bf2,0xadc78bf2 .long 0xb9a8b62d,0xb9a8b62d .long 0xc8a91e14,0xc8a91e14 .long 0x8519f157,0x8519f157 .long 0x4c0775af,0x4c0775af .long 0xbbdd99ee,0xbbdd99ee .long 0xfd607fa3,0xfd607fa3 .long 0x9f2601f7,0x9f2601f7 .long 0xbcf5725c,0xbcf5725c .long 0xc53b6644,0xc53b6644 .long 0x347efb5b,0x347efb5b .long 0x7629438b,0x7629438b .long 0xdcc623cb,0xdcc623cb .long 0x68fcedb6,0x68fcedb6 .long 0x63f1e4b8,0x63f1e4b8 .long 0xcadc31d7,0xcadc31d7 .long 0x10856342,0x10856342 .long 0x40229713,0x40229713 .long 0x2011c684,0x2011c684 .long 0x7d244a85,0x7d244a85 .long 0xf83dbbd2,0xf83dbbd2 .long 0x1132f9ae,0x1132f9ae .long 0x6da129c7,0x6da129c7 .long 0x4b2f9e1d,0x4b2f9e1d .long 0xf330b2dc,0xf330b2dc .long 0xec52860d,0xec52860d .long 0xd0e3c177,0xd0e3c177 .long 0x6c16b32b,0x6c16b32b .long 0x99b970a9,0x99b970a9 .long 0xfa489411,0xfa489411 .long 0x2264e947,0x2264e947 .long 0xc48cfca8,0xc48cfca8 .long 0x1a3ff0a0,0x1a3ff0a0 .long 0xd82c7d56,0xd82c7d56 .long 0xef903322,0xef903322 .long 0xc74e4987,0xc74e4987 .long 0xc1d138d9,0xc1d138d9 .long 0xfea2ca8c,0xfea2ca8c .long 0x360bd498,0x360bd498 .long 0xcf81f5a6,0xcf81f5a6 .long 0x28de7aa5,0x28de7aa5 .long 0x268eb7da,0x268eb7da .long 0xa4bfad3f,0xa4bfad3f .long 0xe49d3a2c,0xe49d3a2c .long 0x0d927850,0x0d927850 .long 0x9bcc5f6a,0x9bcc5f6a .long 0x62467e54,0x62467e54 .long 0xc2138df6,0xc2138df6 .long 0xe8b8d890,0xe8b8d890 .long 0x5ef7392e,0x5ef7392e .long 0xf5afc382,0xf5afc382 .long 0xbe805d9f,0xbe805d9f .long 0x7c93d069,0x7c93d069 .long 0xa92dd56f,0xa92dd56f .long 0xb31225cf,0xb31225cf .long 0x3b99acc8,0x3b99acc8 .long 0xa77d1810,0xa77d1810 .long 0x6e639ce8,0x6e639ce8 .long 0x7bbb3bdb,0x7bbb3bdb .long 0x097826cd,0x097826cd .long 0xf418596e,0xf418596e .long 0x01b79aec,0x01b79aec .long 0xa89a4f83,0xa89a4f83 .long 0x656e95e6,0x656e95e6 .long 
0x7ee6ffaa,0x7ee6ffaa .long 0x08cfbc21,0x08cfbc21 .long 0xe6e815ef,0xe6e815ef .long 0xd99be7ba,0xd99be7ba .long 0xce366f4a,0xce366f4a .long 0xd4099fea,0xd4099fea .long 0xd67cb029,0xd67cb029 .long 0xafb2a431,0xafb2a431 .long 0x31233f2a,0x31233f2a .long 0x3094a5c6,0x3094a5c6 .long 0xc066a235,0xc066a235 .long 0x37bc4e74,0x37bc4e74 .long 0xa6ca82fc,0xa6ca82fc .long 0xb0d090e0,0xb0d090e0 .long 0x15d8a733,0x15d8a733 .long 0x4a9804f1,0x4a9804f1 .long 0xf7daec41,0xf7daec41 .long 0x0e50cd7f,0x0e50cd7f .long 0x2ff69117,0x2ff69117 .long 0x8dd64d76,0x8dd64d76 .long 0x4db0ef43,0x4db0ef43 .long 0x544daacc,0x544daacc .long 0xdf0496e4,0xdf0496e4 .long 0xe3b5d19e,0xe3b5d19e .long 0x1b886a4c,0x1b886a4c .long 0xb81f2cc1,0xb81f2cc1 .long 0x7f516546,0x7f516546 .long 0x04ea5e9d,0x04ea5e9d .long 0x5d358c01,0x5d358c01 .long 0x737487fa,0x737487fa .long 0x2e410bfb,0x2e410bfb .long 0x5a1d67b3,0x5a1d67b3 .long 0x52d2db92,0x52d2db92 .long 0x335610e9,0x335610e9 .long 0x1347d66d,0x1347d66d .long 0x8c61d79a,0x8c61d79a .long 0x7a0ca137,0x7a0ca137 .long 0x8e14f859,0x8e14f859 .long 0x893c13eb,0x893c13eb .long 0xee27a9ce,0xee27a9ce .long 0x35c961b7,0x35c961b7 .long 0xede51ce1,0xede51ce1 .long 0x3cb1477a,0x3cb1477a .long 0x59dfd29c,0x59dfd29c .long 0x3f73f255,0x3f73f255 .long 0x79ce1418,0x79ce1418 .long 0xbf37c773,0xbf37c773 .long 0xeacdf753,0xeacdf753 .long 0x5baafd5f,0x5baafd5f .long 0x146f3ddf,0x146f3ddf .long 0x86db4478,0x86db4478 .long 0x81f3afca,0x81f3afca .long 0x3ec468b9,0x3ec468b9 .long 0x2c342438,0x2c342438 .long 0x5f40a3c2,0x5f40a3c2 .long 0x72c31d16,0x72c31d16 .long 0x0c25e2bc,0x0c25e2bc .long 0x8b493c28,0x8b493c28 .long 0x41950dff,0x41950dff .long 0x7101a839,0x7101a839 .long 0xdeb30c08,0xdeb30c08 .long 0x9ce4b4d8,0x9ce4b4d8 .long 0x90c15664,0x90c15664 .long 0x6184cb7b,0x6184cb7b .long 0x70b632d5,0x70b632d5 .long 0x745c6c48,0x745c6c48 .long 0x4257b8d0,0x4257b8d0 .byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 .byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb .byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 .byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb .byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d .byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e .byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 .byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 .byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 .byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 .byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda .byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 .byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a .byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 .byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 .byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b .byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea .byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 .byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 .byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e .byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 .byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b .byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 .byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 .byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 .byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f .byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d .byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef .byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 .byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 .byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 .byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 .byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 .byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb .byte 
0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 .byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb .byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d .byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e .byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 .byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 .byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 .byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 .byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda .byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 .byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a .byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 .byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 .byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b .byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea .byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 .byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 .byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e .byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 .byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b .byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 .byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 .byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 .byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f .byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d .byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef .byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 .byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 .byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 .byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 .byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 .byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb .byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 .byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb .byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d .byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e .byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 .byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 .byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 .byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 .byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda .byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 .byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a .byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 .byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 .byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b .byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea .byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 .byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 .byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e .byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 .byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b .byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 .byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 .byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 .byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f .byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d .byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef .byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 .byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 .byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 .byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 .byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 .byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb .byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 .byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb .byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d .byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e .byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 .byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 .byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 .byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 .byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda .byte 
0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 .byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a .byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 .byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 .byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b .byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea .byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 .byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 .byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e .byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 .byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b .byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 .byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 .byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 .byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f .byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d .byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef .byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 .byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 .byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 .byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 .byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 Index: head/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S (revision 299481) @@ -1,16 +1,755 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from aesni-gcm-x86_64.pl. .text -.globl aesni_gcm_encrypt -.type aesni_gcm_encrypt,@function -aesni_gcm_encrypt: - xorl %eax,%eax - .byte 0xf3,0xc3 -.size aesni_gcm_encrypt,.-aesni_gcm_encrypt +.type _aesni_ctr32_ghash_6x,@function +.align 32 +_aesni_ctr32_ghash_6x: + vmovdqu 32(%r11),%xmm2 + subq $6,%rdx + vpxor %xmm4,%xmm4,%xmm4 + vmovdqu 0-128(%rcx),%xmm15 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpaddb %xmm2,%xmm11,%xmm12 + vpaddb %xmm2,%xmm12,%xmm13 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm15,%xmm1,%xmm9 + vmovdqu %xmm4,16+8(%rsp) + jmp .Loop6x +.align 32 +.Loop6x: + addl $100663296,%ebx + jc .Lhandle_ctr32 + vmovdqu 0-32(%r9),%xmm3 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm15,%xmm10,%xmm10 + vpxor %xmm15,%xmm11,%xmm11 + +.Lresume_ctr32: + vmovdqu %xmm1,(%r8) + vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 + vpxor %xmm15,%xmm12,%xmm12 + vmovups 16-128(%rcx),%xmm2 + vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 + xorq %r12,%r12 + cmpq %r14,%r15 + + vaesenc %xmm2,%xmm9,%xmm9 + vmovdqu 48+8(%rsp),%xmm0 + vpxor %xmm15,%xmm13,%xmm13 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 + vaesenc %xmm2,%xmm10,%xmm10 + vpxor %xmm15,%xmm14,%xmm14 + setnc %r12b + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vmovdqu 16-32(%r9),%xmm3 + negq %r12 + vaesenc %xmm2,%xmm12,%xmm12 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 + vpxor %xmm4,%xmm8,%xmm8 + vaesenc %xmm2,%xmm13,%xmm13 + vpxor %xmm5,%xmm1,%xmm4 + andq $0x60,%r12 + vmovups 32-128(%rcx),%xmm15 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 + vaesenc %xmm2,%xmm14,%xmm14 + + vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 + leaq (%r14,%r12,1),%r14 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 + vmovdqu 64+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 88(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 80(%r14),%r12 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,32+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,40+8(%rsp) + 
vmovdqu 48-32(%r9),%xmm5 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 48-128(%rcx),%xmm15 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm3,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 + vaesenc %xmm15,%xmm11,%xmm11 + vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 + vmovdqu 80+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqu 64-32(%r9),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 64-128(%rcx),%xmm15 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 72(%r14),%r13 + vpxor %xmm5,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 64(%r14),%r12 + vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 + vmovdqu 96+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,48+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,56+8(%rsp) + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 96-32(%r9),%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 80-128(%rcx),%xmm15 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 56(%r14),%r13 + vpxor %xmm1,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 + vpxor 112+8(%rsp),%xmm8,%xmm8 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 48(%r14),%r12 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,64+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,72+8(%rsp) + vpxor %xmm3,%xmm4,%xmm4 + vmovdqu 112-32(%r9),%xmm3 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 96-128(%rcx),%xmm15 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 40(%r14),%r13 + vpxor %xmm2,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 32(%r14),%r12 + vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,80+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,88+8(%rsp) + vpxor %xmm5,%xmm6,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor %xmm1,%xmm6,%xmm6 + + vmovups 112-128(%rcx),%xmm15 + vpslldq $8,%xmm6,%xmm5 + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 16(%r11),%xmm3 + + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm5,%xmm4,%xmm4 + movbeq 24(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 16(%r14),%r12 + vpalignr $8,%xmm4,%xmm4,%xmm0 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + movq %r13,96+8(%rsp) + vaesenc %xmm15,%xmm12,%xmm12 + movq %r12,104+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + vmovups 128-128(%rcx),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 144-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm10,%xmm10 + vpsrldq $8,%xmm6,%xmm6 + vaesenc %xmm1,%xmm11,%xmm11 + vpxor %xmm6,%xmm7,%xmm7 + vaesenc %xmm1,%xmm12,%xmm12 + vpxor %xmm0,%xmm4,%xmm4 + movbeq 8(%r14),%r13 + vaesenc %xmm1,%xmm13,%xmm13 + movbeq 0(%r14),%r12 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 160-128(%rcx),%xmm1 + cmpl $11,%ebp + jb .Lenc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + 
vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 176-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 192-128(%rcx),%xmm1 + je .Lenc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 208-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 224-128(%rcx),%xmm1 + jmp .Lenc_tail + +.align 32 +.Lhandle_ctr32: + vmovdqu (%r11),%xmm0 + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vmovdqu 0-32(%r9),%xmm3 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm15,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm15,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpshufb %xmm0,%xmm14,%xmm14 + vpshufb %xmm0,%xmm1,%xmm1 + jmp .Lresume_ctr32 + +.align 32 +.Lenc_tail: + vaesenc %xmm15,%xmm9,%xmm9 + vmovdqu %xmm7,16+8(%rsp) + vpalignr $8,%xmm4,%xmm4,%xmm8 + vaesenc %xmm15,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + vpxor 0(%rdi),%xmm1,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 16(%rdi),%xmm1,%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 32(%rdi),%xmm1,%xmm5 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 48(%rdi),%xmm1,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 64(%rdi),%xmm1,%xmm7 + vpxor 80(%rdi),%xmm1,%xmm3 + vmovdqu (%r8),%xmm1 + + vaesenclast %xmm2,%xmm9,%xmm9 + vmovdqu 32(%r11),%xmm2 + vaesenclast %xmm0,%xmm10,%xmm10 + vpaddb %xmm2,%xmm1,%xmm0 + movq %r13,112+8(%rsp) + leaq 96(%rdi),%rdi + vaesenclast %xmm5,%xmm11,%xmm11 + vpaddb %xmm2,%xmm0,%xmm5 + movq %r12,120+8(%rsp) + leaq 96(%rsi),%rsi + vmovdqu 0-128(%rcx),%xmm15 + vaesenclast %xmm6,%xmm12,%xmm12 + vpaddb %xmm2,%xmm5,%xmm6 + vaesenclast %xmm7,%xmm13,%xmm13 + vpaddb %xmm2,%xmm6,%xmm7 + vaesenclast %xmm3,%xmm14,%xmm14 + vpaddb %xmm2,%xmm7,%xmm3 + + addq $0x60,%r10 + subq $0x6,%rdx + jc .L6x_done + + vmovups %xmm9,-96(%rsi) + vpxor %xmm15,%xmm1,%xmm9 + vmovups %xmm10,-80(%rsi) + vmovdqa %xmm0,%xmm10 + vmovups %xmm11,-64(%rsi) + vmovdqa %xmm5,%xmm11 + vmovups %xmm12,-48(%rsi) + vmovdqa %xmm6,%xmm12 + vmovups %xmm13,-32(%rsi) + vmovdqa %xmm7,%xmm13 + vmovups %xmm14,-16(%rsi) + vmovdqa %xmm3,%xmm14 + vmovdqu 32+8(%rsp),%xmm7 + jmp .Loop6x + +.L6x_done: + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpxor %xmm4,%xmm8,%xmm8 + + .byte 0xf3,0xc3 +.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x .globl aesni_gcm_decrypt .type aesni_gcm_decrypt,@function +.align 32 aesni_gcm_decrypt: - xorl %eax,%eax + xorq %r10,%r10 + cmpq $0x60,%rdx + jb .Lgcm_dec_abort + + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq .Lbswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $0xf80,%r15 + vmovdqu (%r9),%xmm8 + andq $-128,%rsp + vmovdqu (%r11),%xmm0 + leaq 128(%rcx),%rcx + leaq 32+32(%r9),%r9 + movl 240-128(%rcx),%ebp + vpshufb %xmm0,%xmm8,%xmm8 + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc .Ldec_no_key_aliasing + cmpq $768,%r15 + jnc .Ldec_no_key_aliasing + subq %r15,%rsp +.Ldec_no_key_aliasing: + + vmovdqu 80(%rdi),%xmm7 + leaq (%rdi),%r14 + vmovdqu 
64(%rdi),%xmm4 + leaq -192(%rdi,%rdx,1),%r15 + vmovdqu 48(%rdi),%xmm5 + shrq $4,%rdx + xorq %r10,%r10 + vmovdqu 32(%rdi),%xmm6 + vpshufb %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rdi),%xmm2 + vpshufb %xmm0,%xmm4,%xmm4 + vmovdqu (%rdi),%xmm3 + vpshufb %xmm0,%xmm5,%xmm5 + vmovdqu %xmm4,48(%rsp) + vpshufb %xmm0,%xmm6,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm2,%xmm2 + vmovdqu %xmm6,80(%rsp) + vpshufb %xmm0,%xmm3,%xmm3 + vmovdqu %xmm2,96(%rsp) + vmovdqu %xmm3,112(%rsp) + + call _aesni_ctr32_ghash_6x + + vmovups %xmm9,-96(%rsi) + vmovups %xmm10,-80(%rsi) + vmovups %xmm11,-64(%rsi) + vmovups %xmm12,-48(%rsi) + vmovups %xmm13,-32(%rsi) + vmovups %xmm14,-16(%rsi) + + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,-64(%r9) + + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lgcm_dec_abort: + movq %r10,%rax .byte 0xf3,0xc3 .size aesni_gcm_decrypt,.-aesni_gcm_decrypt +.type _aesni_ctr32_6x,@function +.align 32 +_aesni_ctr32_6x: + vmovdqu 0-128(%rcx),%xmm4 + vmovdqu 32(%r11),%xmm2 + leaq -1(%rbp),%r13 + vmovups 16-128(%rcx),%xmm15 + leaq 32-128(%rcx),%r12 + vpxor %xmm4,%xmm1,%xmm9 + addl $100663296,%ebx + jc .Lhandle_ctr32_2 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddb %xmm2,%xmm11,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddb %xmm2,%xmm12,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp .Loop_ctr32 + +.align 16 +.Loop_ctr32: + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + vmovups (%r12),%xmm15 + leaq 16(%r12),%r12 + decl %r13d + jnz .Loop_ctr32 + + vmovdqu (%r12),%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 0(%rdi),%xmm3,%xmm4 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor 16(%rdi),%xmm3,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 32(%rdi),%xmm3,%xmm6 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 48(%rdi),%xmm3,%xmm8 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 64(%rdi),%xmm3,%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 80(%rdi),%xmm3,%xmm3 + leaq 96(%rdi),%rdi + + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm5,%xmm10,%xmm10 + vaesenclast %xmm6,%xmm11,%xmm11 + vaesenclast %xmm8,%xmm12,%xmm12 + vaesenclast %xmm2,%xmm13,%xmm13 + vaesenclast %xmm3,%xmm14,%xmm14 + vmovups %xmm9,0(%rsi) + vmovups %xmm10,16(%rsi) + vmovups %xmm11,32(%rsi) + vmovups %xmm12,48(%rsi) + vmovups %xmm13,64(%rsi) + vmovups %xmm14,80(%rsi) + leaq 96(%rsi),%rsi + + .byte 0xf3,0xc3 +.align 32 +.Lhandle_ctr32_2: + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpshufb %xmm0,%xmm14,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpshufb %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp .Loop_ctr32 +.size _aesni_ctr32_6x,.-_aesni_ctr32_6x + +.globl aesni_gcm_encrypt +.type aesni_gcm_encrypt,@function +.align 32 +aesni_gcm_encrypt: + xorq %r10,%r10 + cmpq $288,%rdx + jb .Lgcm_enc_abort + + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + vzeroupper 
+ + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq .Lbswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $0xf80,%r15 + leaq 128(%rcx),%rcx + vmovdqu (%r11),%xmm0 + andq $-128,%rsp + movl 240-128(%rcx),%ebp + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc .Lenc_no_key_aliasing + cmpq $768,%r15 + jnc .Lenc_no_key_aliasing + subq %r15,%rsp +.Lenc_no_key_aliasing: + + leaq (%rsi),%r14 + leaq -192(%rsi,%rdx,1),%r15 + shrq $4,%rdx + + call _aesni_ctr32_6x + vpshufb %xmm0,%xmm9,%xmm8 + vpshufb %xmm0,%xmm10,%xmm2 + vmovdqu %xmm8,112(%rsp) + vpshufb %xmm0,%xmm11,%xmm4 + vmovdqu %xmm2,96(%rsp) + vpshufb %xmm0,%xmm12,%xmm5 + vmovdqu %xmm4,80(%rsp) + vpshufb %xmm0,%xmm13,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm14,%xmm7 + vmovdqu %xmm6,48(%rsp) + + call _aesni_ctr32_6x + + vmovdqu (%r9),%xmm8 + leaq 32+32(%r9),%r9 + subq $12,%rdx + movq $192,%r10 + vpshufb %xmm0,%xmm8,%xmm8 + + call _aesni_ctr32_ghash_6x + vmovdqu 32(%rsp),%xmm7 + vmovdqu (%r11),%xmm0 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm7,%xmm7,%xmm1 + vmovdqu 32-32(%r9),%xmm15 + vmovups %xmm9,-96(%rsi) + vpshufb %xmm0,%xmm9,%xmm9 + vpxor %xmm7,%xmm1,%xmm1 + vmovups %xmm10,-80(%rsi) + vpshufb %xmm0,%xmm10,%xmm10 + vmovups %xmm11,-64(%rsi) + vpshufb %xmm0,%xmm11,%xmm11 + vmovups %xmm12,-48(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vmovups %xmm13,-32(%rsi) + vpshufb %xmm0,%xmm13,%xmm13 + vmovups %xmm14,-16(%rsi) + vpshufb %xmm0,%xmm14,%xmm14 + vmovdqu %xmm9,16(%rsp) + vmovdqu 48(%rsp),%xmm6 + vmovdqu 16-32(%r9),%xmm0 + vpunpckhqdq %xmm6,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 + vpxor %xmm6,%xmm2,%xmm2 + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 + + vmovdqu 64(%rsp),%xmm9 + vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm9,%xmm9,%xmm5 + vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm5,%xmm5 + vpxor %xmm7,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vmovdqu 80(%rsp),%xmm1 + vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm4,%xmm7,%xmm7 + vpunpckhqdq %xmm1,%xmm1,%xmm4 + vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpxor %xmm6,%xmm9,%xmm9 + vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 96(%rsp),%xmm2 + vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm7,%xmm6,%xmm6 + vpunpckhqdq %xmm2,%xmm2,%xmm7 + vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm5,%xmm4,%xmm4 + + vpxor 112(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 + vmovdqu 112-32(%r9),%xmm0 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm1,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 + vpxor %xmm4,%xmm7,%xmm4 + + vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm1 + vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 + vpxor %xmm14,%xmm1,%xmm1 + vpxor %xmm5,%xmm6,%xmm5 + vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 + vmovdqu 32-32(%r9),%xmm15 + vpxor %xmm2,%xmm8,%xmm7 + vpxor %xmm4,%xmm9,%xmm6 + + vmovdqu 16-32(%r9),%xmm0 + vpxor %xmm5,%xmm7,%xmm9 + vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 + vpxor %xmm9,%xmm6,%xmm6 + vpunpckhqdq %xmm13,%xmm13,%xmm2 + vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 + vpxor %xmm13,%xmm2,%xmm2 + vpslldq $8,%xmm6,%xmm9 + vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 + 
vpxor %xmm9,%xmm5,%xmm8 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm6,%xmm7,%xmm7 + + vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm12,%xmm12,%xmm9 + vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 + vpxor %xmm12,%xmm9,%xmm9 + vpxor %xmm14,%xmm13,%xmm13 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm11,%xmm11,%xmm1 + vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm13,%xmm12,%xmm12 + vxorps 16(%rsp),%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm9,%xmm9 + + vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm10,%xmm10,%xmm2 + vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 + vpxor %xmm10,%xmm2,%xmm2 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpxor %xmm12,%xmm11,%xmm11 + vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + + vxorps %xmm7,%xmm14,%xmm14 + vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 + vmovdqu 112-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm11,%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 + vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 + vpxor %xmm4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 + vpxor %xmm10,%xmm7,%xmm7 + vpxor %xmm2,%xmm6,%xmm6 + + vpxor %xmm5,%xmm7,%xmm4 + vpxor %xmm4,%xmm6,%xmm6 + vpslldq $8,%xmm6,%xmm1 + vmovdqu 16(%r11),%xmm3 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm1,%xmm5,%xmm8 + vpxor %xmm6,%xmm7,%xmm7 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 + vpxor %xmm2,%xmm8,%xmm8 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 + vpxor %xmm7,%xmm2,%xmm2 + vpxor %xmm2,%xmm8,%xmm8 + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,-64(%r9) + + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lgcm_enc_abort: + movq %r10,%rax + .byte 0xf3,0xc3 +.size aesni_gcm_encrypt,.-aesni_gcm_encrypt +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lpoly: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.Lone_msb: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Ltwo_lsb: +.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.Lone_lsb: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 Index: head/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S (revision 299481) @@ -1,507 +1,1437 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from aesni-mb-x86_64.pl. 
.text .globl aesni_multi_cbc_encrypt .type aesni_multi_cbc_encrypt,@function .align 32 aesni_multi_cbc_encrypt: + cmpl $2,%edx + jb .Lenc_non_avx + movl OPENSSL_ia32cap_P+4(%rip),%ecx + testl $268435456,%ecx + jnz _avx_cbc_enc_shortcut + jmp .Lenc_non_avx +.align 16 +.Lenc_non_avx: movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $48,%rsp andq $-64,%rsp movq %rax,16(%rsp) .Lenc4x_body: movdqu (%rsi),%xmm12 leaq 120(%rsi),%rsi leaq 80(%rdi),%rdi .Lenc4x_loop_grande: movl %edx,24(%rsp) xorl %edx,%edx movl -64(%rdi),%ecx movq -80(%rdi),%r8 cmpl %edx,%ecx movq -72(%rdi),%r12 cmovgl %ecx,%edx testl %ecx,%ecx movdqu -56(%rdi),%xmm2 movl %ecx,32(%rsp) cmovleq %rsp,%r8 movl -24(%rdi),%ecx movq -40(%rdi),%r9 cmpl %edx,%ecx movq -32(%rdi),%r13 cmovgl %ecx,%edx testl %ecx,%ecx movdqu -16(%rdi),%xmm3 movl %ecx,36(%rsp) cmovleq %rsp,%r9 movl 16(%rdi),%ecx movq 0(%rdi),%r10 cmpl %edx,%ecx movq 8(%rdi),%r14 cmovgl %ecx,%edx testl %ecx,%ecx movdqu 24(%rdi),%xmm4 movl %ecx,40(%rsp) cmovleq %rsp,%r10 movl 56(%rdi),%ecx movq 40(%rdi),%r11 cmpl %edx,%ecx movq 48(%rdi),%r15 cmovgl %ecx,%edx testl %ecx,%ecx movdqu 64(%rdi),%xmm5 movl %ecx,44(%rsp) cmovleq %rsp,%r11 testl %edx,%edx jz .Lenc4x_done movups 16-120(%rsi),%xmm1 pxor %xmm12,%xmm2 movups 32-120(%rsi),%xmm0 pxor %xmm12,%xmm3 movl 240-120(%rsi),%eax pxor %xmm12,%xmm4 movdqu (%r8),%xmm6 pxor %xmm12,%xmm5 movdqu (%r9),%xmm7 pxor %xmm6,%xmm2 movdqu (%r10),%xmm8 pxor %xmm7,%xmm3 movdqu (%r11),%xmm9 pxor %xmm8,%xmm4 pxor %xmm9,%xmm5 movdqa 32(%rsp),%xmm10 xorq %rbx,%rbx jmp .Loop_enc4x .align 32 .Loop_enc4x: addq $16,%rbx leaq 16(%rsp),%rbp movl $1,%ecx subq %rbx,%rbp .byte 102,15,56,220,209 prefetcht0 31(%r8,%rbx,1) prefetcht0 31(%r9,%rbx,1) .byte 102,15,56,220,217 prefetcht0 31(%r10,%rbx,1) prefetcht0 31(%r10,%rbx,1) .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups 48-120(%rsi),%xmm1 cmpl 32(%rsp),%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 cmovgeq %rbp,%r8 cmovgq %rbp,%r12 .byte 102,15,56,220,232 movups -56(%rsi),%xmm0 cmpl 36(%rsp),%ecx .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 cmovgeq %rbp,%r9 cmovgq %rbp,%r13 .byte 102,15,56,220,233 movups -40(%rsi),%xmm1 cmpl 40(%rsp),%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 cmovgeq %rbp,%r10 cmovgq %rbp,%r14 .byte 102,15,56,220,232 movups -24(%rsi),%xmm0 cmpl 44(%rsp),%ecx .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 cmovgeq %rbp,%r11 cmovgq %rbp,%r15 .byte 102,15,56,220,233 movups -8(%rsi),%xmm1 movdqa %xmm10,%xmm11 .byte 102,15,56,220,208 prefetcht0 15(%r12,%rbx,1) prefetcht0 15(%r13,%rbx,1) .byte 102,15,56,220,216 prefetcht0 15(%r14,%rbx,1) prefetcht0 15(%r15,%rbx,1) .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups 128-120(%rsi),%xmm0 pxor %xmm12,%xmm12 .byte 102,15,56,220,209 pcmpgtd %xmm12,%xmm11 movdqu -120(%rsi),%xmm12 .byte 102,15,56,220,217 paddd %xmm11,%xmm10 movdqa %xmm10,32(%rsp) .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups 144-120(%rsi),%xmm1 cmpl $11,%eax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups 160-120(%rsi),%xmm0 jb .Lenc4x_tail .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups 176-120(%rsi),%xmm1 .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups 192-120(%rsi),%xmm0 je .Lenc4x_tail .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 
.byte 102,15,56,220,233 movups 208-120(%rsi),%xmm1 .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups 224-120(%rsi),%xmm0 jmp .Lenc4x_tail .align 32 .Lenc4x_tail: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movdqu (%r8,%rbx,1),%xmm6 movdqu 16-120(%rsi),%xmm1 .byte 102,15,56,221,208 movdqu (%r9,%rbx,1),%xmm7 pxor %xmm12,%xmm6 .byte 102,15,56,221,216 movdqu (%r10,%rbx,1),%xmm8 pxor %xmm12,%xmm7 .byte 102,15,56,221,224 movdqu (%r11,%rbx,1),%xmm9 pxor %xmm12,%xmm8 .byte 102,15,56,221,232 movdqu 32-120(%rsi),%xmm0 pxor %xmm12,%xmm9 movups %xmm2,-16(%r12,%rbx,1) pxor %xmm6,%xmm2 movups %xmm3,-16(%r13,%rbx,1) pxor %xmm7,%xmm3 movups %xmm4,-16(%r14,%rbx,1) pxor %xmm8,%xmm4 movups %xmm5,-16(%r15,%rbx,1) pxor %xmm9,%xmm5 decl %edx jnz .Loop_enc4x movq 16(%rsp),%rax movl 24(%rsp),%edx leaq 160(%rdi),%rdi decl %edx jnz .Lenc4x_loop_grande .Lenc4x_done: movq -48(%rax),%r15 movq -40(%rax),%r14 movq -32(%rax),%r13 movq -24(%rax),%r12 movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Lenc4x_epilogue: .byte 0xf3,0xc3 .size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt .globl aesni_multi_cbc_decrypt .type aesni_multi_cbc_decrypt,@function .align 32 aesni_multi_cbc_decrypt: + cmpl $2,%edx + jb .Ldec_non_avx + movl OPENSSL_ia32cap_P+4(%rip),%ecx + testl $268435456,%ecx + jnz _avx_cbc_dec_shortcut + jmp .Ldec_non_avx +.align 16 +.Ldec_non_avx: movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $48,%rsp andq $-64,%rsp movq %rax,16(%rsp) .Ldec4x_body: movdqu (%rsi),%xmm12 leaq 120(%rsi),%rsi leaq 80(%rdi),%rdi .Ldec4x_loop_grande: movl %edx,24(%rsp) xorl %edx,%edx movl -64(%rdi),%ecx movq -80(%rdi),%r8 cmpl %edx,%ecx movq -72(%rdi),%r12 cmovgl %ecx,%edx testl %ecx,%ecx movdqu -56(%rdi),%xmm6 movl %ecx,32(%rsp) cmovleq %rsp,%r8 movl -24(%rdi),%ecx movq -40(%rdi),%r9 cmpl %edx,%ecx movq -32(%rdi),%r13 cmovgl %ecx,%edx testl %ecx,%ecx movdqu -16(%rdi),%xmm7 movl %ecx,36(%rsp) cmovleq %rsp,%r9 movl 16(%rdi),%ecx movq 0(%rdi),%r10 cmpl %edx,%ecx movq 8(%rdi),%r14 cmovgl %ecx,%edx testl %ecx,%ecx movdqu 24(%rdi),%xmm8 movl %ecx,40(%rsp) cmovleq %rsp,%r10 movl 56(%rdi),%ecx movq 40(%rdi),%r11 cmpl %edx,%ecx movq 48(%rdi),%r15 cmovgl %ecx,%edx testl %ecx,%ecx movdqu 64(%rdi),%xmm9 movl %ecx,44(%rsp) cmovleq %rsp,%r11 testl %edx,%edx jz .Ldec4x_done movups 16-120(%rsi),%xmm1 movups 32-120(%rsi),%xmm0 movl 240-120(%rsi),%eax movdqu (%r8),%xmm2 movdqu (%r9),%xmm3 pxor %xmm12,%xmm2 movdqu (%r10),%xmm4 pxor %xmm12,%xmm3 movdqu (%r11),%xmm5 pxor %xmm12,%xmm4 pxor %xmm12,%xmm5 movdqa 32(%rsp),%xmm10 xorq %rbx,%rbx jmp .Loop_dec4x .align 32 .Loop_dec4x: addq $16,%rbx leaq 16(%rsp),%rbp movl $1,%ecx subq %rbx,%rbp .byte 102,15,56,222,209 prefetcht0 31(%r8,%rbx,1) prefetcht0 31(%r9,%rbx,1) .byte 102,15,56,222,217 prefetcht0 31(%r10,%rbx,1) prefetcht0 31(%r11,%rbx,1) .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups 48-120(%rsi),%xmm1 cmpl 32(%rsp),%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 cmovgeq %rbp,%r8 cmovgq %rbp,%r12 .byte 102,15,56,222,232 movups -56(%rsi),%xmm0 cmpl 36(%rsp),%ecx .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 cmovgeq %rbp,%r9 cmovgq %rbp,%r13 .byte 102,15,56,222,233 movups -40(%rsi),%xmm1 cmpl 40(%rsp),%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 cmovgeq %rbp,%r10 cmovgq %rbp,%r14 .byte 102,15,56,222,232 movups -24(%rsi),%xmm0 cmpl 44(%rsp),%ecx .byte 
102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 cmovgeq %rbp,%r11 cmovgq %rbp,%r15 .byte 102,15,56,222,233 movups -8(%rsi),%xmm1 movdqa %xmm10,%xmm11 .byte 102,15,56,222,208 prefetcht0 15(%r12,%rbx,1) prefetcht0 15(%r13,%rbx,1) .byte 102,15,56,222,216 prefetcht0 15(%r14,%rbx,1) prefetcht0 15(%r15,%rbx,1) .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups 128-120(%rsi),%xmm0 pxor %xmm12,%xmm12 .byte 102,15,56,222,209 pcmpgtd %xmm12,%xmm11 movdqu -120(%rsi),%xmm12 .byte 102,15,56,222,217 paddd %xmm11,%xmm10 movdqa %xmm10,32(%rsp) .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups 144-120(%rsi),%xmm1 cmpl $11,%eax .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups 160-120(%rsi),%xmm0 jb .Ldec4x_tail .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups 176-120(%rsi),%xmm1 .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups 192-120(%rsi),%xmm0 je .Ldec4x_tail .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups 208-120(%rsi),%xmm1 .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups 224-120(%rsi),%xmm0 jmp .Ldec4x_tail .align 32 .Ldec4x_tail: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 pxor %xmm0,%xmm6 pxor %xmm0,%xmm7 .byte 102,15,56,222,233 movdqu 16-120(%rsi),%xmm1 pxor %xmm0,%xmm8 pxor %xmm0,%xmm9 movdqu 32-120(%rsi),%xmm0 .byte 102,15,56,223,214 .byte 102,15,56,223,223 movdqu -16(%r8,%rbx,1),%xmm6 movdqu -16(%r9,%rbx,1),%xmm7 .byte 102,65,15,56,223,224 .byte 102,65,15,56,223,233 movdqu -16(%r10,%rbx,1),%xmm8 movdqu -16(%r11,%rbx,1),%xmm9 movups %xmm2,-16(%r12,%rbx,1) movdqu (%r8,%rbx,1),%xmm2 movups %xmm3,-16(%r13,%rbx,1) movdqu (%r9,%rbx,1),%xmm3 pxor %xmm12,%xmm2 movups %xmm4,-16(%r14,%rbx,1) movdqu (%r10,%rbx,1),%xmm4 pxor %xmm12,%xmm3 movups %xmm5,-16(%r15,%rbx,1) movdqu (%r11,%rbx,1),%xmm5 pxor %xmm12,%xmm4 pxor %xmm12,%xmm5 decl %edx jnz .Loop_dec4x movq 16(%rsp),%rax movl 24(%rsp),%edx leaq 160(%rdi),%rdi decl %edx jnz .Ldec4x_loop_grande .Ldec4x_done: movq -48(%rax),%r15 movq -40(%rax),%r14 movq -32(%rax),%r13 movq -24(%rax),%r12 movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Ldec4x_epilogue: .byte 0xf3,0xc3 .size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt +.type aesni_multi_cbc_encrypt_avx,@function +.align 32 +aesni_multi_cbc_encrypt_avx: +_avx_cbc_enc_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + + + subq $192,%rsp + andq $-128,%rsp + movq %rax,16(%rsp) + +.Lenc8x_body: + vzeroupper + vmovdqu (%rsi),%xmm15 + leaq 120(%rsi),%rsi + leaq 160(%rdi),%rdi + shrl $1,%edx + +.Lenc8x_loop_grande: + + xorl %edx,%edx + movl -144(%rdi),%ecx + movq -160(%rdi),%r8 + cmpl %edx,%ecx + movq -152(%rdi),%rbx + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -136(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + subq %r8,%rbx + movq %rbx,64(%rsp) + movl -104(%rdi),%ecx + movq -120(%rdi),%r9 + cmpl %edx,%ecx + movq -112(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -96(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + subq %r9,%rbp + movq %rbp,72(%rsp) + movl -64(%rdi),%ecx + movq -80(%rdi),%r10 + cmpl %edx,%ecx + movq -72(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -56(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + subq %r10,%rbp + movq %rbp,80(%rsp) + movl 
-24(%rdi),%ecx + movq -40(%rdi),%r11 + cmpl %edx,%ecx + movq -32(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -16(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + subq %r11,%rbp + movq %rbp,88(%rsp) + movl 16(%rdi),%ecx + movq 0(%rdi),%r12 + cmpl %edx,%ecx + movq 8(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 24(%rdi),%xmm6 + movl %ecx,48(%rsp) + cmovleq %rsp,%r12 + subq %r12,%rbp + movq %rbp,96(%rsp) + movl 56(%rdi),%ecx + movq 40(%rdi),%r13 + cmpl %edx,%ecx + movq 48(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 64(%rdi),%xmm7 + movl %ecx,52(%rsp) + cmovleq %rsp,%r13 + subq %r13,%rbp + movq %rbp,104(%rsp) + movl 96(%rdi),%ecx + movq 80(%rdi),%r14 + cmpl %edx,%ecx + movq 88(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 104(%rdi),%xmm8 + movl %ecx,56(%rsp) + cmovleq %rsp,%r14 + subq %r14,%rbp + movq %rbp,112(%rsp) + movl 136(%rdi),%ecx + movq 120(%rdi),%r15 + cmpl %edx,%ecx + movq 128(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 144(%rdi),%xmm9 + movl %ecx,60(%rsp) + cmovleq %rsp,%r15 + subq %r15,%rbp + movq %rbp,120(%rsp) + testl %edx,%edx + jz .Lenc8x_done + + vmovups 16-120(%rsi),%xmm1 + vmovups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + + vpxor (%r8),%xmm15,%xmm10 + leaq 128(%rsp),%rbp + vpxor (%r9),%xmm15,%xmm11 + vpxor (%r10),%xmm15,%xmm12 + vpxor (%r11),%xmm15,%xmm13 + vpxor %xmm10,%xmm2,%xmm2 + vpxor (%r12),%xmm15,%xmm10 + vpxor %xmm11,%xmm3,%xmm3 + vpxor (%r13),%xmm15,%xmm11 + vpxor %xmm12,%xmm4,%xmm4 + vpxor (%r14),%xmm15,%xmm12 + vpxor %xmm13,%xmm5,%xmm5 + vpxor (%r15),%xmm15,%xmm13 + vpxor %xmm10,%xmm6,%xmm6 + movl $1,%ecx + vpxor %xmm11,%xmm7,%xmm7 + vpxor %xmm12,%xmm8,%xmm8 + vpxor %xmm13,%xmm9,%xmm9 + jmp .Loop_enc8x + +.align 32 +.Loop_enc8x: + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+0(%rsp),%ecx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r8) + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r8,%rbx,1),%rbx + cmovgeq %rsp,%r8 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r8,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r8),%xmm15,%xmm10 + movq %rbx,64+0(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -72(%rsi),%xmm1 + leaq 16(%r8,%rbx,1),%r8 + vmovdqu %xmm10,0(%rbp) + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+4(%rsp),%ecx + movq 64+8(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r9) + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r9,%rbx,1),%rbx + cmovgeq %rsp,%r9 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r9,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r9),%xmm15,%xmm11 + movq %rbx,64+8(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups -56(%rsi),%xmm0 + leaq 16(%r9,%rbx,1),%r9 + vmovdqu %xmm11,16(%rbp) + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+8(%rsp),%ecx + movq 64+16(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r10) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r8) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r10,%rbx,1),%rbx + cmovgeq %rsp,%r10 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r10,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r10),%xmm15,%xmm12 + movq %rbx,64+16(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -40(%rsi),%xmm1 + leaq 16(%r10,%rbx,1),%r10 + vmovdqu %xmm12,32(%rbp) + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+12(%rsp),%ecx + movq 64+24(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r11) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r9) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r11,%rbx,1),%rbx + cmovgeq %rsp,%r11 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq 
%rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r11,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r11),%xmm15,%xmm13 + movq %rbx,64+24(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups -24(%rsi),%xmm0 + leaq 16(%r11,%rbx,1),%r11 + vmovdqu %xmm13,48(%rbp) + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+16(%rsp),%ecx + movq 64+32(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r12) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r10) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r12,%rbx,1),%rbx + cmovgeq %rsp,%r12 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r12,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r12),%xmm15,%xmm10 + movq %rbx,64+32(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -8(%rsi),%xmm1 + leaq 16(%r12,%rbx,1),%r12 + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+20(%rsp),%ecx + movq 64+40(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r13) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r11) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%rbx,%r13,1),%rbx + cmovgeq %rsp,%r13 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r13,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r13),%xmm15,%xmm11 + movq %rbx,64+40(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 8(%rsi),%xmm0 + leaq 16(%r13,%rbx,1),%r13 + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+24(%rsp),%ecx + movq 64+48(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r14) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r12) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r14,%rbx,1),%rbx + cmovgeq %rsp,%r14 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r14,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r14),%xmm15,%xmm12 + movq %rbx,64+48(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 24(%rsi),%xmm1 + leaq 16(%r14,%rbx,1),%r14 + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+28(%rsp),%ecx + movq 64+56(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r15) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r13) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r15,%rbx,1),%rbx + cmovgeq %rsp,%r15 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r15,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r15),%xmm15,%xmm13 + movq %rbx,64+56(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 40(%rsi),%xmm0 + leaq 16(%r15,%rbx,1),%r15 + vmovdqu 32(%rsp),%xmm14 + prefetcht0 15(%r14) + prefetcht0 15(%r15) + cmpl $11,%eax + jb .Lenc8x_tail + + vaesenc %xmm1,%xmm2,%xmm2 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 176-120(%rsi),%xmm1 + + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vaesenc %xmm0,%xmm8,%xmm8 + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 192-120(%rsi),%xmm0 + je .Lenc8x_tail + + vaesenc %xmm1,%xmm2,%xmm2 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 208-120(%rsi),%xmm1 + + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vaesenc %xmm0,%xmm8,%xmm8 + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 224-120(%rsi),%xmm0 + +.Lenc8x_tail: + vaesenc %xmm1,%xmm2,%xmm2 + vpxor %xmm15,%xmm15,%xmm15 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + 
vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vpaddd %xmm14,%xmm15,%xmm15 + vmovdqu 48(%rsp),%xmm14 + vaesenc %xmm1,%xmm7,%xmm7 + movq 64(%rsp),%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 16-120(%rsi),%xmm1 + + vaesenclast %xmm0,%xmm2,%xmm2 + vmovdqa %xmm15,32(%rsp) + vpxor %xmm15,%xmm15,%xmm15 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesenclast %xmm0,%xmm5,%xmm5 + vaesenclast %xmm0,%xmm6,%xmm6 + vpaddd %xmm15,%xmm14,%xmm14 + vmovdqu -120(%rsi),%xmm15 + vaesenclast %xmm0,%xmm7,%xmm7 + vaesenclast %xmm0,%xmm8,%xmm8 + vmovdqa %xmm14,48(%rsp) + vaesenclast %xmm0,%xmm9,%xmm9 + vmovups 32-120(%rsi),%xmm0 + + vmovups %xmm2,-16(%r8) + subq %rbx,%r8 + vpxor 0(%rbp),%xmm2,%xmm2 + vmovups %xmm3,-16(%r9) + subq 72(%rsp),%r9 + vpxor 16(%rbp),%xmm3,%xmm3 + vmovups %xmm4,-16(%r10) + subq 80(%rsp),%r10 + vpxor 32(%rbp),%xmm4,%xmm4 + vmovups %xmm5,-16(%r11) + subq 88(%rsp),%r11 + vpxor 48(%rbp),%xmm5,%xmm5 + vmovups %xmm6,-16(%r12) + subq 96(%rsp),%r12 + vpxor %xmm10,%xmm6,%xmm6 + vmovups %xmm7,-16(%r13) + subq 104(%rsp),%r13 + vpxor %xmm11,%xmm7,%xmm7 + vmovups %xmm8,-16(%r14) + subq 112(%rsp),%r14 + vpxor %xmm12,%xmm8,%xmm8 + vmovups %xmm9,-16(%r15) + subq 120(%rsp),%r15 + vpxor %xmm13,%xmm9,%xmm9 + + decl %edx + jnz .Loop_enc8x + + movq 16(%rsp),%rax + + + + + +.Lenc8x_done: + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lenc8x_epilogue: + .byte 0xf3,0xc3 +.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx + +.type aesni_multi_cbc_decrypt_avx,@function +.align 32 +aesni_multi_cbc_decrypt_avx: +_avx_cbc_dec_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + + + + subq $256,%rsp + andq $-256,%rsp + subq $192,%rsp + movq %rax,16(%rsp) + +.Ldec8x_body: + vzeroupper + vmovdqu (%rsi),%xmm15 + leaq 120(%rsi),%rsi + leaq 160(%rdi),%rdi + shrl $1,%edx + +.Ldec8x_loop_grande: + + xorl %edx,%edx + movl -144(%rdi),%ecx + movq -160(%rdi),%r8 + cmpl %edx,%ecx + movq -152(%rdi),%rbx + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -136(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + subq %r8,%rbx + movq %rbx,64(%rsp) + vmovdqu %xmm2,192(%rsp) + movl -104(%rdi),%ecx + movq -120(%rdi),%r9 + cmpl %edx,%ecx + movq -112(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -96(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + subq %r9,%rbp + movq %rbp,72(%rsp) + vmovdqu %xmm3,208(%rsp) + movl -64(%rdi),%ecx + movq -80(%rdi),%r10 + cmpl %edx,%ecx + movq -72(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -56(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + subq %r10,%rbp + movq %rbp,80(%rsp) + vmovdqu %xmm4,224(%rsp) + movl -24(%rdi),%ecx + movq -40(%rdi),%r11 + cmpl %edx,%ecx + movq -32(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -16(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + subq %r11,%rbp + movq %rbp,88(%rsp) + vmovdqu %xmm5,240(%rsp) + movl 16(%rdi),%ecx + movq 0(%rdi),%r12 + cmpl %edx,%ecx + movq 8(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 24(%rdi),%xmm6 + movl %ecx,48(%rsp) + cmovleq %rsp,%r12 + subq %r12,%rbp + movq %rbp,96(%rsp) + vmovdqu %xmm6,256(%rsp) + movl 56(%rdi),%ecx + movq 40(%rdi),%r13 + cmpl %edx,%ecx + movq 48(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 64(%rdi),%xmm7 + movl %ecx,52(%rsp) + cmovleq 
%rsp,%r13 + subq %r13,%rbp + movq %rbp,104(%rsp) + vmovdqu %xmm7,272(%rsp) + movl 96(%rdi),%ecx + movq 80(%rdi),%r14 + cmpl %edx,%ecx + movq 88(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 104(%rdi),%xmm8 + movl %ecx,56(%rsp) + cmovleq %rsp,%r14 + subq %r14,%rbp + movq %rbp,112(%rsp) + vmovdqu %xmm8,288(%rsp) + movl 136(%rdi),%ecx + movq 120(%rdi),%r15 + cmpl %edx,%ecx + movq 128(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 144(%rdi),%xmm9 + movl %ecx,60(%rsp) + cmovleq %rsp,%r15 + subq %r15,%rbp + movq %rbp,120(%rsp) + vmovdqu %xmm9,304(%rsp) + testl %edx,%edx + jz .Ldec8x_done + + vmovups 16-120(%rsi),%xmm1 + vmovups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + leaq 192+128(%rsp),%rbp + + vmovdqu (%r8),%xmm2 + vmovdqu (%r9),%xmm3 + vmovdqu (%r10),%xmm4 + vmovdqu (%r11),%xmm5 + vmovdqu (%r12),%xmm6 + vmovdqu (%r13),%xmm7 + vmovdqu (%r14),%xmm8 + vmovdqu (%r15),%xmm9 + vmovdqu %xmm2,0(%rbp) + vpxor %xmm15,%xmm2,%xmm2 + vmovdqu %xmm3,16(%rbp) + vpxor %xmm15,%xmm3,%xmm3 + vmovdqu %xmm4,32(%rbp) + vpxor %xmm15,%xmm4,%xmm4 + vmovdqu %xmm5,48(%rbp) + vpxor %xmm15,%xmm5,%xmm5 + vmovdqu %xmm6,64(%rbp) + vpxor %xmm15,%xmm6,%xmm6 + vmovdqu %xmm7,80(%rbp) + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm8,96(%rbp) + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu %xmm9,112(%rbp) + vpxor %xmm15,%xmm9,%xmm9 + xorq $0x80,%rbp + movl $1,%ecx + jmp .Loop_dec8x + +.align 32 +.Loop_dec8x: + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+0(%rsp),%ecx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r8) + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r8,%rbx,1),%rbx + cmovgeq %rsp,%r8 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r8,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r8),%xmm10 + movq %rbx,64+0(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -72(%rsi),%xmm1 + leaq 16(%r8,%rbx,1),%r8 + vmovdqu %xmm10,128(%rsp) + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+4(%rsp),%ecx + movq 64+8(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r9) + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r9,%rbx,1),%rbx + cmovgeq %rsp,%r9 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r9,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r9),%xmm11 + movq %rbx,64+8(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups -56(%rsi),%xmm0 + leaq 16(%r9,%rbx,1),%r9 + vmovdqu %xmm11,144(%rsp) + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+8(%rsp),%ecx + movq 64+16(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r10) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r8) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r10,%rbx,1),%rbx + cmovgeq %rsp,%r10 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r10,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r10),%xmm12 + movq %rbx,64+16(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -40(%rsi),%xmm1 + leaq 16(%r10,%rbx,1),%r10 + vmovdqu %xmm12,160(%rsp) + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+12(%rsp),%ecx + movq 64+24(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r11) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r9) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r11,%rbx,1),%rbx + cmovgeq %rsp,%r11 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r11,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r11),%xmm13 + movq %rbx,64+24(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups -24(%rsi),%xmm0 + leaq 16(%r11,%rbx,1),%r11 + vmovdqu %xmm13,176(%rsp) + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+16(%rsp),%ecx + movq 64+32(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r12) 
+ vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r10) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r12,%rbx,1),%rbx + cmovgeq %rsp,%r12 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r12,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r12),%xmm10 + movq %rbx,64+32(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -8(%rsi),%xmm1 + leaq 16(%r12,%rbx,1),%r12 + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+20(%rsp),%ecx + movq 64+40(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r13) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r11) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%rbx,%r13,1),%rbx + cmovgeq %rsp,%r13 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r13,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r13),%xmm11 + movq %rbx,64+40(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 8(%rsi),%xmm0 + leaq 16(%r13,%rbx,1),%r13 + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+24(%rsp),%ecx + movq 64+48(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r14) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r12) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r14,%rbx,1),%rbx + cmovgeq %rsp,%r14 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r14,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r14),%xmm12 + movq %rbx,64+48(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 24(%rsi),%xmm1 + leaq 16(%r14,%rbx,1),%r14 + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+28(%rsp),%ecx + movq 64+56(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r15) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r13) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r15,%rbx,1),%rbx + cmovgeq %rsp,%r15 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r15,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r15),%xmm13 + movq %rbx,64+56(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 40(%rsi),%xmm0 + leaq 16(%r15,%rbx,1),%r15 + vmovdqu 32(%rsp),%xmm14 + prefetcht0 15(%r14) + prefetcht0 15(%r15) + cmpl $11,%eax + jb .Ldec8x_tail + + vaesdec %xmm1,%xmm2,%xmm2 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vaesdec %xmm1,%xmm7,%xmm7 + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 176-120(%rsi),%xmm1 + + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vaesdec %xmm0,%xmm8,%xmm8 + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 192-120(%rsi),%xmm0 + je .Ldec8x_tail + + vaesdec %xmm1,%xmm2,%xmm2 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vaesdec %xmm1,%xmm7,%xmm7 + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 208-120(%rsi),%xmm1 + + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vaesdec %xmm0,%xmm8,%xmm8 + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 224-120(%rsi),%xmm0 + +.Ldec8x_tail: + vaesdec %xmm1,%xmm2,%xmm2 + vpxor %xmm15,%xmm15,%xmm15 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vpaddd %xmm14,%xmm15,%xmm15 + vmovdqu 48(%rsp),%xmm14 + vaesdec %xmm1,%xmm7,%xmm7 + movq 64(%rsp),%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 16-120(%rsi),%xmm1 + + vaesdeclast %xmm0,%xmm2,%xmm2 + vmovdqa %xmm15,32(%rsp) + vpxor %xmm15,%xmm15,%xmm15 + vaesdeclast %xmm0,%xmm3,%xmm3 + 
vpxor 0(%rbp),%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor 16(%rbp),%xmm3,%xmm3 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor 32(%rbp),%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor 48(%rbp),%xmm5,%xmm5 + vpaddd %xmm15,%xmm14,%xmm14 + vmovdqu -120(%rsi),%xmm15 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor 64(%rbp),%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm8,%xmm8 + vpxor 80(%rbp),%xmm7,%xmm7 + vmovdqa %xmm14,48(%rsp) + vaesdeclast %xmm0,%xmm9,%xmm9 + vpxor 96(%rbp),%xmm8,%xmm8 + vmovups 32-120(%rsi),%xmm0 + + vmovups %xmm2,-16(%r8) + subq %rbx,%r8 + vmovdqu 128+0(%rsp),%xmm2 + vpxor 112(%rbp),%xmm9,%xmm9 + vmovups %xmm3,-16(%r9) + subq 72(%rsp),%r9 + vmovdqu %xmm2,0(%rbp) + vpxor %xmm15,%xmm2,%xmm2 + vmovdqu 128+16(%rsp),%xmm3 + vmovups %xmm4,-16(%r10) + subq 80(%rsp),%r10 + vmovdqu %xmm3,16(%rbp) + vpxor %xmm15,%xmm3,%xmm3 + vmovdqu 128+32(%rsp),%xmm4 + vmovups %xmm5,-16(%r11) + subq 88(%rsp),%r11 + vmovdqu %xmm4,32(%rbp) + vpxor %xmm15,%xmm4,%xmm4 + vmovdqu 128+48(%rsp),%xmm5 + vmovups %xmm6,-16(%r12) + subq 96(%rsp),%r12 + vmovdqu %xmm5,48(%rbp) + vpxor %xmm15,%xmm5,%xmm5 + vmovdqu %xmm10,64(%rbp) + vpxor %xmm10,%xmm15,%xmm6 + vmovups %xmm7,-16(%r13) + subq 104(%rsp),%r13 + vmovdqu %xmm11,80(%rbp) + vpxor %xmm11,%xmm15,%xmm7 + vmovups %xmm8,-16(%r14) + subq 112(%rsp),%r14 + vmovdqu %xmm12,96(%rbp) + vpxor %xmm12,%xmm15,%xmm8 + vmovups %xmm9,-16(%r15) + subq 120(%rsp),%r15 + vmovdqu %xmm13,112(%rbp) + vpxor %xmm13,%xmm15,%xmm9 + + xorq $128,%rbp + decl %edx + jnz .Loop_dec8x + + movq 16(%rsp),%rax + + + + + +.Ldec8x_done: + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Ldec8x_epilogue: + .byte 0xf3,0xc3 +.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx Index: head/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S (revision 299481) @@ -1,1682 +1,2986 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from aesni-sha1-x86_64.pl. 
.text .globl aesni_cbc_sha1_enc .type aesni_cbc_sha1_enc,@function .align 32 aesni_cbc_sha1_enc: movl OPENSSL_ia32cap_P+0(%rip),%r10d movq OPENSSL_ia32cap_P+4(%rip),%r11 btq $61,%r11 jc aesni_cbc_sha1_enc_shaext + andl $268435456,%r11d + andl $1073741824,%r10d + orl %r11d,%r10d + cmpl $1342177280,%r10d + je aesni_cbc_sha1_enc_avx jmp aesni_cbc_sha1_enc_ssse3 .byte 0xf3,0xc3 .size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc .type aesni_cbc_sha1_enc_ssse3,@function .align 32 aesni_cbc_sha1_enc_ssse3: movq 8(%rsp),%r10 pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 leaq -104(%rsp),%rsp movq %rdi,%r12 movq %rsi,%r13 movq %rdx,%r14 leaq 112(%rcx),%r15 movdqu (%r8),%xmm2 movq %r8,88(%rsp) shlq $6,%r14 subq %r12,%r13 movl 240-112(%r15),%r8d addq %r10,%r14 leaq K_XX_XX(%rip),%r11 movl 0(%r9),%eax movl 4(%r9),%ebx movl 8(%r9),%ecx movl 12(%r9),%edx movl %ebx,%esi movl 16(%r9),%ebp movl %ecx,%edi xorl %edx,%edi andl %edi,%esi movdqa 64(%r11),%xmm3 movdqa 0(%r11),%xmm13 movdqu 0(%r10),%xmm4 movdqu 16(%r10),%xmm5 movdqu 32(%r10),%xmm6 movdqu 48(%r10),%xmm7 .byte 102,15,56,0,227 .byte 102,15,56,0,235 .byte 102,15,56,0,243 addq $64,%r10 paddd %xmm13,%xmm4 .byte 102,15,56,0,251 paddd %xmm13,%xmm5 paddd %xmm13,%xmm6 movdqa %xmm4,0(%rsp) psubd %xmm13,%xmm4 movdqa %xmm5,16(%rsp) psubd %xmm13,%xmm5 movdqa %xmm6,32(%rsp) psubd %xmm13,%xmm6 movups -112(%r15),%xmm15 movups 16-112(%r15),%xmm0 jmp .Loop_ssse3 .align 32 .Loop_ssse3: rorl $2,%ebx movups 0(%r12),%xmm14 xorps %xmm15,%xmm14 xorps %xmm14,%xmm2 movups -80(%r15),%xmm1 .byte 102,15,56,220,208 pshufd $238,%xmm4,%xmm8 xorl %edx,%esi movdqa %xmm7,%xmm12 paddd %xmm7,%xmm13 movl %eax,%edi addl 0(%rsp),%ebp punpcklqdq %xmm5,%xmm8 xorl %ecx,%ebx roll $5,%eax addl %esi,%ebp psrldq $4,%xmm12 andl %ebx,%edi xorl %ecx,%ebx pxor %xmm4,%xmm8 addl %eax,%ebp rorl $7,%eax pxor %xmm6,%xmm12 xorl %ecx,%edi movl %ebp,%esi addl 4(%rsp),%edx pxor %xmm12,%xmm8 xorl %ebx,%eax roll $5,%ebp movdqa %xmm13,48(%rsp) addl %edi,%edx movups -64(%r15),%xmm0 .byte 102,15,56,220,209 andl %eax,%esi movdqa %xmm8,%xmm3 xorl %ebx,%eax addl %ebp,%edx rorl $7,%ebp movdqa %xmm8,%xmm12 xorl %ebx,%esi pslldq $12,%xmm3 paddd %xmm8,%xmm8 movl %edx,%edi addl 8(%rsp),%ecx psrld $31,%xmm12 xorl %eax,%ebp roll $5,%edx addl %esi,%ecx movdqa %xmm3,%xmm13 andl %ebp,%edi xorl %eax,%ebp psrld $30,%xmm3 addl %edx,%ecx rorl $7,%edx por %xmm12,%xmm8 xorl %eax,%edi movl %ecx,%esi addl 12(%rsp),%ebx movups -48(%r15),%xmm1 .byte 102,15,56,220,208 pslld $2,%xmm13 pxor %xmm3,%xmm8 xorl %ebp,%edx movdqa 0(%r11),%xmm3 roll $5,%ecx addl %edi,%ebx andl %edx,%esi pxor %xmm13,%xmm8 xorl %ebp,%edx addl %ecx,%ebx rorl $7,%ecx pshufd $238,%xmm5,%xmm9 xorl %ebp,%esi movdqa %xmm8,%xmm13 paddd %xmm8,%xmm3 movl %ebx,%edi addl 16(%rsp),%eax punpcklqdq %xmm6,%xmm9 xorl %edx,%ecx roll $5,%ebx addl %esi,%eax psrldq $4,%xmm13 andl %ecx,%edi xorl %edx,%ecx pxor %xmm5,%xmm9 addl %ebx,%eax rorl $7,%ebx movups -32(%r15),%xmm0 .byte 102,15,56,220,209 pxor %xmm7,%xmm13 xorl %edx,%edi movl %eax,%esi addl 20(%rsp),%ebp pxor %xmm13,%xmm9 xorl %ecx,%ebx roll $5,%eax movdqa %xmm3,0(%rsp) addl %edi,%ebp andl %ebx,%esi movdqa %xmm9,%xmm12 xorl %ecx,%ebx addl %eax,%ebp rorl $7,%eax movdqa %xmm9,%xmm13 xorl %ecx,%esi pslldq $12,%xmm12 paddd %xmm9,%xmm9 movl %ebp,%edi addl 24(%rsp),%edx psrld $31,%xmm13 xorl %ebx,%eax roll $5,%ebp addl %esi,%edx movups -16(%r15),%xmm1 .byte 102,15,56,220,208 movdqa %xmm12,%xmm3 andl %eax,%edi xorl %ebx,%eax psrld $30,%xmm12 addl %ebp,%edx rorl $7,%ebp por %xmm13,%xmm9 xorl %ebx,%edi movl %edx,%esi 
addl 28(%rsp),%ecx pslld $2,%xmm3 pxor %xmm12,%xmm9 xorl %eax,%ebp movdqa 16(%r11),%xmm12 roll $5,%edx addl %edi,%ecx andl %ebp,%esi pxor %xmm3,%xmm9 xorl %eax,%ebp addl %edx,%ecx rorl $7,%edx pshufd $238,%xmm6,%xmm10 xorl %eax,%esi movdqa %xmm9,%xmm3 paddd %xmm9,%xmm12 movl %ecx,%edi addl 32(%rsp),%ebx movups 0(%r15),%xmm0 .byte 102,15,56,220,209 punpcklqdq %xmm7,%xmm10 xorl %ebp,%edx roll $5,%ecx addl %esi,%ebx psrldq $4,%xmm3 andl %edx,%edi xorl %ebp,%edx pxor %xmm6,%xmm10 addl %ecx,%ebx rorl $7,%ecx pxor %xmm8,%xmm3 xorl %ebp,%edi movl %ebx,%esi addl 36(%rsp),%eax pxor %xmm3,%xmm10 xorl %edx,%ecx roll $5,%ebx movdqa %xmm12,16(%rsp) addl %edi,%eax andl %ecx,%esi movdqa %xmm10,%xmm13 xorl %edx,%ecx addl %ebx,%eax rorl $7,%ebx movups 16(%r15),%xmm1 .byte 102,15,56,220,208 movdqa %xmm10,%xmm3 xorl %edx,%esi pslldq $12,%xmm13 paddd %xmm10,%xmm10 movl %eax,%edi addl 40(%rsp),%ebp psrld $31,%xmm3 xorl %ecx,%ebx roll $5,%eax addl %esi,%ebp movdqa %xmm13,%xmm12 andl %ebx,%edi xorl %ecx,%ebx psrld $30,%xmm13 addl %eax,%ebp rorl $7,%eax por %xmm3,%xmm10 xorl %ecx,%edi movl %ebp,%esi addl 44(%rsp),%edx pslld $2,%xmm12 pxor %xmm13,%xmm10 xorl %ebx,%eax movdqa 16(%r11),%xmm13 roll $5,%ebp addl %edi,%edx movups 32(%r15),%xmm0 .byte 102,15,56,220,209 andl %eax,%esi pxor %xmm12,%xmm10 xorl %ebx,%eax addl %ebp,%edx rorl $7,%ebp pshufd $238,%xmm7,%xmm11 xorl %ebx,%esi movdqa %xmm10,%xmm12 paddd %xmm10,%xmm13 movl %edx,%edi addl 48(%rsp),%ecx punpcklqdq %xmm8,%xmm11 xorl %eax,%ebp roll $5,%edx addl %esi,%ecx psrldq $4,%xmm12 andl %ebp,%edi xorl %eax,%ebp pxor %xmm7,%xmm11 addl %edx,%ecx rorl $7,%edx pxor %xmm9,%xmm12 xorl %eax,%edi movl %ecx,%esi addl 52(%rsp),%ebx movups 48(%r15),%xmm1 .byte 102,15,56,220,208 pxor %xmm12,%xmm11 xorl %ebp,%edx roll $5,%ecx movdqa %xmm13,32(%rsp) addl %edi,%ebx andl %edx,%esi movdqa %xmm11,%xmm3 xorl %ebp,%edx addl %ecx,%ebx rorl $7,%ecx movdqa %xmm11,%xmm12 xorl %ebp,%esi pslldq $12,%xmm3 paddd %xmm11,%xmm11 movl %ebx,%edi addl 56(%rsp),%eax psrld $31,%xmm12 xorl %edx,%ecx roll $5,%ebx addl %esi,%eax movdqa %xmm3,%xmm13 andl %ecx,%edi xorl %edx,%ecx psrld $30,%xmm3 addl %ebx,%eax rorl $7,%ebx cmpl $11,%r8d jb .Laesenclast1 movups 64(%r15),%xmm0 .byte 102,15,56,220,209 movups 80(%r15),%xmm1 .byte 102,15,56,220,208 je .Laesenclast1 movups 96(%r15),%xmm0 .byte 102,15,56,220,209 movups 112(%r15),%xmm1 .byte 102,15,56,220,208 .Laesenclast1: .byte 102,15,56,221,209 movups 16-112(%r15),%xmm0 por %xmm12,%xmm11 xorl %edx,%edi movl %eax,%esi addl 60(%rsp),%ebp pslld $2,%xmm13 pxor %xmm3,%xmm11 xorl %ecx,%ebx movdqa 16(%r11),%xmm3 roll $5,%eax addl %edi,%ebp andl %ebx,%esi pxor %xmm13,%xmm11 pshufd $238,%xmm10,%xmm13 xorl %ecx,%ebx addl %eax,%ebp rorl $7,%eax pxor %xmm8,%xmm4 xorl %ecx,%esi movl %ebp,%edi addl 0(%rsp),%edx punpcklqdq %xmm11,%xmm13 xorl %ebx,%eax roll $5,%ebp pxor %xmm5,%xmm4 addl %esi,%edx movups 16(%r12),%xmm14 xorps %xmm15,%xmm14 movups %xmm2,0(%r12,%r13,1) xorps %xmm14,%xmm2 movups -80(%r15),%xmm1 .byte 102,15,56,220,208 andl %eax,%edi movdqa %xmm3,%xmm12 xorl %ebx,%eax paddd %xmm11,%xmm3 addl %ebp,%edx pxor %xmm13,%xmm4 rorl $7,%ebp xorl %ebx,%edi movl %edx,%esi addl 4(%rsp),%ecx movdqa %xmm4,%xmm13 xorl %eax,%ebp roll $5,%edx movdqa %xmm3,48(%rsp) addl %edi,%ecx andl %ebp,%esi xorl %eax,%ebp pslld $2,%xmm4 addl %edx,%ecx rorl $7,%edx psrld $30,%xmm13 xorl %eax,%esi movl %ecx,%edi addl 8(%rsp),%ebx movups -64(%r15),%xmm0 .byte 102,15,56,220,209 por %xmm13,%xmm4 xorl %ebp,%edx roll $5,%ecx pshufd $238,%xmm11,%xmm3 addl %esi,%ebx andl %edx,%edi xorl %ebp,%edx addl 
%ecx,%ebx addl 12(%rsp),%eax xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx addl %edi,%eax xorl %edx,%esi rorl $7,%ecx addl %ebx,%eax pxor %xmm9,%xmm5 addl 16(%rsp),%ebp movups -48(%r15),%xmm1 .byte 102,15,56,220,208 xorl %ecx,%esi punpcklqdq %xmm4,%xmm3 movl %eax,%edi roll $5,%eax pxor %xmm6,%xmm5 addl %esi,%ebp xorl %ecx,%edi movdqa %xmm12,%xmm13 rorl $7,%ebx paddd %xmm4,%xmm12 addl %eax,%ebp pxor %xmm3,%xmm5 addl 20(%rsp),%edx xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm5,%xmm3 addl %edi,%edx xorl %ebx,%esi movdqa %xmm12,0(%rsp) rorl $7,%eax addl %ebp,%edx addl 24(%rsp),%ecx pslld $2,%xmm5 xorl %eax,%esi movl %edx,%edi psrld $30,%xmm3 roll $5,%edx addl %esi,%ecx movups -32(%r15),%xmm0 .byte 102,15,56,220,209 xorl %eax,%edi rorl $7,%ebp por %xmm3,%xmm5 addl %edx,%ecx addl 28(%rsp),%ebx pshufd $238,%xmm4,%xmm12 xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx addl %edi,%ebx xorl %ebp,%esi rorl $7,%edx addl %ecx,%ebx pxor %xmm10,%xmm6 addl 32(%rsp),%eax xorl %edx,%esi punpcklqdq %xmm5,%xmm12 movl %ebx,%edi roll $5,%ebx pxor %xmm7,%xmm6 addl %esi,%eax xorl %edx,%edi movdqa 32(%r11),%xmm3 rorl $7,%ecx paddd %xmm5,%xmm13 addl %ebx,%eax pxor %xmm12,%xmm6 addl 36(%rsp),%ebp movups -16(%r15),%xmm1 .byte 102,15,56,220,208 xorl %ecx,%edi movl %eax,%esi roll $5,%eax movdqa %xmm6,%xmm12 addl %edi,%ebp xorl %ecx,%esi movdqa %xmm13,16(%rsp) rorl $7,%ebx addl %eax,%ebp addl 40(%rsp),%edx pslld $2,%xmm6 xorl %ebx,%esi movl %ebp,%edi psrld $30,%xmm12 roll $5,%ebp addl %esi,%edx xorl %ebx,%edi rorl $7,%eax por %xmm12,%xmm6 addl %ebp,%edx addl 44(%rsp),%ecx pshufd $238,%xmm5,%xmm13 xorl %eax,%edi movl %edx,%esi roll $5,%edx addl %edi,%ecx movups 0(%r15),%xmm0 .byte 102,15,56,220,209 xorl %eax,%esi rorl $7,%ebp addl %edx,%ecx pxor %xmm11,%xmm7 addl 48(%rsp),%ebx xorl %ebp,%esi punpcklqdq %xmm6,%xmm13 movl %ecx,%edi roll $5,%ecx pxor %xmm8,%xmm7 addl %esi,%ebx xorl %ebp,%edi movdqa %xmm3,%xmm12 rorl $7,%edx paddd %xmm6,%xmm3 addl %ecx,%ebx pxor %xmm13,%xmm7 addl 52(%rsp),%eax xorl %edx,%edi movl %ebx,%esi roll $5,%ebx movdqa %xmm7,%xmm13 addl %edi,%eax xorl %edx,%esi movdqa %xmm3,32(%rsp) rorl $7,%ecx addl %ebx,%eax addl 56(%rsp),%ebp movups 16(%r15),%xmm1 .byte 102,15,56,220,208 pslld $2,%xmm7 xorl %ecx,%esi movl %eax,%edi psrld $30,%xmm13 roll $5,%eax addl %esi,%ebp xorl %ecx,%edi rorl $7,%ebx por %xmm13,%xmm7 addl %eax,%ebp addl 60(%rsp),%edx pshufd $238,%xmm6,%xmm3 xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp addl %edi,%edx xorl %ebx,%esi rorl $7,%eax addl %ebp,%edx pxor %xmm4,%xmm8 addl 0(%rsp),%ecx xorl %eax,%esi punpcklqdq %xmm7,%xmm3 movl %edx,%edi roll $5,%edx pxor %xmm9,%xmm8 addl %esi,%ecx movups 32(%r15),%xmm0 .byte 102,15,56,220,209 xorl %eax,%edi movdqa %xmm12,%xmm13 rorl $7,%ebp paddd %xmm7,%xmm12 addl %edx,%ecx pxor %xmm3,%xmm8 addl 4(%rsp),%ebx xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx movdqa %xmm8,%xmm3 addl %edi,%ebx xorl %ebp,%esi movdqa %xmm12,48(%rsp) rorl $7,%edx addl %ecx,%ebx addl 8(%rsp),%eax pslld $2,%xmm8 xorl %edx,%esi movl %ebx,%edi psrld $30,%xmm3 roll $5,%ebx addl %esi,%eax xorl %edx,%edi rorl $7,%ecx por %xmm3,%xmm8 addl %ebx,%eax addl 12(%rsp),%ebp movups 48(%r15),%xmm1 .byte 102,15,56,220,208 pshufd $238,%xmm7,%xmm12 xorl %ecx,%edi movl %eax,%esi roll $5,%eax addl %edi,%ebp xorl %ecx,%esi rorl $7,%ebx addl %eax,%ebp pxor %xmm5,%xmm9 addl 16(%rsp),%edx xorl %ebx,%esi punpcklqdq %xmm8,%xmm12 movl %ebp,%edi roll $5,%ebp pxor %xmm10,%xmm9 addl %esi,%edx xorl %ebx,%edi movdqa %xmm13,%xmm3 rorl $7,%eax paddd %xmm8,%xmm13 addl %ebp,%edx pxor %xmm12,%xmm9 addl 20(%rsp),%ecx xorl 
%eax,%edi movl %edx,%esi roll $5,%edx movdqa %xmm9,%xmm12 addl %edi,%ecx cmpl $11,%r8d jb .Laesenclast2 movups 64(%r15),%xmm0 .byte 102,15,56,220,209 movups 80(%r15),%xmm1 .byte 102,15,56,220,208 je .Laesenclast2 movups 96(%r15),%xmm0 .byte 102,15,56,220,209 movups 112(%r15),%xmm1 .byte 102,15,56,220,208 .Laesenclast2: .byte 102,15,56,221,209 movups 16-112(%r15),%xmm0 xorl %eax,%esi movdqa %xmm13,0(%rsp) rorl $7,%ebp addl %edx,%ecx addl 24(%rsp),%ebx pslld $2,%xmm9 xorl %ebp,%esi movl %ecx,%edi psrld $30,%xmm12 roll $5,%ecx addl %esi,%ebx xorl %ebp,%edi rorl $7,%edx por %xmm12,%xmm9 addl %ecx,%ebx addl 28(%rsp),%eax pshufd $238,%xmm8,%xmm13 rorl $7,%ecx movl %ebx,%esi xorl %edx,%edi roll $5,%ebx addl %edi,%eax xorl %ecx,%esi xorl %edx,%ecx addl %ebx,%eax pxor %xmm6,%xmm10 addl 32(%rsp),%ebp movups 32(%r12),%xmm14 xorps %xmm15,%xmm14 movups %xmm2,16(%r13,%r12,1) xorps %xmm14,%xmm2 movups -80(%r15),%xmm1 .byte 102,15,56,220,208 andl %ecx,%esi xorl %edx,%ecx rorl $7,%ebx punpcklqdq %xmm9,%xmm13 movl %eax,%edi xorl %ecx,%esi pxor %xmm11,%xmm10 roll $5,%eax addl %esi,%ebp movdqa %xmm3,%xmm12 xorl %ebx,%edi paddd %xmm9,%xmm3 xorl %ecx,%ebx pxor %xmm13,%xmm10 addl %eax,%ebp addl 36(%rsp),%edx andl %ebx,%edi xorl %ecx,%ebx rorl $7,%eax movdqa %xmm10,%xmm13 movl %ebp,%esi xorl %ebx,%edi movdqa %xmm3,16(%rsp) roll $5,%ebp addl %edi,%edx movups -64(%r15),%xmm0 .byte 102,15,56,220,209 xorl %eax,%esi pslld $2,%xmm10 xorl %ebx,%eax addl %ebp,%edx psrld $30,%xmm13 addl 40(%rsp),%ecx andl %eax,%esi xorl %ebx,%eax por %xmm13,%xmm10 rorl $7,%ebp movl %edx,%edi xorl %eax,%esi roll $5,%edx pshufd $238,%xmm9,%xmm3 addl %esi,%ecx xorl %ebp,%edi xorl %eax,%ebp addl %edx,%ecx addl 44(%rsp),%ebx andl %ebp,%edi xorl %eax,%ebp rorl $7,%edx movups -48(%r15),%xmm1 .byte 102,15,56,220,208 movl %ecx,%esi xorl %ebp,%edi roll $5,%ecx addl %edi,%ebx xorl %edx,%esi xorl %ebp,%edx addl %ecx,%ebx pxor %xmm7,%xmm11 addl 48(%rsp),%eax andl %edx,%esi xorl %ebp,%edx rorl $7,%ecx punpcklqdq %xmm10,%xmm3 movl %ebx,%edi xorl %edx,%esi pxor %xmm4,%xmm11 roll $5,%ebx addl %esi,%eax movdqa 48(%r11),%xmm13 xorl %ecx,%edi paddd %xmm10,%xmm12 xorl %edx,%ecx pxor %xmm3,%xmm11 addl %ebx,%eax addl 52(%rsp),%ebp movups -32(%r15),%xmm0 .byte 102,15,56,220,209 andl %ecx,%edi xorl %edx,%ecx rorl $7,%ebx movdqa %xmm11,%xmm3 movl %eax,%esi xorl %ecx,%edi movdqa %xmm12,32(%rsp) roll $5,%eax addl %edi,%ebp xorl %ebx,%esi pslld $2,%xmm11 xorl %ecx,%ebx addl %eax,%ebp psrld $30,%xmm3 addl 56(%rsp),%edx andl %ebx,%esi xorl %ecx,%ebx por %xmm3,%xmm11 rorl $7,%eax movl %ebp,%edi xorl %ebx,%esi roll $5,%ebp pshufd $238,%xmm10,%xmm12 addl %esi,%edx movups -16(%r15),%xmm1 .byte 102,15,56,220,208 xorl %eax,%edi xorl %ebx,%eax addl %ebp,%edx addl 60(%rsp),%ecx andl %eax,%edi xorl %ebx,%eax rorl $7,%ebp movl %edx,%esi xorl %eax,%edi roll $5,%edx addl %edi,%ecx xorl %ebp,%esi xorl %eax,%ebp addl %edx,%ecx pxor %xmm8,%xmm4 addl 0(%rsp),%ebx andl %ebp,%esi xorl %eax,%ebp rorl $7,%edx movups 0(%r15),%xmm0 .byte 102,15,56,220,209 punpcklqdq %xmm11,%xmm12 movl %ecx,%edi xorl %ebp,%esi pxor %xmm5,%xmm4 roll $5,%ecx addl %esi,%ebx movdqa %xmm13,%xmm3 xorl %edx,%edi paddd %xmm11,%xmm13 xorl %ebp,%edx pxor %xmm12,%xmm4 addl %ecx,%ebx addl 4(%rsp),%eax andl %edx,%edi xorl %ebp,%edx rorl $7,%ecx movdqa %xmm4,%xmm12 movl %ebx,%esi xorl %edx,%edi movdqa %xmm13,48(%rsp) roll $5,%ebx addl %edi,%eax xorl %ecx,%esi pslld $2,%xmm4 xorl %edx,%ecx addl %ebx,%eax psrld $30,%xmm12 addl 8(%rsp),%ebp movups 16(%r15),%xmm1 .byte 102,15,56,220,208 andl %ecx,%esi xorl %edx,%ecx por 
%xmm12,%xmm4 rorl $7,%ebx movl %eax,%edi xorl %ecx,%esi roll $5,%eax pshufd $238,%xmm11,%xmm13 addl %esi,%ebp xorl %ebx,%edi xorl %ecx,%ebx addl %eax,%ebp addl 12(%rsp),%edx andl %ebx,%edi xorl %ecx,%ebx rorl $7,%eax movl %ebp,%esi xorl %ebx,%edi roll $5,%ebp addl %edi,%edx movups 32(%r15),%xmm0 .byte 102,15,56,220,209 xorl %eax,%esi xorl %ebx,%eax addl %ebp,%edx pxor %xmm9,%xmm5 addl 16(%rsp),%ecx andl %eax,%esi xorl %ebx,%eax rorl $7,%ebp punpcklqdq %xmm4,%xmm13 movl %edx,%edi xorl %eax,%esi pxor %xmm6,%xmm5 roll $5,%edx addl %esi,%ecx movdqa %xmm3,%xmm12 xorl %ebp,%edi paddd %xmm4,%xmm3 xorl %eax,%ebp pxor %xmm13,%xmm5 addl %edx,%ecx addl 20(%rsp),%ebx andl %ebp,%edi xorl %eax,%ebp rorl $7,%edx movups 48(%r15),%xmm1 .byte 102,15,56,220,208 movdqa %xmm5,%xmm13 movl %ecx,%esi xorl %ebp,%edi movdqa %xmm3,0(%rsp) roll $5,%ecx addl %edi,%ebx xorl %edx,%esi pslld $2,%xmm5 xorl %ebp,%edx addl %ecx,%ebx psrld $30,%xmm13 addl 24(%rsp),%eax andl %edx,%esi xorl %ebp,%edx por %xmm13,%xmm5 rorl $7,%ecx movl %ebx,%edi xorl %edx,%esi roll $5,%ebx pshufd $238,%xmm4,%xmm3 addl %esi,%eax xorl %ecx,%edi xorl %edx,%ecx addl %ebx,%eax addl 28(%rsp),%ebp cmpl $11,%r8d jb .Laesenclast3 movups 64(%r15),%xmm0 .byte 102,15,56,220,209 movups 80(%r15),%xmm1 .byte 102,15,56,220,208 je .Laesenclast3 movups 96(%r15),%xmm0 .byte 102,15,56,220,209 movups 112(%r15),%xmm1 .byte 102,15,56,220,208 .Laesenclast3: .byte 102,15,56,221,209 movups 16-112(%r15),%xmm0 andl %ecx,%edi xorl %edx,%ecx rorl $7,%ebx movl %eax,%esi xorl %ecx,%edi roll $5,%eax addl %edi,%ebp xorl %ebx,%esi xorl %ecx,%ebx addl %eax,%ebp pxor %xmm10,%xmm6 addl 32(%rsp),%edx andl %ebx,%esi xorl %ecx,%ebx rorl $7,%eax punpcklqdq %xmm5,%xmm3 movl %ebp,%edi xorl %ebx,%esi pxor %xmm7,%xmm6 roll $5,%ebp addl %esi,%edx movups 48(%r12),%xmm14 xorps %xmm15,%xmm14 movups %xmm2,32(%r13,%r12,1) xorps %xmm14,%xmm2 movups -80(%r15),%xmm1 .byte 102,15,56,220,208 movdqa %xmm12,%xmm13 xorl %eax,%edi paddd %xmm5,%xmm12 xorl %ebx,%eax pxor %xmm3,%xmm6 addl %ebp,%edx addl 36(%rsp),%ecx andl %eax,%edi xorl %ebx,%eax rorl $7,%ebp movdqa %xmm6,%xmm3 movl %edx,%esi xorl %eax,%edi movdqa %xmm12,16(%rsp) roll $5,%edx addl %edi,%ecx xorl %ebp,%esi pslld $2,%xmm6 xorl %eax,%ebp addl %edx,%ecx psrld $30,%xmm3 addl 40(%rsp),%ebx andl %ebp,%esi xorl %eax,%ebp por %xmm3,%xmm6 rorl $7,%edx movups -64(%r15),%xmm0 .byte 102,15,56,220,209 movl %ecx,%edi xorl %ebp,%esi roll $5,%ecx pshufd $238,%xmm5,%xmm12 addl %esi,%ebx xorl %edx,%edi xorl %ebp,%edx addl %ecx,%ebx addl 44(%rsp),%eax andl %edx,%edi xorl %ebp,%edx rorl $7,%ecx movl %ebx,%esi xorl %edx,%edi roll $5,%ebx addl %edi,%eax xorl %edx,%esi addl %ebx,%eax pxor %xmm11,%xmm7 addl 48(%rsp),%ebp movups -48(%r15),%xmm1 .byte 102,15,56,220,208 xorl %ecx,%esi punpcklqdq %xmm6,%xmm12 movl %eax,%edi roll $5,%eax pxor %xmm8,%xmm7 addl %esi,%ebp xorl %ecx,%edi movdqa %xmm13,%xmm3 rorl $7,%ebx paddd %xmm6,%xmm13 addl %eax,%ebp pxor %xmm12,%xmm7 addl 52(%rsp),%edx xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm7,%xmm12 addl %edi,%edx xorl %ebx,%esi movdqa %xmm13,32(%rsp) rorl $7,%eax addl %ebp,%edx addl 56(%rsp),%ecx pslld $2,%xmm7 xorl %eax,%esi movl %edx,%edi psrld $30,%xmm12 roll $5,%edx addl %esi,%ecx movups -32(%r15),%xmm0 .byte 102,15,56,220,209 xorl %eax,%edi rorl $7,%ebp por %xmm12,%xmm7 addl %edx,%ecx addl 60(%rsp),%ebx xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx addl %edi,%ebx xorl %ebp,%esi rorl $7,%edx addl %ecx,%ebx addl 0(%rsp),%eax xorl %edx,%esi movl %ebx,%edi roll $5,%ebx paddd %xmm7,%xmm3 addl %esi,%eax xorl %edx,%edi movdqa 
%xmm3,48(%rsp) rorl $7,%ecx addl %ebx,%eax addl 4(%rsp),%ebp movups -16(%r15),%xmm1 .byte 102,15,56,220,208 xorl %ecx,%edi movl %eax,%esi roll $5,%eax addl %edi,%ebp xorl %ecx,%esi rorl $7,%ebx addl %eax,%ebp addl 8(%rsp),%edx xorl %ebx,%esi movl %ebp,%edi roll $5,%ebp addl %esi,%edx xorl %ebx,%edi rorl $7,%eax addl %ebp,%edx addl 12(%rsp),%ecx xorl %eax,%edi movl %edx,%esi roll $5,%edx addl %edi,%ecx movups 0(%r15),%xmm0 .byte 102,15,56,220,209 xorl %eax,%esi rorl $7,%ebp addl %edx,%ecx cmpq %r14,%r10 je .Ldone_ssse3 movdqa 64(%r11),%xmm3 movdqa 0(%r11),%xmm13 movdqu 0(%r10),%xmm4 movdqu 16(%r10),%xmm5 movdqu 32(%r10),%xmm6 movdqu 48(%r10),%xmm7 .byte 102,15,56,0,227 addq $64,%r10 addl 16(%rsp),%ebx xorl %ebp,%esi movl %ecx,%edi .byte 102,15,56,0,235 roll $5,%ecx addl %esi,%ebx xorl %ebp,%edi rorl $7,%edx paddd %xmm13,%xmm4 addl %ecx,%ebx addl 20(%rsp),%eax xorl %edx,%edi movl %ebx,%esi movdqa %xmm4,0(%rsp) roll $5,%ebx addl %edi,%eax xorl %edx,%esi rorl $7,%ecx psubd %xmm13,%xmm4 addl %ebx,%eax addl 24(%rsp),%ebp movups 16(%r15),%xmm1 .byte 102,15,56,220,208 xorl %ecx,%esi movl %eax,%edi roll $5,%eax addl %esi,%ebp xorl %ecx,%edi rorl $7,%ebx addl %eax,%ebp addl 28(%rsp),%edx xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp addl %edi,%edx xorl %ebx,%esi rorl $7,%eax addl %ebp,%edx addl 32(%rsp),%ecx xorl %eax,%esi movl %edx,%edi .byte 102,15,56,0,243 roll $5,%edx addl %esi,%ecx movups 32(%r15),%xmm0 .byte 102,15,56,220,209 xorl %eax,%edi rorl $7,%ebp paddd %xmm13,%xmm5 addl %edx,%ecx addl 36(%rsp),%ebx xorl %ebp,%edi movl %ecx,%esi movdqa %xmm5,16(%rsp) roll $5,%ecx addl %edi,%ebx xorl %ebp,%esi rorl $7,%edx psubd %xmm13,%xmm5 addl %ecx,%ebx addl 40(%rsp),%eax xorl %edx,%esi movl %ebx,%edi roll $5,%ebx addl %esi,%eax xorl %edx,%edi rorl $7,%ecx addl %ebx,%eax addl 44(%rsp),%ebp movups 48(%r15),%xmm1 .byte 102,15,56,220,208 xorl %ecx,%edi movl %eax,%esi roll $5,%eax addl %edi,%ebp xorl %ecx,%esi rorl $7,%ebx addl %eax,%ebp addl 48(%rsp),%edx xorl %ebx,%esi movl %ebp,%edi .byte 102,15,56,0,251 roll $5,%ebp addl %esi,%edx xorl %ebx,%edi rorl $7,%eax paddd %xmm13,%xmm6 addl %ebp,%edx addl 52(%rsp),%ecx xorl %eax,%edi movl %edx,%esi movdqa %xmm6,32(%rsp) roll $5,%edx addl %edi,%ecx cmpl $11,%r8d jb .Laesenclast4 movups 64(%r15),%xmm0 .byte 102,15,56,220,209 movups 80(%r15),%xmm1 .byte 102,15,56,220,208 je .Laesenclast4 movups 96(%r15),%xmm0 .byte 102,15,56,220,209 movups 112(%r15),%xmm1 .byte 102,15,56,220,208 .Laesenclast4: .byte 102,15,56,221,209 movups 16-112(%r15),%xmm0 xorl %eax,%esi rorl $7,%ebp psubd %xmm13,%xmm6 addl %edx,%ecx addl 56(%rsp),%ebx xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx addl %esi,%ebx xorl %ebp,%edi rorl $7,%edx addl %ecx,%ebx addl 60(%rsp),%eax xorl %edx,%edi movl %ebx,%esi roll $5,%ebx addl %edi,%eax rorl $7,%ecx addl %ebx,%eax movups %xmm2,48(%r13,%r12,1) leaq 64(%r12),%r12 addl 0(%r9),%eax addl 4(%r9),%esi addl 8(%r9),%ecx addl 12(%r9),%edx movl %eax,0(%r9) addl 16(%r9),%ebp movl %esi,4(%r9) movl %esi,%ebx movl %ecx,8(%r9) movl %ecx,%edi movl %edx,12(%r9) xorl %edx,%edi movl %ebp,16(%r9) andl %edi,%esi jmp .Loop_ssse3 .Ldone_ssse3: addl 16(%rsp),%ebx xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx addl %esi,%ebx xorl %ebp,%edi rorl $7,%edx addl %ecx,%ebx addl 20(%rsp),%eax xorl %edx,%edi movl %ebx,%esi roll $5,%ebx addl %edi,%eax xorl %edx,%esi rorl $7,%ecx addl %ebx,%eax addl 24(%rsp),%ebp movups 16(%r15),%xmm1 .byte 102,15,56,220,208 xorl %ecx,%esi movl %eax,%edi roll $5,%eax addl %esi,%ebp xorl %ecx,%edi rorl $7,%ebx addl %eax,%ebp addl 28(%rsp),%edx xorl %ebx,%edi movl 
%ebp,%esi roll $5,%ebp addl %edi,%edx xorl %ebx,%esi rorl $7,%eax addl %ebp,%edx addl 32(%rsp),%ecx xorl %eax,%esi movl %edx,%edi roll $5,%edx addl %esi,%ecx movups 32(%r15),%xmm0 .byte 102,15,56,220,209 xorl %eax,%edi rorl $7,%ebp addl %edx,%ecx addl 36(%rsp),%ebx xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx addl %edi,%ebx xorl %ebp,%esi rorl $7,%edx addl %ecx,%ebx addl 40(%rsp),%eax xorl %edx,%esi movl %ebx,%edi roll $5,%ebx addl %esi,%eax xorl %edx,%edi rorl $7,%ecx addl %ebx,%eax addl 44(%rsp),%ebp movups 48(%r15),%xmm1 .byte 102,15,56,220,208 xorl %ecx,%edi movl %eax,%esi roll $5,%eax addl %edi,%ebp xorl %ecx,%esi rorl $7,%ebx addl %eax,%ebp addl 48(%rsp),%edx xorl %ebx,%esi movl %ebp,%edi roll $5,%ebp addl %esi,%edx xorl %ebx,%edi rorl $7,%eax addl %ebp,%edx addl 52(%rsp),%ecx xorl %eax,%edi movl %edx,%esi roll $5,%edx addl %edi,%ecx cmpl $11,%r8d jb .Laesenclast5 movups 64(%r15),%xmm0 .byte 102,15,56,220,209 movups 80(%r15),%xmm1 .byte 102,15,56,220,208 je .Laesenclast5 movups 96(%r15),%xmm0 .byte 102,15,56,220,209 movups 112(%r15),%xmm1 .byte 102,15,56,220,208 .Laesenclast5: .byte 102,15,56,221,209 movups 16-112(%r15),%xmm0 xorl %eax,%esi rorl $7,%ebp addl %edx,%ecx addl 56(%rsp),%ebx xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx addl %esi,%ebx xorl %ebp,%edi rorl $7,%edx addl %ecx,%ebx addl 60(%rsp),%eax xorl %edx,%edi movl %ebx,%esi roll $5,%ebx addl %edi,%eax rorl $7,%ecx addl %ebx,%eax movups %xmm2,48(%r13,%r12,1) movq 88(%rsp),%r8 addl 0(%r9),%eax addl 4(%r9),%esi addl 8(%r9),%ecx movl %eax,0(%r9) addl 12(%r9),%edx movl %esi,4(%r9) addl 16(%r9),%ebp movl %ecx,8(%r9) movl %edx,12(%r9) movl %ebp,16(%r9) movups %xmm2,(%r8) leaq 104(%rsp),%rsi movq 0(%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Lepilogue_ssse3: .byte 0xf3,0xc3 .size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 +.type aesni_cbc_sha1_enc_avx,@function +.align 32 +aesni_cbc_sha1_enc_avx: + movq 8(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -104(%rsp),%rsp + + + vzeroall + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + leaq 112(%rcx),%r15 + vmovdqu (%r8),%xmm12 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240-112(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + vmovdqa 64(%r11),%xmm6 + vmovdqa 0(%r11),%xmm10 + vmovdqu 0(%r10),%xmm0 + vmovdqu 16(%r10),%xmm1 + vmovdqu 32(%r10),%xmm2 + vmovdqu 48(%r10),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r10 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm10,%xmm0,%xmm4 + vpaddd %xmm10,%xmm1,%xmm5 + vpaddd %xmm10,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + jmp .Loop_avx +.align 32 +.Loop_avx: + shrdl $2,%ebx,%ebx + vmovdqu 0(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm10,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 
4(%rsp),%edx + vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm9 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpor %xmm8,%xmm4,%xmm4 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + vpxor %xmm9,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm10,%xmm9 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm9 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpor %xmm8,%xmm5,%xmm5 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm9,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa 16(%r11),%xmm10 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%edi + addl 32(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + vpaddd %xmm5,%xmm10,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm9 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpor %xmm8,%xmm6,%xmm6 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm9,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm10,%xmm9 + xorl 
%eax,%ebp + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm9 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpor %xmm8,%xmm7,%xmm7 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + cmpl $11,%r8d + jb .Lvaesenclast6 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast6 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast6: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm9,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm10,%xmm9 + addl %esi,%edx + vmovdqu 16(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,0(%r12,%r13,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm10,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr 
$8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm10,%xmm9 + vmovdqa 32(%r11),%xmm10 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm10,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%edi + vpaddd %xmm3,%xmm10,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm10,%xmm9 + shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + cmpl $11,%r8d + jb .Lvaesenclast7 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast7 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast7: + vaesenclast %xmm15,%xmm12,%xmm12 + 
vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + vmovdqu 32(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,16(%r13,%r12,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm10,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm10,%xmm9 + vmovdqa 48(%r11),%xmm10 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + vpsrld $30,%xmm7,%xmm8 + vmovdqa %xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm10,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl 
$5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm10,%xmm9 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + cmpl $11,%r8d + jb .Lvaesenclast8 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast8 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast8: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + vpaddd %xmm1,%xmm10,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vmovdqu 48(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,32(%r13,%r12,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor 
%xmm4,%xmm3,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm10,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm10,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + cmpq %r14,%r10 + je .Ldone_avx + vmovdqa 64(%r11),%xmm9 + vmovdqa 0(%r11),%xmm10 + vmovdqu 0(%r10),%xmm0 + vmovdqu 16(%r10),%xmm1 + vmovdqu 32(%r10),%xmm2 + vmovdqu 48(%r10),%xmm3 + vpshufb %xmm9,%xmm0,%xmm0 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %ebp,%esi + vpshufb %xmm9,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm10,%xmm0,%xmm8 + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm8,0(%rsp) + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + vpshufb %xmm9,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + vpaddd %xmm10,%xmm1,%xmm8 + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm8,16(%rsp) + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + vpshufb %xmm9,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm10,%xmm2,%xmm8 + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm8,32(%rsp) + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl 
%edi,%ecx + cmpl $11,%r8d + jb .Lvaesenclast9 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast9 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast9: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vmovups %xmm12,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %ecx,%edi + movl %edx,12(%r9) + xorl %edx,%edi + movl %ebp,16(%r9) + andl %edi,%esi + jmp .Loop_avx + +.Ldone_avx: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb .Lvaesenclast10 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast10 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast10: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vmovups %xmm12,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 
12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + vmovups %xmm12,(%r8) + vzeroall + leaq 104(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx .align 64 K_XX_XX: .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f .byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 .byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 .type aesni_cbc_sha1_enc_shaext,@function .align 32 aesni_cbc_sha1_enc_shaext: movq 8(%rsp),%r10 movdqu (%r9),%xmm8 movd 16(%r9),%xmm9 movdqa K_XX_XX+80(%rip),%xmm7 movl 240(%rcx),%r11d subq %rdi,%rsi movups (%rcx),%xmm15 movups 16(%rcx),%xmm0 leaq 112(%rcx),%rcx pshufd $27,%xmm8,%xmm8 pshufd $27,%xmm9,%xmm9 jmp .Loop_shaext .align 16 .Loop_shaext: movups 0(%rdi),%xmm14 xorps %xmm15,%xmm14 xorps %xmm14,%xmm2 movups -80(%rcx),%xmm1 .byte 102,15,56,220,208 movdqu (%r10),%xmm3 movdqa %xmm9,%xmm12 .byte 102,15,56,0,223 movdqu 16(%r10),%xmm4 movdqa %xmm8,%xmm11 movups -64(%rcx),%xmm0 .byte 102,15,56,220,209 .byte 102,15,56,0,231 paddd %xmm3,%xmm9 movdqu 32(%r10),%xmm5 leaq 64(%r10),%r10 pxor %xmm12,%xmm3 movups -48(%rcx),%xmm1 .byte 102,15,56,220,208 pxor %xmm12,%xmm3 movdqa %xmm8,%xmm10 .byte 102,15,56,0,239 .byte 69,15,58,204,193,0 .byte 68,15,56,200,212 movups -32(%rcx),%xmm0 .byte 102,15,56,220,209 .byte 15,56,201,220 movdqu -16(%r10),%xmm6 movdqa %xmm8,%xmm9 .byte 102,15,56,0,247 movups -16(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 69,15,58,204,194,0 .byte 68,15,56,200,205 pxor %xmm5,%xmm3 .byte 15,56,201,229 movups 0(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm10 .byte 69,15,58,204,193,0 .byte 68,15,56,200,214 movups 16(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,222 pxor %xmm6,%xmm4 .byte 15,56,201,238 movups 32(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm9 .byte 69,15,58,204,194,0 .byte 68,15,56,200,203 movups 48(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,227 pxor %xmm3,%xmm5 .byte 15,56,201,243 cmpl $11,%r11d - jb .Laesenclast6 + jb .Laesenclast11 movups 64(%rcx),%xmm0 .byte 102,15,56,220,209 movups 80(%rcx),%xmm1 .byte 102,15,56,220,208 - je .Laesenclast6 + je .Laesenclast11 movups 96(%rcx),%xmm0 .byte 102,15,56,220,209 movups 112(%rcx),%xmm1 .byte 102,15,56,220,208 -.Laesenclast6: +.Laesenclast11: .byte 102,15,56,221,209 movups 16-112(%rcx),%xmm0 movdqa %xmm8,%xmm10 .byte 69,15,58,204,193,0 .byte 68,15,56,200,212 movups 16(%rdi),%xmm14 xorps %xmm15,%xmm14 movups %xmm2,0(%rsi,%rdi,1) xorps %xmm14,%xmm2 movups -80(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,236 pxor %xmm4,%xmm6 .byte 15,56,201,220 movups -64(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm9 .byte 69,15,58,204,194,1 .byte 68,15,56,200,205 movups -48(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,245 pxor %xmm5,%xmm3 .byte 15,56,201,229 movups -32(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm10 .byte 69,15,58,204,193,1 .byte 68,15,56,200,214 movups -16(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 
15,56,202,222 pxor %xmm6,%xmm4 .byte 15,56,201,238 movups 0(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm9 .byte 69,15,58,204,194,1 .byte 68,15,56,200,203 movups 16(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,227 pxor %xmm3,%xmm5 .byte 15,56,201,243 movups 32(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm10 .byte 69,15,58,204,193,1 .byte 68,15,56,200,212 movups 48(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,236 pxor %xmm4,%xmm6 .byte 15,56,201,220 cmpl $11,%r11d - jb .Laesenclast7 + jb .Laesenclast12 movups 64(%rcx),%xmm0 .byte 102,15,56,220,209 movups 80(%rcx),%xmm1 .byte 102,15,56,220,208 - je .Laesenclast7 + je .Laesenclast12 movups 96(%rcx),%xmm0 .byte 102,15,56,220,209 movups 112(%rcx),%xmm1 .byte 102,15,56,220,208 -.Laesenclast7: +.Laesenclast12: .byte 102,15,56,221,209 movups 16-112(%rcx),%xmm0 movdqa %xmm8,%xmm9 .byte 69,15,58,204,194,1 .byte 68,15,56,200,205 movups 32(%rdi),%xmm14 xorps %xmm15,%xmm14 movups %xmm2,16(%rsi,%rdi,1) xorps %xmm14,%xmm2 movups -80(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,245 pxor %xmm5,%xmm3 .byte 15,56,201,229 movups -64(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm10 .byte 69,15,58,204,193,2 .byte 68,15,56,200,214 movups -48(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,222 pxor %xmm6,%xmm4 .byte 15,56,201,238 movups -32(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm9 .byte 69,15,58,204,194,2 .byte 68,15,56,200,203 movups -16(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,227 pxor %xmm3,%xmm5 .byte 15,56,201,243 movups 0(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm10 .byte 69,15,58,204,193,2 .byte 68,15,56,200,212 movups 16(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,236 pxor %xmm4,%xmm6 .byte 15,56,201,220 movups 32(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm9 .byte 69,15,58,204,194,2 .byte 68,15,56,200,205 movups 48(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,245 pxor %xmm5,%xmm3 .byte 15,56,201,229 cmpl $11,%r11d - jb .Laesenclast8 + jb .Laesenclast13 movups 64(%rcx),%xmm0 .byte 102,15,56,220,209 movups 80(%rcx),%xmm1 .byte 102,15,56,220,208 - je .Laesenclast8 + je .Laesenclast13 movups 96(%rcx),%xmm0 .byte 102,15,56,220,209 movups 112(%rcx),%xmm1 .byte 102,15,56,220,208 -.Laesenclast8: +.Laesenclast13: .byte 102,15,56,221,209 movups 16-112(%rcx),%xmm0 movdqa %xmm8,%xmm10 .byte 69,15,58,204,193,2 .byte 68,15,56,200,214 movups 48(%rdi),%xmm14 xorps %xmm15,%xmm14 movups %xmm2,32(%rsi,%rdi,1) xorps %xmm14,%xmm2 movups -80(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,222 pxor %xmm6,%xmm4 .byte 15,56,201,238 movups -64(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm9 .byte 69,15,58,204,194,3 .byte 68,15,56,200,203 movups -48(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 15,56,202,227 pxor %xmm3,%xmm5 .byte 15,56,201,243 movups -32(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm8,%xmm10 .byte 69,15,58,204,193,3 .byte 68,15,56,200,212 .byte 15,56,202,236 pxor %xmm4,%xmm6 movups -16(%rcx),%xmm1 .byte 102,15,56,220,208 movdqa %xmm8,%xmm9 .byte 69,15,58,204,194,3 .byte 68,15,56,200,205 .byte 15,56,202,245 movups 0(%rcx),%xmm0 .byte 102,15,56,220,209 movdqa %xmm12,%xmm5 movdqa %xmm8,%xmm10 .byte 69,15,58,204,193,3 .byte 68,15,56,200,214 movups 16(%rcx),%xmm1 .byte 102,15,56,220,208 movdqa %xmm8,%xmm9 .byte 69,15,58,204,194,3 .byte 68,15,56,200,205 movups 32(%rcx),%xmm0 .byte 102,15,56,220,209 movups 48(%rcx),%xmm1 .byte 102,15,56,220,208 cmpl $11,%r11d - jb .Laesenclast9 + jb .Laesenclast14 movups 64(%rcx),%xmm0 .byte 102,15,56,220,209 movups 
80(%rcx),%xmm1 .byte 102,15,56,220,208 - je .Laesenclast9 + je .Laesenclast14 movups 96(%rcx),%xmm0 .byte 102,15,56,220,209 movups 112(%rcx),%xmm1 .byte 102,15,56,220,208 -.Laesenclast9: +.Laesenclast14: .byte 102,15,56,221,209 movups 16-112(%rcx),%xmm0 decq %rdx paddd %xmm11,%xmm8 movups %xmm2,48(%rsi,%rdi,1) leaq 64(%rdi),%rdi jnz .Loop_shaext pshufd $27,%xmm8,%xmm8 pshufd $27,%xmm9,%xmm9 movups %xmm2,(%r8) movdqu %xmm8,(%r9) movd %xmm9,16(%r9) .byte 0xf3,0xc3 .size aesni_cbc_sha1_enc_shaext,.-aesni_cbc_sha1_enc_shaext Index: head/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S (revision 299481) @@ -1,58 +1,4356 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from aesni-sha256-x86_64.pl. .text .globl aesni_cbc_sha256_enc .type aesni_cbc_sha256_enc,@function .align 16 aesni_cbc_sha256_enc: + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl $1,%eax + cmpq $0,%rdi + je .Lprobe + movl 0(%r11),%eax + movq 4(%r11),%r10 + btq $61,%r10 + jc aesni_cbc_sha256_enc_shaext + movq %r10,%r11 + shrq $32,%r11 + + testl $2048,%r10d + jnz aesni_cbc_sha256_enc_xop + andl $296,%r11d + cmpl $296,%r11d + je aesni_cbc_sha256_enc_avx2 + andl $268435456,%r10d + jnz aesni_cbc_sha256_enc_avx + ud2 xorl %eax,%eax cmpq $0,%rdi je .Lprobe ud2 .Lprobe: .byte 0xf3,0xc3 .size aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc .align 64 .type K256,@object K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f .long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1 .long 0,0,0,0, 0,0,0,0 .byte 
65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 +.type aesni_cbc_sha256_enc_xop,@function +.align 64 +aesni_cbc_sha256_enc_xop: +.Lxop_shortcut: + movq 8(%rsp),%r10 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $128,%rsp + andq $-64,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %r11,64+56(%rsp) +.Lprologue_xop: + vzeroall + + movq %rdi,%r12 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r13 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + subq $9,%r14 + + movl 0(%r15),%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + + vmovdqa 0(%r13,%r14,8),%xmm14 + vmovdqa 16(%r13,%r14,8),%xmm13 + vmovdqa 32(%r13,%r14,8),%xmm12 + vmovdqu 0-128(%rdi),%xmm10 + jmp .Lloop_xop +.align 16 +.Lloop_xop: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi,%r12,1),%xmm0 + vmovdqu 16(%rsi,%r12,1),%xmm1 + vmovdqu 32(%rsi,%r12,1),%xmm2 + vmovdqu 48(%rsi,%r12,1),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%esi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%esi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lxop_00_47 + +.align 16 +.Lxop_00_47: + subq $-32*4,%rbp + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + vpalignr $4,%xmm0,%xmm1,%xmm4 + rorl $14,%r13d + movl %r14d,%eax + vpalignr $4,%xmm2,%xmm3,%xmm7 + movl %r9d,%r12d + xorl %r8d,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %r10d,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %eax,%r14d + vpaddd %xmm7,%xmm0,%xmm0 + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %r10d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi +.byte 143,232,120,194,251,13 + xorl %eax,%r14d + addl %r13d,%r11d + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%esi + addl %r11d,%edx + vpsrld $10,%xmm3,%xmm6 + rorl $2,%r14d + addl %esi,%r11d + vpaddd %xmm4,%xmm0,%xmm0 + movl %edx,%r13d + addl %r11d,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%r11d + vpxor %xmm6,%xmm7,%xmm7 + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 4(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d +.byte 143,232,120,194,248,13 + xorl %r11d,%r14d + addl %r13d,%r10d + vpsrld $10,%xmm0,%xmm6 + xorl %eax,%r15d + addl %r10d,%ecx +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%r10d + vpxor %xmm6,%xmm7,%xmm7 + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + vpxor 
%xmm5,%xmm7,%xmm7 + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + vpaddd %xmm7,%xmm0,%xmm0 + addl 8(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + rorl $14,%r13d + movl %r14d,%r8d + vpalignr $4,%xmm3,%xmm0,%xmm7 + movl %ebx,%r12d + xorl %eax,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %ecx,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %r8d,%r14d + vpaddd %xmm7,%xmm1,%xmm1 + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %ecx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi +.byte 143,232,120,194,248,13 + xorl %r8d,%r14d + addl %r13d,%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %r9d,%esi + addl %edx,%r11d + vpsrld $10,%xmm0,%xmm6 + rorl $2,%r14d + addl %esi,%edx + vpaddd %xmm4,%xmm1,%xmm1 + movl %r11d,%r13d + addl %edx,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%edx + vpxor %xmm6,%xmm7,%xmm7 + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 20(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d +.byte 143,232,120,194,249,13 + xorl %edx,%r14d + addl %r13d,%ecx + vpsrld $10,%xmm1,%xmm6 + xorl %r8d,%r15d + addl %ecx,%r10d +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%ecx + vpxor %xmm6,%xmm7,%xmm7 + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + vpxor %xmm5,%xmm7,%xmm7 + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + vpaddd %xmm7,%xmm1,%xmm1 + addl 24(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 
112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + rorl $14,%r13d + movl %r14d,%eax + vpalignr $4,%xmm0,%xmm1,%xmm7 + movl %r9d,%r12d + xorl %r8d,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %r10d,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %eax,%r14d + vpaddd %xmm7,%xmm2,%xmm2 + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %r10d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi +.byte 143,232,120,194,249,13 + xorl %eax,%r14d + addl %r13d,%r11d + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%esi + addl %r11d,%edx + vpsrld $10,%xmm1,%xmm6 + rorl $2,%r14d + addl %esi,%r11d + vpaddd %xmm4,%xmm2,%xmm2 + movl %edx,%r13d + addl %r11d,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%r11d + vpxor %xmm6,%xmm7,%xmm7 + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 36(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d +.byte 143,232,120,194,250,13 + xorl %r11d,%r14d + addl %r13d,%r10d + vpsrld $10,%xmm2,%xmm6 + xorl %eax,%r15d + addl %r10d,%ecx +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%r10d + vpxor %xmm6,%xmm7,%xmm7 + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + vpxor %xmm5,%xmm7,%xmm7 + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + vpaddd %xmm7,%xmm2,%xmm2 + addl 40(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + rorl $14,%r13d + movl %r14d,%r8d + vpalignr $4,%xmm1,%xmm2,%xmm7 + movl %ebx,%r12d + xorl %eax,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %ecx,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %r8d,%r14d + vpaddd %xmm7,%xmm3,%xmm3 + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 
+ xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %ecx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi +.byte 143,232,120,194,250,13 + xorl %r8d,%r14d + addl %r13d,%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %r9d,%esi + addl %edx,%r11d + vpsrld $10,%xmm2,%xmm6 + rorl $2,%r14d + addl %esi,%edx + vpaddd %xmm4,%xmm3,%xmm3 + movl %r11d,%r13d + addl %edx,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%edx + vpxor %xmm6,%xmm7,%xmm7 + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 52(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d +.byte 143,232,120,194,251,13 + xorl %edx,%r14d + addl %r13d,%ecx + vpsrld $10,%xmm3,%xmm6 + xorl %r8d,%r15d + addl %ecx,%r10d +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%ecx + vpxor %xmm6,%xmm7,%xmm7 + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + vpxor %xmm5,%xmm7,%xmm7 + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + vpaddd %xmm7,%xmm3,%xmm3 + addl 56(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + movq 64+0(%rsp),%r12 + vpand %xmm14,%xmm11,%xmm11 + movq 64+8(%rsp),%r15 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r12,1) + leaq 16(%r12),%r12 + cmpb $0,131(%rbp) + jne .Lxop_00_47 + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + rorl $2,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + 
rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + rorl $2,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + rorl $2,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + rorl $2,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl 
%r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + rorl $2,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + rorl $2,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + rorl $2,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + rorl $2,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl 
%ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%r12 + movq 64+8(%rsp),%r13 + movq 64+40(%rsp),%r15 + movq 64+48(%rsp),%rsi + + vpand %xmm14,%xmm11,%xmm11 + movl %r14d,%eax + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r12),%r12 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r12 + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + jb .Lloop_xop + + movq 64+32(%rsp),%r8 + movq 64+56(%rsp),%rsi + vmovdqu %xmm8,(%r8) + vzeroall + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_xop: + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop +.type aesni_cbc_sha256_enc_avx,@function +.align 64 +aesni_cbc_sha256_enc_avx: +.Lavx_shortcut: + movq 8(%rsp),%r10 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $128,%rsp + andq $-64,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %r11,64+56(%rsp) +.Lprologue_avx: + vzeroall + + movq %rdi,%r12 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r13 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + subq $9,%r14 + + movl 0(%r15),%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + + vmovdqa 0(%r13,%r14,8),%xmm14 + vmovdqa 16(%r13,%r14,8),%xmm13 + vmovdqa 32(%r13,%r14,8),%xmm12 + vmovdqu 0-128(%rdi),%xmm10 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi,%r12,1),%xmm0 + vmovdqu 16(%rsi,%r12,1),%xmm1 + vmovdqu 32(%rsi,%r12,1),%xmm2 + vmovdqu 48(%rsi,%r12,1),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%esi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%esi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + subq $-32*4,%rbp + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 
0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + vpshufd $250,%xmm3,%xmm7 + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpaddd %xmm6,%xmm0,%xmm0 + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + vpshufd $80,%xmm0,%xmm7 + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + vpsrlq $17,%xmm7,%xmm7 + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + vpaddd 0(%rbp),%xmm0,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + vpshufd $250,%xmm0,%xmm7 + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor 
%xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpaddd %xmm6,%xmm1,%xmm1 + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + vpshufd $80,%xmm1,%xmm7 + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + vpsrlq $17,%xmm7,%xmm7 + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + vpaddd 32(%rbp),%xmm1,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + vpshufd $250,%xmm1,%xmm7 + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl 
%r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpaddd %xmm6,%xmm2,%xmm2 + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + vpshufd $80,%xmm2,%xmm7 + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + vpsrlq $17,%xmm7,%xmm7 + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + vpaddd 64(%rbp),%xmm2,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + vpshufd $250,%xmm2,%xmm7 + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl 
%ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpaddd %xmm6,%xmm3,%xmm3 + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + vpshufd $80,%xmm3,%xmm7 + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + vpsrlq $17,%xmm7,%xmm7 + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpslldq $8,%xmm6,%xmm6 + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + vpaddd 96(%rbp),%xmm3,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + movq 64+0(%rsp),%r12 + vpand %xmm14,%xmm11,%xmm11 + movq 64+8(%rsp),%r15 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r12,1) + leaq 16(%r12),%r12 + cmpb $0,131(%rbp) + jne .Lavx_00_47 + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl 
%ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + 
xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl 
%r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%r12 + movq 64+8(%rsp),%r13 + movq 64+40(%rsp),%r15 + movq 64+48(%rsp),%rsi + + vpand %xmm14,%xmm11,%xmm11 + movl %r14d,%eax + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r12),%r12 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r12 + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + jb .Lloop_avx + + movq 64+32(%rsp),%r8 + movq 64+56(%rsp),%rsi + vmovdqu %xmm8,(%r8) + vzeroall + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx +.type aesni_cbc_sha256_enc_avx2,@function +.align 64 +aesni_cbc_sha256_enc_avx2: +.Lavx2_shortcut: + movq 8(%rsp),%r10 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $576,%rsp + andq $-1024,%rsp + addq $448,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %r11,64+56(%rsp) +.Lprologue_avx2: + vzeroall + + movq %rdi,%r13 + vpinsrq $1,%rsi,%xmm15,%xmm15 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r12 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + leaq -9(%r14),%r14 + + vmovdqa 0(%r12,%r14,8),%xmm14 + vmovdqa 16(%r12,%r14,8),%xmm13 + vmovdqa 32(%r12,%r14,8),%xmm12 + + subq $-64,%r13 + movl 0(%r15),%eax + leaq (%rsi,%r13,1),%r12 + movl 4(%r15),%ebx + cmpq %rdx,%r13 + movl 8(%r15),%ecx + cmoveq %rsp,%r12 + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + vmovdqu 0-128(%rdi),%xmm10 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqa K256+512(%rip),%ymm7 + vmovdqu -64+0(%rsi,%r13,1),%xmm0 + vmovdqu -64+16(%rsi,%r13,1),%xmm1 + vmovdqu -64+32(%rsi,%r13,1),%xmm2 + vmovdqu -64+48(%rsi,%r13,1),%xmm3 + + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm7,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm7,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + + leaq K256(%rip),%rbp + vpshufb %ymm7,%ymm2,%ymm2 + leaq -64(%r13),%r13 + vpaddd 0(%rbp),%ymm0,%ymm4 + vpshufb %ymm7,%ymm3,%ymm3 + vpaddd 32(%rbp),%ymm1,%ymm5 + vpaddd 64(%rbp),%ymm2,%ymm6 + vpaddd 96(%rbp),%ymm3,%ymm7 + vmovdqa %ymm4,0(%rsp) + xorl %r14d,%r14d + vmovdqa %ymm5,32(%rsp) + leaq -64(%rsp),%rsp + movl %ebx,%esi + vmovdqa %ymm6,0(%rsp) + xorl %ecx,%esi + vmovdqa %ymm7,32(%rsp) + movl %r9d,%r12d + subq $-32*4,%rbp + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + leaq -64(%rsp),%rsp + vpalignr $4,%ymm0,%ymm1,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm2,%ymm3,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl 
%r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm0,%ymm0 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + vpshufd $250,%ymm3,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm0,%ymm0 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpshufd $80,%ymm0,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpsrlq $2,%ymm7,%ymm7 + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + vpaddd %ymm6,%ymm0,%ymm0 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + vpaddd 0(%rbp),%ymm0,%ymm6 + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm1,%ymm2,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm3,%ymm0,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm1,%ymm1 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 
64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + vpshufd $250,%ymm0,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm1,%ymm1 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpshufd $80,%ymm1,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpsrlq $2,%ymm7,%ymm7 + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + vpaddd %ymm6,%ymm1,%ymm1 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + vpaddd 32(%rbp),%ymm1,%ymm6 + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq -64(%rsp),%rsp + vpalignr $4,%ymm2,%ymm3,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm0,%ymm1,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm2,%ymm2 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + vpshufd $250,%ymm1,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal 
(%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm2,%ymm2 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpshufd $80,%ymm2,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpsrlq $2,%ymm7,%ymm7 + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + vpaddd %ymm6,%ymm2,%ymm2 + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + vpaddd 64(%rbp),%ymm2,%ymm6 + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm3,%ymm0,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm1,%ymm2,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm3,%ymm3 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + vpshufd $250,%ymm2,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + 
vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm3,%ymm3 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpshufd $80,%ymm3,%ymm7 + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpsrlq $2,%ymm7,%ymm7 + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + vpaddd %ymm6,%ymm3,%ymm3 + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + vpaddd 96(%rbp),%ymm3,%ymm6 + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + vmovq %xmm15,%r13 + vpextrq $1,%xmm15,%r15 + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r13,1) + leaq 16(%r13),%r13 + leaq 128(%rbp),%rbp + cmpb $0,3(%rbp) + jne .Lavx2_00_47 + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + addl 0+64(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+64(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+64(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + 
leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+64(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+64(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+64(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+64(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+64(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + addl 0(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl 
%r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx 
+ andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vpextrq $1,%xmm15,%r12 + vmovq %xmm15,%r13 + movq 552(%rsp),%r15 + addl %r14d,%eax + leaq 448(%rsp),%rbp + + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r13),%r13 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + cmpq 80(%rbp),%r13 + je .Ldone_avx2 + + xorl %r14d,%r14d + movl %ebx,%esi + movl %r9d,%r12d + xorl %ecx,%esi + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + 
movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + leaq -64(%rbp),%rbp + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 
4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl 
%edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovq %xmm15,%r13 + vpextrq $1,%xmm15,%r15 + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + leaq -64(%rbp),%rbp + vmovdqu %xmm8,(%r15,%r13,1) + leaq 16(%r13),%r13 + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 552(%rsp),%r15 + leaq 64(%r13),%r13 + movq 560(%rsp),%rsi + addl %r14d,%eax + leaq 448(%rsp),%rsp + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + leaq (%rsi,%r13,1),%r12 + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r13 + + movl %eax,0(%r15) + cmoveq %rsp,%r12 + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + +.Ldone_avx2: + leaq (%rbp),%rsp + movq 64+32(%rsp),%r8 + movq 64+56(%rsp),%rsi + vmovdqu %xmm8,(%r8) + vzeroall + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2 +.type aesni_cbc_sha256_enc_shaext,@function +.align 32 +aesni_cbc_sha256_enc_shaext: + movq 8(%rsp),%r10 + leaq K256+128(%rip),%rax + movdqu (%r9),%xmm1 + movdqu 16(%r9),%xmm2 + movdqa 512-128(%rax),%xmm3 + + movl 240(%rcx),%r11d + subq %rdi,%rsi + movups (%rcx),%xmm15 + movups 16(%rcx),%xmm4 + leaq 112(%rcx),%rcx + + pshufd $0x1b,%xmm1,%xmm0 + pshufd $0xb1,%xmm1,%xmm1 + pshufd $0x1b,%xmm2,%xmm2 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movdqu (%r10),%xmm10 + movdqu 16(%r10),%xmm11 + movdqu 32(%r10),%xmm12 +.byte 102,68,15,56,0,211 + movdqu 48(%r10),%xmm13 + + movdqa 0-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 102,68,15,56,0,219 + movdqa %xmm2,%xmm9 + movdqa %xmm1,%xmm8 + movups 0(%rdi),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 32-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 102,68,15,56,0,227 + leaq 64(%r10),%r10 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 64-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 102,68,15,56,0,235 +.byte 69,15,56,204,211 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 96-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + 
pshufd $0x0e,%xmm0,%xmm0 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 +.byte 15,56,203,202 + movdqa 128-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + cmpl $11,%r11d + jb .Laesenclast1 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast1 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast1: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,202 + movups 16(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,0(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movdqa 160-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 192-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 +.byte 69,15,56,204,211 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 224-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 256-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + cmpl $11,%r11d + jb .Laesenclast2 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast2 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast2: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,202 + movups 32(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,16(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movdqa 288-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 320-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 +.byte 69,15,56,204,211 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 352-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm10,%xmm3 
+.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 384-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 416-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + cmpl $11,%r11d + jb .Laesenclast3 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast3 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast3: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups 48(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,32(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 448-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 + movdqa %xmm7,%xmm3 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 480-128(%rax),%xmm0 + paddd %xmm13,%xmm0 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + cmpl $11,%r11d + jb .Laesenclast4 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast4 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast4: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop + + paddd %xmm9,%xmm2 + paddd %xmm8,%xmm1 + + decq %rdx + movups %xmm6,48(%rsi,%rdi,1) + leaq 64(%rdi),%rdi + jnz .Loop_shaext + + pshufd $0xb1,%xmm2,%xmm2 + pshufd $0x1b,%xmm1,%xmm3 + pshufd $0xb1,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,211,8 + + movups %xmm6,(%r8) + movdqu %xmm1,(%r9) + movdqu %xmm2,16(%r9) + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext Index: head/secure/lib/libcrypto/amd64/aesni-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/aesni-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/aesni-x86_64.S (revision 299481) @@ -1,3552 +1,3553 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from aesni-x86_64.pl. 
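
The aesni-x86_64.S hunk that follows defines libcrypto's AES-NI entry points (aesni_encrypt, aesni_decrypt, aesni_ecb_encrypt, the CCM64 and CTR32 block routines, and the XTS pair). As a reading aid, here is a sketch of the C-side calling conventions these symbols are normally given; the prototypes are an assumption reconstructed from OpenSSL's aesni-x86_64.pl commentary and its C glue code, not part of this revision, and AES_KEY is the expanded key schedule from <openssl/aes.h>.

#include <stddef.h>
#include <openssl/aes.h>                 /* AES_KEY (expanded key schedule) */

/* Assumed prototypes -- reconstructed, not taken from this diff. */
void aesni_encrypt(const unsigned char *in, unsigned char *out,
                   const AES_KEY *key);          /* one 16-byte block */
void aesni_decrypt(const unsigned char *in, unsigned char *out,
                   const AES_KEY *key);
void aesni_ecb_encrypt(const unsigned char *in, unsigned char *out,
                       size_t length,            /* in bytes, multiple of 16 */
                       const AES_KEY *key, int enc);
void aesni_ccm64_encrypt_blocks(const unsigned char *in, unsigned char *out,
                                size_t blocks, const void *key,
                                const unsigned char ivec[16],
                                unsigned char cmac[16]);
void aesni_ccm64_decrypt_blocks(const unsigned char *in, unsigned char *out,
                                size_t blocks, const void *key,
                                const unsigned char ivec[16],
                                unsigned char cmac[16]);
void aesni_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
                                size_t blocks, const void *key,
                                const unsigned char ivec[16]);
void aesni_xts_encrypt(const unsigned char *in, unsigned char *out,
                       size_t length, const AES_KEY *key1,
                       const AES_KEY *key2, const unsigned char iv[16]);
void aesni_xts_decrypt(const unsigned char *in, unsigned char *out,
                       size_t length, const AES_KEY *key1,
                       const AES_KEY *key2, const unsigned char iv[16]);

The key argument is the same structure throughout: the assembly reads the round count from offset 240 of it (for example "movl 240(%rdx),%eax" in aesni_encrypt below), which corresponds to the rounds field at the end of AES_KEY.
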
.text .globl aesni_encrypt .type aesni_encrypt,@function .align 16 aesni_encrypt: movups (%rdi),%xmm2 movl 240(%rdx),%eax movups (%rdx),%xmm0 movups 16(%rdx),%xmm1 leaq 32(%rdx),%rdx xorps %xmm0,%xmm2 .Loop_enc1_1: .byte 102,15,56,220,209 decl %eax movups (%rdx),%xmm1 leaq 16(%rdx),%rdx jnz .Loop_enc1_1 .byte 102,15,56,221,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 .byte 0xf3,0xc3 .size aesni_encrypt,.-aesni_encrypt .globl aesni_decrypt .type aesni_decrypt,@function .align 16 aesni_decrypt: movups (%rdi),%xmm2 movl 240(%rdx),%eax movups (%rdx),%xmm0 movups 16(%rdx),%xmm1 leaq 32(%rdx),%rdx xorps %xmm0,%xmm2 .Loop_dec1_2: .byte 102,15,56,222,209 decl %eax movups (%rdx),%xmm1 leaq 16(%rdx),%rdx jnz .Loop_dec1_2 .byte 102,15,56,223,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 .byte 0xf3,0xc3 .size aesni_decrypt, .-aesni_decrypt .type _aesni_encrypt2,@function .align 16 _aesni_encrypt2: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 movups 32(%rcx),%xmm0 leaq 32(%rcx,%rax,1),%rcx negq %rax addq $16,%rax .Lenc_loop2: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%rcx,%rax,1),%xmm0 jnz .Lenc_loop2 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 0xf3,0xc3 .size _aesni_encrypt2,.-_aesni_encrypt2 .type _aesni_decrypt2,@function .align 16 _aesni_decrypt2: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 movups 32(%rcx),%xmm0 leaq 32(%rcx,%rax,1),%rcx negq %rax addq $16,%rax .Ldec_loop2: .byte 102,15,56,222,209 .byte 102,15,56,222,217 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 movups -16(%rcx,%rax,1),%xmm0 jnz .Ldec_loop2 .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 0xf3,0xc3 .size _aesni_decrypt2,.-_aesni_decrypt2 .type _aesni_encrypt3,@function .align 16 _aesni_encrypt3: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 movups 32(%rcx),%xmm0 leaq 32(%rcx,%rax,1),%rcx negq %rax addq $16,%rax .Lenc_loop3: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 movups -16(%rcx,%rax,1),%xmm0 jnz .Lenc_loop3 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 0xf3,0xc3 .size _aesni_encrypt3,.-_aesni_encrypt3 .type _aesni_decrypt3,@function .align 16 _aesni_decrypt3: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 movups 32(%rcx),%xmm0 leaq 32(%rcx,%rax,1),%rcx negq %rax addq $16,%rax .Ldec_loop3: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 movups -16(%rcx,%rax,1),%xmm0 jnz .Ldec_loop3 .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 102,15,56,223,224 .byte 0xf3,0xc3 .size _aesni_decrypt3,.-_aesni_decrypt3 .type _aesni_encrypt4,@function .align 16 _aesni_encrypt4: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps 
%xmm0,%xmm3 xorps %xmm0,%xmm4 xorps %xmm0,%xmm5 movups 32(%rcx),%xmm0 leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 0x0f,0x1f,0x00 addq $16,%rax .Lenc_loop4: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups -16(%rcx,%rax,1),%xmm0 jnz .Lenc_loop4 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 102,15,56,221,232 .byte 0xf3,0xc3 .size _aesni_encrypt4,.-_aesni_encrypt4 .type _aesni_decrypt4,@function .align 16 _aesni_decrypt4: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 xorps %xmm0,%xmm5 movups 32(%rcx),%xmm0 leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 0x0f,0x1f,0x00 addq $16,%rax .Ldec_loop4: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups -16(%rcx,%rax,1),%xmm0 jnz .Ldec_loop4 .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 102,15,56,223,224 .byte 102,15,56,223,232 .byte 0xf3,0xc3 .size _aesni_decrypt4,.-_aesni_decrypt4 .type _aesni_encrypt6,@function .align 16 _aesni_encrypt6: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 .byte 102,15,56,220,209 leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,220,217 pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 .byte 102,15,56,220,225 pxor %xmm0,%xmm7 movups (%rcx,%rax,1),%xmm0 addq $16,%rax jmp .Lenc_loop6_enter .align 16 .Lenc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .Lenc_loop6_enter: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -16(%rcx,%rax,1),%xmm0 jnz .Lenc_loop6 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 102,15,56,221,232 .byte 102,15,56,221,240 .byte 102,15,56,221,248 .byte 0xf3,0xc3 .size _aesni_encrypt6,.-_aesni_encrypt6 .type _aesni_decrypt6,@function .align 16 _aesni_decrypt6: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 .byte 102,15,56,222,209 leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,222,217 pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 .byte 102,15,56,222,225 pxor %xmm0,%xmm7 movups (%rcx,%rax,1),%xmm0 addq $16,%rax jmp .Ldec_loop6_enter .align 16 .Ldec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .Ldec_loop6_enter: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups -16(%rcx,%rax,1),%xmm0 jnz .Ldec_loop6 .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 
.byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 102,15,56,223,224 .byte 102,15,56,223,232 .byte 102,15,56,223,240 .byte 102,15,56,223,248 .byte 0xf3,0xc3 .size _aesni_decrypt6,.-_aesni_decrypt6 .type _aesni_encrypt8,@function .align 16 _aesni_encrypt8: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,220,209 pxor %xmm0,%xmm7 pxor %xmm0,%xmm8 .byte 102,15,56,220,217 pxor %xmm0,%xmm9 movups (%rcx,%rax,1),%xmm0 addq $16,%rax jmp .Lenc_loop8_inner .align 16 .Lenc_loop8: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .Lenc_loop8_inner: .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 .Lenc_loop8_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups -16(%rcx,%rax,1),%xmm0 jnz .Lenc_loop8 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 102,15,56,221,232 .byte 102,15,56,221,240 .byte 102,15,56,221,248 .byte 102,68,15,56,221,192 .byte 102,68,15,56,221,200 .byte 0xf3,0xc3 .size _aesni_encrypt8,.-_aesni_encrypt8 .type _aesni_decrypt8,@function .align 16 _aesni_decrypt8: movups (%rcx),%xmm0 shll $4,%eax movups 16(%rcx),%xmm1 xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,222,209 pxor %xmm0,%xmm7 pxor %xmm0,%xmm8 .byte 102,15,56,222,217 pxor %xmm0,%xmm9 movups (%rcx,%rax,1),%xmm0 addq $16,%rax jmp .Ldec_loop8_inner .align 16 .Ldec_loop8: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .Ldec_loop8_inner: .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 .Ldec_loop8_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 .byte 102,68,15,56,222,192 .byte 102,68,15,56,222,200 movups -16(%rcx,%rax,1),%xmm0 jnz .Ldec_loop8 .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 102,15,56,223,224 .byte 102,15,56,223,232 .byte 102,15,56,223,240 .byte 102,15,56,223,248 .byte 102,68,15,56,223,192 .byte 102,68,15,56,223,200 .byte 0xf3,0xc3 .size _aesni_decrypt8,.-_aesni_decrypt8 .globl aesni_ecb_encrypt .type aesni_ecb_encrypt,@function .align 16 aesni_ecb_encrypt: andq $-16,%rdx jz .Lecb_ret movl 240(%rcx),%eax movups (%rcx),%xmm0 movq %rcx,%r11 movl %eax,%r10d testl %r8d,%r8d jz .Lecb_decrypt cmpq $0x80,%rdx jb .Lecb_enc_tail movdqu (%rdi),%xmm2 movdqu 16(%rdi),%xmm3 movdqu 32(%rdi),%xmm4 movdqu 48(%rdi),%xmm5 movdqu 64(%rdi),%xmm6 movdqu 80(%rdi),%xmm7 movdqu 96(%rdi),%xmm8 movdqu 112(%rdi),%xmm9 leaq 128(%rdi),%rdi subq $0x80,%rdx jmp .Lecb_enc_loop8_enter .align 16 
.Lecb_enc_loop8: movups %xmm2,(%rsi) movq %r11,%rcx movdqu (%rdi),%xmm2 movl %r10d,%eax movups %xmm3,16(%rsi) movdqu 16(%rdi),%xmm3 movups %xmm4,32(%rsi) movdqu 32(%rdi),%xmm4 movups %xmm5,48(%rsi) movdqu 48(%rdi),%xmm5 movups %xmm6,64(%rsi) movdqu 64(%rdi),%xmm6 movups %xmm7,80(%rsi) movdqu 80(%rdi),%xmm7 movups %xmm8,96(%rsi) movdqu 96(%rdi),%xmm8 movups %xmm9,112(%rsi) leaq 128(%rsi),%rsi movdqu 112(%rdi),%xmm9 leaq 128(%rdi),%rdi .Lecb_enc_loop8_enter: call _aesni_encrypt8 subq $0x80,%rdx jnc .Lecb_enc_loop8 movups %xmm2,(%rsi) movq %r11,%rcx movups %xmm3,16(%rsi) movl %r10d,%eax movups %xmm4,32(%rsi) movups %xmm5,48(%rsi) movups %xmm6,64(%rsi) movups %xmm7,80(%rsi) movups %xmm8,96(%rsi) movups %xmm9,112(%rsi) leaq 128(%rsi),%rsi addq $0x80,%rdx jz .Lecb_ret .Lecb_enc_tail: movups (%rdi),%xmm2 cmpq $0x20,%rdx jb .Lecb_enc_one movups 16(%rdi),%xmm3 je .Lecb_enc_two movups 32(%rdi),%xmm4 cmpq $0x40,%rdx jb .Lecb_enc_three movups 48(%rdi),%xmm5 je .Lecb_enc_four movups 64(%rdi),%xmm6 cmpq $0x60,%rdx jb .Lecb_enc_five movups 80(%rdi),%xmm7 je .Lecb_enc_six movdqu 96(%rdi),%xmm8 xorps %xmm9,%xmm9 call _aesni_encrypt8 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) movups %xmm4,32(%rsi) movups %xmm5,48(%rsi) movups %xmm6,64(%rsi) movups %xmm7,80(%rsi) movups %xmm8,96(%rsi) jmp .Lecb_ret .align 16 .Lecb_enc_one: movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_enc1_3: .byte 102,15,56,220,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_3 .byte 102,15,56,221,209 movups %xmm2,(%rsi) jmp .Lecb_ret .align 16 .Lecb_enc_two: call _aesni_encrypt2 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) jmp .Lecb_ret .align 16 .Lecb_enc_three: call _aesni_encrypt3 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) movups %xmm4,32(%rsi) jmp .Lecb_ret .align 16 .Lecb_enc_four: call _aesni_encrypt4 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) movups %xmm4,32(%rsi) movups %xmm5,48(%rsi) jmp .Lecb_ret .align 16 .Lecb_enc_five: xorps %xmm7,%xmm7 call _aesni_encrypt6 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) movups %xmm4,32(%rsi) movups %xmm5,48(%rsi) movups %xmm6,64(%rsi) jmp .Lecb_ret .align 16 .Lecb_enc_six: call _aesni_encrypt6 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) movups %xmm4,32(%rsi) movups %xmm5,48(%rsi) movups %xmm6,64(%rsi) movups %xmm7,80(%rsi) jmp .Lecb_ret .align 16 .Lecb_decrypt: cmpq $0x80,%rdx jb .Lecb_dec_tail movdqu (%rdi),%xmm2 movdqu 16(%rdi),%xmm3 movdqu 32(%rdi),%xmm4 movdqu 48(%rdi),%xmm5 movdqu 64(%rdi),%xmm6 movdqu 80(%rdi),%xmm7 movdqu 96(%rdi),%xmm8 movdqu 112(%rdi),%xmm9 leaq 128(%rdi),%rdi subq $0x80,%rdx jmp .Lecb_dec_loop8_enter .align 16 .Lecb_dec_loop8: movups %xmm2,(%rsi) movq %r11,%rcx movdqu (%rdi),%xmm2 movl %r10d,%eax movups %xmm3,16(%rsi) movdqu 16(%rdi),%xmm3 movups %xmm4,32(%rsi) movdqu 32(%rdi),%xmm4 movups %xmm5,48(%rsi) movdqu 48(%rdi),%xmm5 movups %xmm6,64(%rsi) movdqu 64(%rdi),%xmm6 movups %xmm7,80(%rsi) movdqu 80(%rdi),%xmm7 movups %xmm8,96(%rsi) movdqu 96(%rdi),%xmm8 movups %xmm9,112(%rsi) leaq 128(%rsi),%rsi movdqu 112(%rdi),%xmm9 leaq 128(%rdi),%rdi .Lecb_dec_loop8_enter: call _aesni_decrypt8 movups (%r11),%xmm0 subq $0x80,%rdx jnc .Lecb_dec_loop8 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 movq %r11,%rcx movups %xmm3,16(%rsi) pxor %xmm3,%xmm3 movl %r10d,%eax movups %xmm4,32(%rsi) pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) pxor %xmm7,%xmm7 movups %xmm8,96(%rsi) pxor %xmm8,%xmm8 movups %xmm9,112(%rsi) pxor %xmm9,%xmm9 leaq 128(%rsi),%rsi addq $0x80,%rdx jz .Lecb_ret 
.Lecb_dec_tail: movups (%rdi),%xmm2 cmpq $0x20,%rdx jb .Lecb_dec_one movups 16(%rdi),%xmm3 je .Lecb_dec_two movups 32(%rdi),%xmm4 cmpq $0x40,%rdx jb .Lecb_dec_three movups 48(%rdi),%xmm5 je .Lecb_dec_four movups 64(%rdi),%xmm6 cmpq $0x60,%rdx jb .Lecb_dec_five movups 80(%rdi),%xmm7 je .Lecb_dec_six movups 96(%rdi),%xmm8 movups (%rcx),%xmm0 xorps %xmm9,%xmm9 call _aesni_decrypt8 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) pxor %xmm7,%xmm7 movups %xmm8,96(%rsi) pxor %xmm8,%xmm8 pxor %xmm9,%xmm9 jmp .Lecb_ret .align 16 .Lecb_dec_one: movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_dec1_4: .byte 102,15,56,222,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_4 .byte 102,15,56,223,209 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 jmp .Lecb_ret .align 16 .Lecb_dec_two: call _aesni_decrypt2 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) pxor %xmm3,%xmm3 jmp .Lecb_ret .align 16 .Lecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) pxor %xmm4,%xmm4 jmp .Lecb_ret .align 16 .Lecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) pxor %xmm5,%xmm5 jmp .Lecb_ret .align 16 .Lecb_dec_five: xorps %xmm7,%xmm7 call _aesni_decrypt6 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 jmp .Lecb_ret .align 16 .Lecb_dec_six: call _aesni_decrypt6 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) pxor %xmm7,%xmm7 .Lecb_ret: xorps %xmm0,%xmm0 pxor %xmm1,%xmm1 .byte 0xf3,0xc3 .size aesni_ecb_encrypt,.-aesni_ecb_encrypt .globl aesni_ccm64_encrypt_blocks .type aesni_ccm64_encrypt_blocks,@function .align 16 aesni_ccm64_encrypt_blocks: movl 240(%rcx),%eax movdqu (%r8),%xmm6 movdqa .Lincrement64(%rip),%xmm9 movdqa .Lbswap_mask(%rip),%xmm7 shll $4,%eax movl $16,%r10d leaq 0(%rcx),%r11 movdqu (%r9),%xmm3 movdqa %xmm6,%xmm2 leaq 32(%rcx,%rax,1),%rcx .byte 102,15,56,0,247 subq %rax,%r10 jmp .Lccm64_enc_outer .align 16 .Lccm64_enc_outer: movups (%r11),%xmm0 movq %r10,%rax movups (%rdi),%xmm8 xorps %xmm0,%xmm2 movups 16(%r11),%xmm1 xorps %xmm8,%xmm0 xorps %xmm0,%xmm3 movups 32(%r11),%xmm0 .Lccm64_enc2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%rcx,%rax,1),%xmm0 jnz .Lccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 paddq %xmm9,%xmm6 decq %rdx .byte 102,15,56,221,208 .byte 102,15,56,221,216 leaq 16(%rdi),%rdi xorps %xmm2,%xmm8 movdqa %xmm6,%xmm2 movups %xmm8,(%rsi) .byte 102,15,56,0,215 leaq 16(%rsi),%rsi jnz .Lccm64_enc_outer pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 movups %xmm3,(%r9) pxor %xmm3,%xmm3 pxor %xmm8,%xmm8 pxor %xmm6,%xmm6 .byte 0xf3,0xc3 .size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks .globl aesni_ccm64_decrypt_blocks .type aesni_ccm64_decrypt_blocks,@function .align 16 aesni_ccm64_decrypt_blocks: movl 240(%rcx),%eax movups 
(%r8),%xmm6 movdqu (%r9),%xmm3 movdqa .Lincrement64(%rip),%xmm9 movdqa .Lbswap_mask(%rip),%xmm7 movaps %xmm6,%xmm2 movl %eax,%r10d movq %rcx,%r11 .byte 102,15,56,0,247 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_enc1_5: .byte 102,15,56,220,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_5 .byte 102,15,56,221,209 shll $4,%r10d movl $16,%eax movups (%rdi),%xmm8 paddq %xmm9,%xmm6 leaq 16(%rdi),%rdi subq %r10,%rax leaq 32(%r11,%r10,1),%rcx movq %rax,%r10 jmp .Lccm64_dec_outer .align 16 .Lccm64_dec_outer: xorps %xmm2,%xmm8 movdqa %xmm6,%xmm2 movups %xmm8,(%rsi) leaq 16(%rsi),%rsi .byte 102,15,56,0,215 subq $1,%rdx jz .Lccm64_dec_break movups (%r11),%xmm0 movq %r10,%rax movups 16(%r11),%xmm1 xorps %xmm0,%xmm8 xorps %xmm0,%xmm2 xorps %xmm8,%xmm3 movups 32(%r11),%xmm0 jmp .Lccm64_dec2_loop .align 16 .Lccm64_dec2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%rcx,%rax,1),%xmm0 jnz .Lccm64_dec2_loop movups (%rdi),%xmm8 paddq %xmm9,%xmm6 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,221,208 .byte 102,15,56,221,216 leaq 16(%rdi),%rdi jmp .Lccm64_dec_outer .align 16 .Lccm64_dec_break: movl 240(%r11),%eax movups (%r11),%xmm0 movups 16(%r11),%xmm1 xorps %xmm0,%xmm8 leaq 32(%r11),%r11 xorps %xmm8,%xmm3 .Loop_enc1_6: .byte 102,15,56,220,217 decl %eax movups (%r11),%xmm1 leaq 16(%r11),%r11 jnz .Loop_enc1_6 .byte 102,15,56,221,217 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 movups %xmm3,(%r9) pxor %xmm3,%xmm3 pxor %xmm8,%xmm8 pxor %xmm6,%xmm6 .byte 0xf3,0xc3 .size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks .globl aesni_ctr32_encrypt_blocks .type aesni_ctr32_encrypt_blocks,@function .align 16 aesni_ctr32_encrypt_blocks: cmpq $1,%rdx jne .Lctr32_bulk movups (%r8),%xmm2 movups (%rdi),%xmm3 movl 240(%rcx),%edx movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_enc1_7: .byte 102,15,56,220,209 decl %edx movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_7 .byte 102,15,56,221,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 xorps %xmm3,%xmm2 pxor %xmm3,%xmm3 movups %xmm2,(%rsi) xorps %xmm2,%xmm2 jmp .Lctr32_epilogue .align 16 .Lctr32_bulk: leaq (%rsp),%rax pushq %rbp subq $128,%rsp andq $-16,%rsp leaq -8(%rax),%rbp movdqu (%r8),%xmm2 movdqu (%rcx),%xmm0 movl 12(%r8),%r8d pxor %xmm0,%xmm2 movl 12(%rcx),%r11d movdqa %xmm2,0(%rsp) bswapl %r8d movdqa %xmm2,%xmm3 movdqa %xmm2,%xmm4 movdqa %xmm2,%xmm5 movdqa %xmm2,64(%rsp) movdqa %xmm2,80(%rsp) movdqa %xmm2,96(%rsp) movq %rdx,%r10 movdqa %xmm2,112(%rsp) leaq 1(%r8),%rax leaq 2(%r8),%rdx bswapl %eax bswapl %edx xorl %r11d,%eax xorl %r11d,%edx .byte 102,15,58,34,216,3 leaq 3(%r8),%rax movdqa %xmm3,16(%rsp) .byte 102,15,58,34,226,3 bswapl %eax movq %r10,%rdx leaq 4(%r8),%r10 movdqa %xmm4,32(%rsp) xorl %r11d,%eax bswapl %r10d .byte 102,15,58,34,232,3 xorl %r11d,%r10d movdqa %xmm5,48(%rsp) leaq 5(%r8),%r9 movl %r10d,64+12(%rsp) bswapl %r9d leaq 6(%r8),%r10 movl 240(%rcx),%eax xorl %r11d,%r9d bswapl %r10d movl %r9d,80+12(%rsp) xorl %r11d,%r10d leaq 7(%r8),%r9 movl %r10d,96+12(%rsp) bswapl %r9d movl OPENSSL_ia32cap_P+4(%rip),%r10d xorl %r11d,%r9d andl $71303168,%r10d movl %r9d,112+12(%rsp) movups 16(%rcx),%xmm1 movdqa 64(%rsp),%xmm6 movdqa 80(%rsp),%xmm7 cmpq $8,%rdx jb .Lctr32_tail subq $6,%rdx cmpl $4194304,%r10d je .Lctr32_6x leaq 128(%rcx),%rcx subq $2,%rdx jmp .Lctr32_loop8 .align 16 .Lctr32_6x: shll $4,%eax movl $48,%r10d bswapl %r11d leaq 
32(%rcx,%rax,1),%rcx subq %rax,%r10 jmp .Lctr32_loop6 .align 16 .Lctr32_loop6: addl $6,%r8d movups -48(%rcx,%r10,1),%xmm0 .byte 102,15,56,220,209 movl %r8d,%eax xorl %r11d,%eax .byte 102,15,56,220,217 .byte 0x0f,0x38,0xf1,0x44,0x24,12 leal 1(%r8),%eax .byte 102,15,56,220,225 xorl %r11d,%eax .byte 0x0f,0x38,0xf1,0x44,0x24,28 .byte 102,15,56,220,233 leal 2(%r8),%eax xorl %r11d,%eax .byte 102,15,56,220,241 .byte 0x0f,0x38,0xf1,0x44,0x24,44 leal 3(%r8),%eax .byte 102,15,56,220,249 movups -32(%rcx,%r10,1),%xmm1 xorl %r11d,%eax .byte 102,15,56,220,208 .byte 0x0f,0x38,0xf1,0x44,0x24,60 leal 4(%r8),%eax .byte 102,15,56,220,216 xorl %r11d,%eax .byte 0x0f,0x38,0xf1,0x44,0x24,76 .byte 102,15,56,220,224 leal 5(%r8),%eax xorl %r11d,%eax .byte 102,15,56,220,232 .byte 0x0f,0x38,0xf1,0x44,0x24,92 movq %r10,%rax .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -16(%rcx,%r10,1),%xmm0 call .Lenc_loop6 movdqu (%rdi),%xmm8 movdqu 16(%rdi),%xmm9 movdqu 32(%rdi),%xmm10 movdqu 48(%rdi),%xmm11 movdqu 64(%rdi),%xmm12 movdqu 80(%rdi),%xmm13 leaq 96(%rdi),%rdi movups -64(%rcx,%r10,1),%xmm1 pxor %xmm2,%xmm8 movaps 0(%rsp),%xmm2 pxor %xmm3,%xmm9 movaps 16(%rsp),%xmm3 pxor %xmm4,%xmm10 movaps 32(%rsp),%xmm4 pxor %xmm5,%xmm11 movaps 48(%rsp),%xmm5 pxor %xmm6,%xmm12 movaps 64(%rsp),%xmm6 pxor %xmm7,%xmm13 movaps 80(%rsp),%xmm7 movdqu %xmm8,(%rsi) movdqu %xmm9,16(%rsi) movdqu %xmm10,32(%rsi) movdqu %xmm11,48(%rsi) movdqu %xmm12,64(%rsi) movdqu %xmm13,80(%rsi) leaq 96(%rsi),%rsi subq $6,%rdx jnc .Lctr32_loop6 addq $6,%rdx jz .Lctr32_done leal -48(%r10),%eax leaq -80(%rcx,%r10,1),%rcx negl %eax shrl $4,%eax jmp .Lctr32_tail .align 32 .Lctr32_loop8: addl $8,%r8d movdqa 96(%rsp),%xmm8 .byte 102,15,56,220,209 movl %r8d,%r9d movdqa 112(%rsp),%xmm9 .byte 102,15,56,220,217 bswapl %r9d movups 32-128(%rcx),%xmm0 .byte 102,15,56,220,225 xorl %r11d,%r9d nop .byte 102,15,56,220,233 movl %r9d,0+12(%rsp) leaq 1(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 48-128(%rcx),%xmm1 bswapl %r9d .byte 102,15,56,220,208 .byte 102,15,56,220,216 xorl %r11d,%r9d .byte 0x66,0x90 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movl %r9d,16+12(%rsp) leaq 2(%r8),%r9 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 64-128(%rcx),%xmm0 bswapl %r9d .byte 102,15,56,220,209 .byte 102,15,56,220,217 xorl %r11d,%r9d .byte 0x66,0x90 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movl %r9d,32+12(%rsp) leaq 3(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 80-128(%rcx),%xmm1 bswapl %r9d .byte 102,15,56,220,208 .byte 102,15,56,220,216 xorl %r11d,%r9d .byte 0x66,0x90 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movl %r9d,48+12(%rsp) leaq 4(%r8),%r9 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 96-128(%rcx),%xmm0 bswapl %r9d .byte 102,15,56,220,209 .byte 102,15,56,220,217 xorl %r11d,%r9d .byte 0x66,0x90 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movl %r9d,64+12(%rsp) leaq 5(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 112-128(%rcx),%xmm1 bswapl %r9d .byte 102,15,56,220,208 .byte 102,15,56,220,216 xorl %r11d,%r9d .byte 0x66,0x90 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movl %r9d,80+12(%rsp) leaq 6(%r8),%r9 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 
128-128(%rcx),%xmm0 bswapl %r9d .byte 102,15,56,220,209 .byte 102,15,56,220,217 xorl %r11d,%r9d .byte 0x66,0x90 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movl %r9d,96+12(%rsp) leaq 7(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 144-128(%rcx),%xmm1 bswapl %r9d .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 xorl %r11d,%r9d movdqu 0(%rdi),%xmm10 .byte 102,15,56,220,232 movl %r9d,112+12(%rsp) cmpl $11,%eax .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 160-128(%rcx),%xmm0 jb .Lctr32_enc_done .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 176-128(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 192-128(%rcx),%xmm0 je .Lctr32_enc_done .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movups 208-128(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 movups 224-128(%rcx),%xmm0 jmp .Lctr32_enc_done .align 16 .Lctr32_enc_done: movdqu 16(%rdi),%xmm11 pxor %xmm0,%xmm10 movdqu 32(%rdi),%xmm12 pxor %xmm0,%xmm11 movdqu 48(%rdi),%xmm13 pxor %xmm0,%xmm12 movdqu 64(%rdi),%xmm14 pxor %xmm0,%xmm13 movdqu 80(%rdi),%xmm15 pxor %xmm0,%xmm14 pxor %xmm0,%xmm15 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 movdqu 96(%rdi),%xmm1 leaq 128(%rdi),%rdi .byte 102,65,15,56,221,210 pxor %xmm0,%xmm1 movdqu 112-128(%rdi),%xmm10 .byte 102,65,15,56,221,219 pxor %xmm0,%xmm10 movdqa 0(%rsp),%xmm11 .byte 102,65,15,56,221,228 .byte 102,65,15,56,221,237 movdqa 16(%rsp),%xmm12 movdqa 32(%rsp),%xmm13 .byte 102,65,15,56,221,246 .byte 102,65,15,56,221,255 movdqa 48(%rsp),%xmm14 movdqa 64(%rsp),%xmm15 .byte 102,68,15,56,221,193 movdqa 80(%rsp),%xmm0 movups 16-128(%rcx),%xmm1 .byte 102,69,15,56,221,202 movups %xmm2,(%rsi) movdqa %xmm11,%xmm2 movups %xmm3,16(%rsi) movdqa %xmm12,%xmm3 movups %xmm4,32(%rsi) movdqa %xmm13,%xmm4 movups %xmm5,48(%rsi) movdqa %xmm14,%xmm5 movups %xmm6,64(%rsi) movdqa %xmm15,%xmm6 movups %xmm7,80(%rsi) movdqa %xmm0,%xmm7 movups %xmm8,96(%rsi) movups %xmm9,112(%rsi) leaq 128(%rsi),%rsi subq $8,%rdx jnc .Lctr32_loop8 addq $8,%rdx jz .Lctr32_done leaq -128(%rcx),%rcx .Lctr32_tail: leaq 16(%rcx),%rcx cmpq $4,%rdx jb .Lctr32_loop3 je .Lctr32_loop4 shll $4,%eax movdqa 96(%rsp),%xmm8 pxor %xmm9,%xmm9 movups 16(%rcx),%xmm0 .byte 102,15,56,220,209 .byte 102,15,56,220,217 leaq 32-16(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,220,225 addq $16,%rax movups (%rdi),%xmm10 .byte 102,15,56,220,233 .byte 102,15,56,220,241 movups 16(%rdi),%xmm11 movups 32(%rdi),%xmm12 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 call .Lenc_loop8_enter movdqu 48(%rdi),%xmm13 pxor %xmm10,%xmm2 movdqu 64(%rdi),%xmm10 pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) pxor %xmm13,%xmm5 
movdqu %xmm4,32(%rsi) pxor %xmm10,%xmm6 movdqu %xmm5,48(%rsi) movdqu %xmm6,64(%rsi) cmpq $6,%rdx jb .Lctr32_done movups 80(%rdi),%xmm11 xorps %xmm11,%xmm7 movups %xmm7,80(%rsi) je .Lctr32_done movups 96(%rdi),%xmm12 xorps %xmm12,%xmm8 movups %xmm8,96(%rsi) jmp .Lctr32_done .align 32 .Lctr32_loop4: .byte 102,15,56,220,209 leaq 16(%rcx),%rcx decl %eax .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups (%rcx),%xmm1 jnz .Lctr32_loop4 .byte 102,15,56,221,209 .byte 102,15,56,221,217 movups (%rdi),%xmm10 movups 16(%rdi),%xmm11 .byte 102,15,56,221,225 .byte 102,15,56,221,233 movups 32(%rdi),%xmm12 movups 48(%rdi),%xmm13 xorps %xmm10,%xmm2 movups %xmm2,(%rsi) xorps %xmm11,%xmm3 movups %xmm3,16(%rsi) pxor %xmm12,%xmm4 movdqu %xmm4,32(%rsi) pxor %xmm13,%xmm5 movdqu %xmm5,48(%rsi) jmp .Lctr32_done .align 32 .Lctr32_loop3: .byte 102,15,56,220,209 leaq 16(%rcx),%rcx decl %eax .byte 102,15,56,220,217 .byte 102,15,56,220,225 movups (%rcx),%xmm1 jnz .Lctr32_loop3 .byte 102,15,56,221,209 .byte 102,15,56,221,217 .byte 102,15,56,221,225 movups (%rdi),%xmm10 xorps %xmm10,%xmm2 movups %xmm2,(%rsi) cmpq $2,%rdx jb .Lctr32_done movups 16(%rdi),%xmm11 xorps %xmm11,%xmm3 movups %xmm3,16(%rsi) je .Lctr32_done movups 32(%rdi),%xmm12 xorps %xmm12,%xmm4 movups %xmm4,32(%rsi) .Lctr32_done: xorps %xmm0,%xmm0 xorl %r11d,%r11d pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 movaps %xmm0,0(%rsp) pxor %xmm8,%xmm8 movaps %xmm0,16(%rsp) pxor %xmm9,%xmm9 movaps %xmm0,32(%rsp) pxor %xmm10,%xmm10 movaps %xmm0,48(%rsp) pxor %xmm11,%xmm11 movaps %xmm0,64(%rsp) pxor %xmm12,%xmm12 movaps %xmm0,80(%rsp) pxor %xmm13,%xmm13 movaps %xmm0,96(%rsp) pxor %xmm14,%xmm14 movaps %xmm0,112(%rsp) pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp .Lctr32_epilogue: .byte 0xf3,0xc3 .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks .globl aesni_xts_encrypt .type aesni_xts_encrypt,@function .align 16 aesni_xts_encrypt: leaq (%rsp),%rax pushq %rbp subq $112,%rsp andq $-16,%rsp leaq -8(%rax),%rbp movups (%r9),%xmm2 movl 240(%r8),%eax movl 240(%rcx),%r10d movups (%r8),%xmm0 movups 16(%r8),%xmm1 leaq 32(%r8),%r8 xorps %xmm0,%xmm2 .Loop_enc1_8: .byte 102,15,56,220,209 decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 jnz .Loop_enc1_8 .byte 102,15,56,221,209 movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx movups 16(%rcx,%r10,1),%xmm1 movdqa .Lxts_magic(%rip),%xmm8 movdqa %xmm2,%xmm15 pshufd $0x5f,%xmm2,%xmm9 pxor %xmm0,%xmm1 movdqa %xmm9,%xmm14 paddd %xmm9,%xmm9 movdqa %xmm15,%xmm10 psrad $31,%xmm14 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 pxor %xmm0,%xmm10 pxor %xmm14,%xmm15 movdqa %xmm9,%xmm14 paddd %xmm9,%xmm9 movdqa %xmm15,%xmm11 psrad $31,%xmm14 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 pxor %xmm0,%xmm11 pxor %xmm14,%xmm15 movdqa %xmm9,%xmm14 paddd %xmm9,%xmm9 movdqa %xmm15,%xmm12 psrad $31,%xmm14 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 pxor %xmm0,%xmm12 pxor %xmm14,%xmm15 movdqa %xmm9,%xmm14 paddd %xmm9,%xmm9 movdqa %xmm15,%xmm13 psrad $31,%xmm14 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 pxor %xmm0,%xmm13 pxor %xmm14,%xmm15 movdqa %xmm15,%xmm14 psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 pxor %xmm0,%xmm14 pxor %xmm9,%xmm15 movaps %xmm1,96(%rsp) subq $96,%rdx jc .Lxts_enc_short movl $16+96,%eax leaq 32(%r11,%r10,1),%rcx subq %r10,%rax movups 16(%r11),%xmm1 movq %rax,%r10 leaq .Lxts_magic(%rip),%r8 jmp .Lxts_enc_grandloop .align 32 .Lxts_enc_grandloop: movdqu 0(%rdi),%xmm2 movdqa %xmm0,%xmm8 movdqu 
16(%rdi),%xmm3 pxor %xmm10,%xmm2 movdqu 32(%rdi),%xmm4 pxor %xmm11,%xmm3 .byte 102,15,56,220,209 movdqu 48(%rdi),%xmm5 pxor %xmm12,%xmm4 .byte 102,15,56,220,217 movdqu 64(%rdi),%xmm6 pxor %xmm13,%xmm5 .byte 102,15,56,220,225 movdqu 80(%rdi),%xmm7 pxor %xmm15,%xmm8 movdqa 96(%rsp),%xmm9 pxor %xmm14,%xmm6 .byte 102,15,56,220,233 movups 32(%r11),%xmm0 leaq 96(%rdi),%rdi pxor %xmm8,%xmm7 pxor %xmm9,%xmm10 .byte 102,15,56,220,241 pxor %xmm9,%xmm11 movdqa %xmm10,0(%rsp) .byte 102,15,56,220,249 movups 48(%r11),%xmm1 pxor %xmm9,%xmm12 .byte 102,15,56,220,208 pxor %xmm9,%xmm13 movdqa %xmm11,16(%rsp) .byte 102,15,56,220,216 pxor %xmm9,%xmm14 movdqa %xmm12,32(%rsp) .byte 102,15,56,220,224 .byte 102,15,56,220,232 pxor %xmm9,%xmm8 movdqa %xmm14,64(%rsp) .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups 64(%r11),%xmm0 movdqa %xmm8,80(%rsp) pshufd $0x5f,%xmm15,%xmm9 jmp .Lxts_enc_loop6 .align 32 .Lxts_enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 movups -64(%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -80(%rcx,%rax,1),%xmm0 jnz .Lxts_enc_loop6 movdqa (%r8),%xmm8 movdqa %xmm9,%xmm14 paddd %xmm9,%xmm9 .byte 102,15,56,220,209 paddq %xmm15,%xmm15 psrad $31,%xmm14 .byte 102,15,56,220,217 pand %xmm8,%xmm14 movups (%r11),%xmm10 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 pxor %xmm14,%xmm15 movaps %xmm10,%xmm11 .byte 102,15,56,220,249 movups -64(%rcx),%xmm1 movdqa %xmm9,%xmm14 .byte 102,15,56,220,208 paddd %xmm9,%xmm9 pxor %xmm15,%xmm10 .byte 102,15,56,220,216 psrad $31,%xmm14 paddq %xmm15,%xmm15 .byte 102,15,56,220,224 .byte 102,15,56,220,232 pand %xmm8,%xmm14 movaps %xmm11,%xmm12 .byte 102,15,56,220,240 pxor %xmm14,%xmm15 movdqa %xmm9,%xmm14 .byte 102,15,56,220,248 movups -48(%rcx),%xmm0 paddd %xmm9,%xmm9 .byte 102,15,56,220,209 pxor %xmm15,%xmm11 psrad $31,%xmm14 .byte 102,15,56,220,217 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movdqa %xmm13,48(%rsp) pxor %xmm14,%xmm15 .byte 102,15,56,220,241 movaps %xmm12,%xmm13 movdqa %xmm9,%xmm14 .byte 102,15,56,220,249 movups -32(%rcx),%xmm1 paddd %xmm9,%xmm9 .byte 102,15,56,220,208 pxor %xmm15,%xmm12 psrad $31,%xmm14 .byte 102,15,56,220,216 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 pxor %xmm14,%xmm15 movaps %xmm13,%xmm14 .byte 102,15,56,220,248 movdqa %xmm9,%xmm0 paddd %xmm9,%xmm9 .byte 102,15,56,220,209 pxor %xmm15,%xmm13 psrad $31,%xmm0 .byte 102,15,56,220,217 paddq %xmm15,%xmm15 pand %xmm8,%xmm0 .byte 102,15,56,220,225 .byte 102,15,56,220,233 pxor %xmm0,%xmm15 movups (%r11),%xmm0 .byte 102,15,56,220,241 .byte 102,15,56,220,249 movups 16(%r11),%xmm1 pxor %xmm15,%xmm14 .byte 102,15,56,221,84,36,0 psrad $31,%xmm9 paddq %xmm15,%xmm15 .byte 102,15,56,221,92,36,16 .byte 102,15,56,221,100,36,32 pand %xmm8,%xmm9 movq %r10,%rax .byte 102,15,56,221,108,36,48 .byte 102,15,56,221,116,36,64 .byte 102,15,56,221,124,36,80 pxor %xmm9,%xmm15 leaq 96(%rsi),%rsi movups %xmm2,-96(%rsi) movups %xmm3,-80(%rsi) movups %xmm4,-64(%rsi) movups %xmm5,-48(%rsi) movups %xmm6,-32(%rsi) movups %xmm7,-16(%rsi) subq $96,%rdx jnc .Lxts_enc_grandloop movl $16+96,%eax subl %r10d,%eax movq %r11,%rcx shrl $4,%eax .Lxts_enc_short: movl %eax,%r10d pxor %xmm0,%xmm10 addq $96,%rdx jz .Lxts_enc_done pxor %xmm0,%xmm11 cmpq 
$0x20,%rdx jb .Lxts_enc_one pxor %xmm0,%xmm12 je .Lxts_enc_two pxor %xmm0,%xmm13 cmpq $0x40,%rdx jb .Lxts_enc_three pxor %xmm0,%xmm14 je .Lxts_enc_four movdqu (%rdi),%xmm2 movdqu 16(%rdi),%xmm3 movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 pxor %xmm11,%xmm3 movdqu 64(%rdi),%xmm6 leaq 80(%rdi),%rdi pxor %xmm12,%xmm4 pxor %xmm13,%xmm5 pxor %xmm14,%xmm6 pxor %xmm7,%xmm7 call _aesni_encrypt6 xorps %xmm10,%xmm2 movdqa %xmm15,%xmm10 xorps %xmm11,%xmm3 xorps %xmm12,%xmm4 movdqu %xmm2,(%rsi) xorps %xmm13,%xmm5 movdqu %xmm3,16(%rsi) xorps %xmm14,%xmm6 movdqu %xmm4,32(%rsi) movdqu %xmm5,48(%rsi) movdqu %xmm6,64(%rsi) leaq 80(%rsi),%rsi jmp .Lxts_enc_done .align 16 .Lxts_enc_one: movups (%rdi),%xmm2 leaq 16(%rdi),%rdi xorps %xmm10,%xmm2 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_enc1_9: .byte 102,15,56,220,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_9 .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 movups %xmm2,(%rsi) leaq 16(%rsi),%rsi jmp .Lxts_enc_done .align 16 .Lxts_enc_two: movups (%rdi),%xmm2 movups 16(%rdi),%xmm3 leaq 32(%rdi),%rdi xorps %xmm10,%xmm2 xorps %xmm11,%xmm3 call _aesni_encrypt2 xorps %xmm10,%xmm2 movdqa %xmm12,%xmm10 xorps %xmm11,%xmm3 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) leaq 32(%rsi),%rsi jmp .Lxts_enc_done .align 16 .Lxts_enc_three: movups (%rdi),%xmm2 movups 16(%rdi),%xmm3 movups 32(%rdi),%xmm4 leaq 48(%rdi),%rdi xorps %xmm10,%xmm2 xorps %xmm11,%xmm3 xorps %xmm12,%xmm4 call _aesni_encrypt3 xorps %xmm10,%xmm2 movdqa %xmm13,%xmm10 xorps %xmm11,%xmm3 xorps %xmm12,%xmm4 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) movups %xmm4,32(%rsi) leaq 48(%rsi),%rsi jmp .Lxts_enc_done .align 16 .Lxts_enc_four: movups (%rdi),%xmm2 movups 16(%rdi),%xmm3 movups 32(%rdi),%xmm4 xorps %xmm10,%xmm2 movups 48(%rdi),%xmm5 leaq 64(%rdi),%rdi xorps %xmm11,%xmm3 xorps %xmm12,%xmm4 xorps %xmm13,%xmm5 call _aesni_encrypt4 pxor %xmm10,%xmm2 movdqa %xmm14,%xmm10 pxor %xmm11,%xmm3 pxor %xmm12,%xmm4 movdqu %xmm2,(%rsi) pxor %xmm13,%xmm5 movdqu %xmm3,16(%rsi) movdqu %xmm4,32(%rsi) movdqu %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp .Lxts_enc_done .align 16 .Lxts_enc_done: andq $15,%r9 jz .Lxts_enc_ret movq %r9,%rdx .Lxts_enc_steal: movzbl (%rdi),%eax movzbl -16(%rsi),%ecx leaq 1(%rdi),%rdi movb %al,-16(%rsi) movb %cl,0(%rsi) leaq 1(%rsi),%rsi subq $1,%rdx jnz .Lxts_enc_steal subq %r9,%rsi movq %r11,%rcx movl %r10d,%eax movups -16(%rsi),%xmm2 xorps %xmm10,%xmm2 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_enc1_10: .byte 102,15,56,220,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_10 .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movups %xmm2,-16(%rsi) .Lxts_enc_ret: xorps %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 movaps %xmm0,0(%rsp) pxor %xmm8,%xmm8 movaps %xmm0,16(%rsp) pxor %xmm9,%xmm9 movaps %xmm0,32(%rsp) pxor %xmm10,%xmm10 movaps %xmm0,48(%rsp) pxor %xmm11,%xmm11 movaps %xmm0,64(%rsp) pxor %xmm12,%xmm12 movaps %xmm0,80(%rsp) pxor %xmm13,%xmm13 movaps %xmm0,96(%rsp) pxor %xmm14,%xmm14 pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp .Lxts_enc_epilogue: .byte 0xf3,0xc3 .size aesni_xts_encrypt,.-aesni_xts_encrypt .globl aesni_xts_decrypt .type aesni_xts_decrypt,@function .align 16 aesni_xts_decrypt: leaq (%rsp),%rax pushq %rbp subq $112,%rsp andq $-16,%rsp leaq -8(%rax),%rbp movups (%r9),%xmm2 movl 240(%r8),%eax movl 240(%rcx),%r10d movups (%r8),%xmm0 movups 16(%r8),%xmm1 leaq 
32(%r8),%r8 xorps %xmm0,%xmm2 .Loop_enc1_11: .byte 102,15,56,220,209 decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 jnz .Loop_enc1_11 .byte 102,15,56,221,209 xorl %eax,%eax testq $15,%rdx setnz %al shlq $4,%rax subq %rax,%rdx movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx movups 16(%rcx,%r10,1),%xmm1 movdqa .Lxts_magic(%rip),%xmm8 movdqa %xmm2,%xmm15 pshufd $0x5f,%xmm2,%xmm9 pxor %xmm0,%xmm1 movdqa %xmm9,%xmm14 paddd %xmm9,%xmm9 movdqa %xmm15,%xmm10 psrad $31,%xmm14 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 pxor %xmm0,%xmm10 pxor %xmm14,%xmm15 movdqa %xmm9,%xmm14 paddd %xmm9,%xmm9 movdqa %xmm15,%xmm11 psrad $31,%xmm14 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 pxor %xmm0,%xmm11 pxor %xmm14,%xmm15 movdqa %xmm9,%xmm14 paddd %xmm9,%xmm9 movdqa %xmm15,%xmm12 psrad $31,%xmm14 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 pxor %xmm0,%xmm12 pxor %xmm14,%xmm15 movdqa %xmm9,%xmm14 paddd %xmm9,%xmm9 movdqa %xmm15,%xmm13 psrad $31,%xmm14 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 pxor %xmm0,%xmm13 pxor %xmm14,%xmm15 movdqa %xmm15,%xmm14 psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 pxor %xmm0,%xmm14 pxor %xmm9,%xmm15 movaps %xmm1,96(%rsp) subq $96,%rdx jc .Lxts_dec_short movl $16+96,%eax leaq 32(%r11,%r10,1),%rcx subq %r10,%rax movups 16(%r11),%xmm1 movq %rax,%r10 leaq .Lxts_magic(%rip),%r8 jmp .Lxts_dec_grandloop .align 32 .Lxts_dec_grandloop: movdqu 0(%rdi),%xmm2 movdqa %xmm0,%xmm8 movdqu 16(%rdi),%xmm3 pxor %xmm10,%xmm2 movdqu 32(%rdi),%xmm4 pxor %xmm11,%xmm3 .byte 102,15,56,222,209 movdqu 48(%rdi),%xmm5 pxor %xmm12,%xmm4 .byte 102,15,56,222,217 movdqu 64(%rdi),%xmm6 pxor %xmm13,%xmm5 .byte 102,15,56,222,225 movdqu 80(%rdi),%xmm7 pxor %xmm15,%xmm8 movdqa 96(%rsp),%xmm9 pxor %xmm14,%xmm6 .byte 102,15,56,222,233 movups 32(%r11),%xmm0 leaq 96(%rdi),%rdi pxor %xmm8,%xmm7 pxor %xmm9,%xmm10 .byte 102,15,56,222,241 pxor %xmm9,%xmm11 movdqa %xmm10,0(%rsp) .byte 102,15,56,222,249 movups 48(%r11),%xmm1 pxor %xmm9,%xmm12 .byte 102,15,56,222,208 pxor %xmm9,%xmm13 movdqa %xmm11,16(%rsp) .byte 102,15,56,222,216 pxor %xmm9,%xmm14 movdqa %xmm12,32(%rsp) .byte 102,15,56,222,224 .byte 102,15,56,222,232 pxor %xmm9,%xmm8 movdqa %xmm14,64(%rsp) .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups 64(%r11),%xmm0 movdqa %xmm8,80(%rsp) pshufd $0x5f,%xmm15,%xmm9 jmp .Lxts_dec_loop6 .align 32 .Lxts_dec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 movups -64(%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups -80(%rcx,%rax,1),%xmm0 jnz .Lxts_dec_loop6 movdqa (%r8),%xmm8 movdqa %xmm9,%xmm14 paddd %xmm9,%xmm9 .byte 102,15,56,222,209 paddq %xmm15,%xmm15 psrad $31,%xmm14 .byte 102,15,56,222,217 pand %xmm8,%xmm14 movups (%r11),%xmm10 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 pxor %xmm14,%xmm15 movaps %xmm10,%xmm11 .byte 102,15,56,222,249 movups -64(%rcx),%xmm1 movdqa %xmm9,%xmm14 .byte 102,15,56,222,208 paddd %xmm9,%xmm9 pxor %xmm15,%xmm10 .byte 102,15,56,222,216 psrad $31,%xmm14 paddq %xmm15,%xmm15 .byte 102,15,56,222,224 .byte 102,15,56,222,232 pand %xmm8,%xmm14 movaps %xmm11,%xmm12 .byte 102,15,56,222,240 pxor %xmm14,%xmm15 movdqa %xmm9,%xmm14 .byte 102,15,56,222,248 movups -48(%rcx),%xmm0 paddd %xmm9,%xmm9 .byte 102,15,56,222,209 pxor %xmm15,%xmm11 psrad $31,%xmm14 .byte 102,15,56,222,217 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 
.byte 102,15,56,222,225 .byte 102,15,56,222,233 movdqa %xmm13,48(%rsp) pxor %xmm14,%xmm15 .byte 102,15,56,222,241 movaps %xmm12,%xmm13 movdqa %xmm9,%xmm14 .byte 102,15,56,222,249 movups -32(%rcx),%xmm1 paddd %xmm9,%xmm9 .byte 102,15,56,222,208 pxor %xmm15,%xmm12 psrad $31,%xmm14 .byte 102,15,56,222,216 paddq %xmm15,%xmm15 pand %xmm8,%xmm14 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 pxor %xmm14,%xmm15 movaps %xmm13,%xmm14 .byte 102,15,56,222,248 movdqa %xmm9,%xmm0 paddd %xmm9,%xmm9 .byte 102,15,56,222,209 pxor %xmm15,%xmm13 psrad $31,%xmm0 .byte 102,15,56,222,217 paddq %xmm15,%xmm15 pand %xmm8,%xmm0 .byte 102,15,56,222,225 .byte 102,15,56,222,233 pxor %xmm0,%xmm15 movups (%r11),%xmm0 .byte 102,15,56,222,241 .byte 102,15,56,222,249 movups 16(%r11),%xmm1 pxor %xmm15,%xmm14 .byte 102,15,56,223,84,36,0 psrad $31,%xmm9 paddq %xmm15,%xmm15 .byte 102,15,56,223,92,36,16 .byte 102,15,56,223,100,36,32 pand %xmm8,%xmm9 movq %r10,%rax .byte 102,15,56,223,108,36,48 .byte 102,15,56,223,116,36,64 .byte 102,15,56,223,124,36,80 pxor %xmm9,%xmm15 leaq 96(%rsi),%rsi movups %xmm2,-96(%rsi) movups %xmm3,-80(%rsi) movups %xmm4,-64(%rsi) movups %xmm5,-48(%rsi) movups %xmm6,-32(%rsi) movups %xmm7,-16(%rsi) subq $96,%rdx jnc .Lxts_dec_grandloop movl $16+96,%eax subl %r10d,%eax movq %r11,%rcx shrl $4,%eax .Lxts_dec_short: movl %eax,%r10d pxor %xmm0,%xmm10 pxor %xmm0,%xmm11 addq $96,%rdx jz .Lxts_dec_done pxor %xmm0,%xmm12 cmpq $0x20,%rdx jb .Lxts_dec_one pxor %xmm0,%xmm13 je .Lxts_dec_two pxor %xmm0,%xmm14 cmpq $0x40,%rdx jb .Lxts_dec_three je .Lxts_dec_four movdqu (%rdi),%xmm2 movdqu 16(%rdi),%xmm3 movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 pxor %xmm11,%xmm3 movdqu 64(%rdi),%xmm6 leaq 80(%rdi),%rdi pxor %xmm12,%xmm4 pxor %xmm13,%xmm5 pxor %xmm14,%xmm6 call _aesni_decrypt6 xorps %xmm10,%xmm2 xorps %xmm11,%xmm3 xorps %xmm12,%xmm4 movdqu %xmm2,(%rsi) xorps %xmm13,%xmm5 movdqu %xmm3,16(%rsi) xorps %xmm14,%xmm6 movdqu %xmm4,32(%rsi) pxor %xmm14,%xmm14 movdqu %xmm5,48(%rsi) pcmpgtd %xmm15,%xmm14 movdqu %xmm6,64(%rsi) leaq 80(%rsi),%rsi pshufd $0x13,%xmm14,%xmm11 andq $15,%r9 jz .Lxts_dec_ret movdqa %xmm15,%xmm10 paddq %xmm15,%xmm15 pand %xmm8,%xmm11 pxor %xmm15,%xmm11 jmp .Lxts_dec_done2 .align 16 .Lxts_dec_one: movups (%rdi),%xmm2 leaq 16(%rdi),%rdi xorps %xmm10,%xmm2 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_dec1_12: .byte 102,15,56,222,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_12 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 movups %xmm2,(%rsi) movdqa %xmm12,%xmm11 leaq 16(%rsi),%rsi jmp .Lxts_dec_done .align 16 .Lxts_dec_two: movups (%rdi),%xmm2 movups 16(%rdi),%xmm3 leaq 32(%rdi),%rdi xorps %xmm10,%xmm2 xorps %xmm11,%xmm3 call _aesni_decrypt2 xorps %xmm10,%xmm2 movdqa %xmm12,%xmm10 xorps %xmm11,%xmm3 movdqa %xmm13,%xmm11 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) leaq 32(%rsi),%rsi jmp .Lxts_dec_done .align 16 .Lxts_dec_three: movups (%rdi),%xmm2 movups 16(%rdi),%xmm3 movups 32(%rdi),%xmm4 leaq 48(%rdi),%rdi xorps %xmm10,%xmm2 xorps %xmm11,%xmm3 xorps %xmm12,%xmm4 call _aesni_decrypt3 xorps %xmm10,%xmm2 movdqa %xmm13,%xmm10 xorps %xmm11,%xmm3 movdqa %xmm14,%xmm11 xorps %xmm12,%xmm4 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) movups %xmm4,32(%rsi) leaq 48(%rsi),%rsi jmp .Lxts_dec_done .align 16 .Lxts_dec_four: movups (%rdi),%xmm2 movups 16(%rdi),%xmm3 movups 32(%rdi),%xmm4 xorps %xmm10,%xmm2 movups 48(%rdi),%xmm5 leaq 64(%rdi),%rdi xorps %xmm11,%xmm3 xorps %xmm12,%xmm4 xorps %xmm13,%xmm5 
call _aesni_decrypt4 pxor %xmm10,%xmm2 movdqa %xmm14,%xmm10 pxor %xmm11,%xmm3 movdqa %xmm15,%xmm11 pxor %xmm12,%xmm4 movdqu %xmm2,(%rsi) pxor %xmm13,%xmm5 movdqu %xmm3,16(%rsi) movdqu %xmm4,32(%rsi) movdqu %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp .Lxts_dec_done .align 16 .Lxts_dec_done: andq $15,%r9 jz .Lxts_dec_ret .Lxts_dec_done2: movq %r9,%rdx movq %r11,%rcx movl %r10d,%eax movups (%rdi),%xmm2 xorps %xmm11,%xmm2 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_dec1_13: .byte 102,15,56,222,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_13 .byte 102,15,56,223,209 xorps %xmm11,%xmm2 movups %xmm2,(%rsi) .Lxts_dec_steal: movzbl 16(%rdi),%eax movzbl (%rsi),%ecx leaq 1(%rdi),%rdi movb %al,(%rsi) movb %cl,16(%rsi) leaq 1(%rsi),%rsi subq $1,%rdx jnz .Lxts_dec_steal subq %r9,%rsi movq %r11,%rcx movl %r10d,%eax movups (%rsi),%xmm2 xorps %xmm10,%xmm2 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_dec1_14: .byte 102,15,56,222,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_14 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movups %xmm2,(%rsi) .Lxts_dec_ret: xorps %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 movaps %xmm0,0(%rsp) pxor %xmm8,%xmm8 movaps %xmm0,16(%rsp) pxor %xmm9,%xmm9 movaps %xmm0,32(%rsp) pxor %xmm10,%xmm10 movaps %xmm0,48(%rsp) pxor %xmm11,%xmm11 movaps %xmm0,64(%rsp) pxor %xmm12,%xmm12 movaps %xmm0,80(%rsp) pxor %xmm13,%xmm13 movaps %xmm0,96(%rsp) pxor %xmm14,%xmm14 pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp .Lxts_dec_epilogue: .byte 0xf3,0xc3 .size aesni_xts_decrypt,.-aesni_xts_decrypt .globl aesni_cbc_encrypt .type aesni_cbc_encrypt,@function .align 16 aesni_cbc_encrypt: testq %rdx,%rdx jz .Lcbc_ret movl 240(%rcx),%r10d movq %rcx,%r11 testl %r9d,%r9d jz .Lcbc_decrypt movups (%r8),%xmm2 movl %r10d,%eax cmpq $16,%rdx jb .Lcbc_enc_tail subq $16,%rdx jmp .Lcbc_enc_loop .align 16 .Lcbc_enc_loop: movups (%rdi),%xmm3 leaq 16(%rdi),%rdi movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 xorps %xmm0,%xmm3 leaq 32(%rcx),%rcx xorps %xmm3,%xmm2 .Loop_enc1_15: .byte 102,15,56,220,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_15 .byte 102,15,56,221,209 movl %r10d,%eax movq %r11,%rcx movups %xmm2,0(%rsi) leaq 16(%rsi),%rsi subq $16,%rdx jnc .Lcbc_enc_loop addq $16,%rdx jnz .Lcbc_enc_tail pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movups %xmm2,(%r8) pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 jmp .Lcbc_ret .Lcbc_enc_tail: movq %rdx,%rcx xchgq %rdi,%rsi .long 0x9066A4F3 movl $16,%ecx subq %rdx,%rcx xorl %eax,%eax .long 0x9066AAF3 leaq -16(%rdi),%rdi movl %r10d,%eax movq %rdi,%rsi movq %r11,%rcx xorq %rdx,%rdx jmp .Lcbc_enc_loop .align 16 .Lcbc_decrypt: cmpq $16,%rdx jne .Lcbc_decrypt_bulk movdqu (%rdi),%xmm2 movdqu (%r8),%xmm3 movdqa %xmm2,%xmm4 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_dec1_16: .byte 102,15,56,222,209 decl %r10d movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_16 .byte 102,15,56,223,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movdqu %xmm4,(%r8) xorps %xmm3,%xmm2 pxor %xmm3,%xmm3 movups %xmm2,(%rsi) pxor %xmm2,%xmm2 jmp .Lcbc_ret .align 16 .Lcbc_decrypt_bulk: leaq (%rsp),%rax pushq %rbp subq $16,%rsp andq $-16,%rsp leaq -8(%rax),%rbp movups (%r8),%xmm10 movl %r10d,%eax cmpq $0x50,%rdx jbe .Lcbc_dec_tail movups (%rcx),%xmm0 movdqu 0(%rdi),%xmm2 movdqu 16(%rdi),%xmm3 movdqa %xmm2,%xmm11 movdqu 32(%rdi),%xmm4 movdqa %xmm3,%xmm12 movdqu 48(%rdi),%xmm5 
movdqa %xmm4,%xmm13 movdqu 64(%rdi),%xmm6 movdqa %xmm5,%xmm14 movdqu 80(%rdi),%xmm7 movdqa %xmm6,%xmm15 movl OPENSSL_ia32cap_P+4(%rip),%r9d cmpq $0x70,%rdx jbe .Lcbc_dec_six_or_seven andl $71303168,%r9d subq $0x50,%rdx cmpl $4194304,%r9d je .Lcbc_dec_loop6_enter subq $0x20,%rdx leaq 112(%rcx),%rcx jmp .Lcbc_dec_loop8_enter .align 16 .Lcbc_dec_loop8: movups %xmm9,(%rsi) leaq 16(%rsi),%rsi .Lcbc_dec_loop8_enter: movdqu 96(%rdi),%xmm8 pxor %xmm0,%xmm2 movdqu 112(%rdi),%xmm9 pxor %xmm0,%xmm3 movups 16-112(%rcx),%xmm1 pxor %xmm0,%xmm4 xorq %r11,%r11 cmpq $0x70,%rdx pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 pxor %xmm0,%xmm7 pxor %xmm0,%xmm8 .byte 102,15,56,222,209 pxor %xmm0,%xmm9 movups 32-112(%rcx),%xmm0 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,68,15,56,222,193 setnc %r11b shlq $7,%r11 .byte 102,68,15,56,222,201 addq %rdi,%r11 movups 48-112(%rcx),%xmm1 .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 .byte 102,68,15,56,222,192 .byte 102,68,15,56,222,200 movups 64-112(%rcx),%xmm0 nop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 movups 80-112(%rcx),%xmm1 nop .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 .byte 102,68,15,56,222,192 .byte 102,68,15,56,222,200 movups 96-112(%rcx),%xmm0 nop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 movups 112-112(%rcx),%xmm1 nop .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 .byte 102,68,15,56,222,192 .byte 102,68,15,56,222,200 movups 128-112(%rcx),%xmm0 nop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 movups 144-112(%rcx),%xmm1 cmpl $11,%eax .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 .byte 102,68,15,56,222,192 .byte 102,68,15,56,222,200 movups 160-112(%rcx),%xmm0 jb .Lcbc_dec_done .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 movups 176-112(%rcx),%xmm1 nop .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 .byte 102,68,15,56,222,192 .byte 102,68,15,56,222,200 movups 192-112(%rcx),%xmm0 je .Lcbc_dec_done .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 movups 208-112(%rcx),%xmm1 nop .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 .byte 102,68,15,56,222,192 .byte 102,68,15,56,222,200 movups 224-112(%rcx),%xmm0 jmp .Lcbc_dec_done .align 16 .Lcbc_dec_done: .byte 
102,15,56,222,209 .byte 102,15,56,222,217 pxor %xmm0,%xmm10 pxor %xmm0,%xmm11 .byte 102,15,56,222,225 .byte 102,15,56,222,233 pxor %xmm0,%xmm12 pxor %xmm0,%xmm13 .byte 102,15,56,222,241 .byte 102,15,56,222,249 pxor %xmm0,%xmm14 pxor %xmm0,%xmm15 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 movdqu 80(%rdi),%xmm1 .byte 102,65,15,56,223,210 movdqu 96(%rdi),%xmm10 pxor %xmm0,%xmm1 .byte 102,65,15,56,223,219 pxor %xmm0,%xmm10 movdqu 112(%rdi),%xmm0 .byte 102,65,15,56,223,228 leaq 128(%rdi),%rdi movdqu 0(%r11),%xmm11 .byte 102,65,15,56,223,237 .byte 102,65,15,56,223,246 movdqu 16(%r11),%xmm12 movdqu 32(%r11),%xmm13 .byte 102,65,15,56,223,255 .byte 102,68,15,56,223,193 movdqu 48(%r11),%xmm14 movdqu 64(%r11),%xmm15 .byte 102,69,15,56,223,202 movdqa %xmm0,%xmm10 movdqu 80(%r11),%xmm1 movups -112(%rcx),%xmm0 movups %xmm2,(%rsi) movdqa %xmm11,%xmm2 movups %xmm3,16(%rsi) movdqa %xmm12,%xmm3 movups %xmm4,32(%rsi) movdqa %xmm13,%xmm4 movups %xmm5,48(%rsi) movdqa %xmm14,%xmm5 movups %xmm6,64(%rsi) movdqa %xmm15,%xmm6 movups %xmm7,80(%rsi) movdqa %xmm1,%xmm7 movups %xmm8,96(%rsi) leaq 112(%rsi),%rsi subq $0x80,%rdx ja .Lcbc_dec_loop8 movaps %xmm9,%xmm2 leaq -112(%rcx),%rcx addq $0x70,%rdx jle .Lcbc_dec_clear_tail_collected movups %xmm9,(%rsi) leaq 16(%rsi),%rsi cmpq $0x50,%rdx jbe .Lcbc_dec_tail movaps %xmm11,%xmm2 .Lcbc_dec_six_or_seven: cmpq $0x60,%rdx ja .Lcbc_dec_seven movaps %xmm7,%xmm8 call _aesni_decrypt6 pxor %xmm10,%xmm2 movaps %xmm8,%xmm10 pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) pxor %xmm5,%xmm5 pxor %xmm15,%xmm7 movdqu %xmm6,64(%rsi) pxor %xmm6,%xmm6 leaq 80(%rsi),%rsi movdqa %xmm7,%xmm2 pxor %xmm7,%xmm7 jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_seven: movups 96(%rdi),%xmm8 xorps %xmm9,%xmm9 call _aesni_decrypt8 movups 80(%rdi),%xmm9 pxor %xmm10,%xmm2 movups 96(%rdi),%xmm10 pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) pxor %xmm5,%xmm5 pxor %xmm15,%xmm7 movdqu %xmm6,64(%rsi) pxor %xmm6,%xmm6 pxor %xmm9,%xmm8 movdqu %xmm7,80(%rsi) pxor %xmm7,%xmm7 leaq 96(%rsi),%rsi movdqa %xmm8,%xmm2 pxor %xmm8,%xmm8 pxor %xmm9,%xmm9 jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_loop6: movups %xmm7,(%rsi) leaq 16(%rsi),%rsi movdqu 0(%rdi),%xmm2 movdqu 16(%rdi),%xmm3 movdqa %xmm2,%xmm11 movdqu 32(%rdi),%xmm4 movdqa %xmm3,%xmm12 movdqu 48(%rdi),%xmm5 movdqa %xmm4,%xmm13 movdqu 64(%rdi),%xmm6 movdqa %xmm5,%xmm14 movdqu 80(%rdi),%xmm7 movdqa %xmm6,%xmm15 .Lcbc_dec_loop6_enter: leaq 96(%rdi),%rdi movdqa %xmm7,%xmm8 call _aesni_decrypt6 pxor %xmm10,%xmm2 movdqa %xmm8,%xmm10 pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) pxor %xmm14,%xmm6 movq %r11,%rcx movdqu %xmm5,48(%rsi) pxor %xmm15,%xmm7 movl %r10d,%eax movdqu %xmm6,64(%rsi) leaq 80(%rsi),%rsi subq $0x60,%rdx ja .Lcbc_dec_loop6 movdqa %xmm7,%xmm2 addq $0x50,%rdx jle .Lcbc_dec_clear_tail_collected movups %xmm7,(%rsi) leaq 16(%rsi),%rsi .Lcbc_dec_tail: movups (%rdi),%xmm2 subq $0x10,%rdx jbe .Lcbc_dec_one movups 16(%rdi),%xmm3 movaps %xmm2,%xmm11 subq $0x10,%rdx jbe .Lcbc_dec_two movups 32(%rdi),%xmm4 movaps %xmm3,%xmm12 subq $0x10,%rdx jbe .Lcbc_dec_three movups 48(%rdi),%xmm5 movaps %xmm4,%xmm13 subq $0x10,%rdx jbe .Lcbc_dec_four movups 64(%rdi),%xmm6 movaps %xmm5,%xmm14 movaps 
%xmm6,%xmm15 xorps %xmm7,%xmm7 call _aesni_decrypt6 pxor %xmm10,%xmm2 movaps %xmm15,%xmm10 pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) pxor %xmm5,%xmm5 leaq 64(%rsi),%rsi movdqa %xmm6,%xmm2 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 subq $0x10,%rdx jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_one: movaps %xmm2,%xmm11 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 .Loop_dec1_17: .byte 102,15,56,222,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_17 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movaps %xmm11,%xmm10 jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_two: movaps %xmm3,%xmm12 call _aesni_decrypt2 pxor %xmm10,%xmm2 movaps %xmm12,%xmm10 pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) movdqa %xmm3,%xmm2 pxor %xmm3,%xmm3 leaq 16(%rsi),%rsi jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_three: movaps %xmm4,%xmm13 call _aesni_decrypt3 pxor %xmm10,%xmm2 movaps %xmm13,%xmm10 pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) pxor %xmm3,%xmm3 movdqa %xmm4,%xmm2 pxor %xmm4,%xmm4 leaq 32(%rsi),%rsi jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_four: movaps %xmm5,%xmm14 call _aesni_decrypt4 pxor %xmm10,%xmm2 movaps %xmm14,%xmm10 pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) pxor %xmm4,%xmm4 movdqa %xmm5,%xmm2 pxor %xmm5,%xmm5 leaq 48(%rsi),%rsi jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_clear_tail_collected: pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 pxor %xmm8,%xmm8 pxor %xmm9,%xmm9 .Lcbc_dec_tail_collected: movups %xmm10,(%r8) andq $15,%rdx jnz .Lcbc_dec_tail_partial movups %xmm2,(%rsi) pxor %xmm2,%xmm2 jmp .Lcbc_dec_ret .align 16 .Lcbc_dec_tail_partial: movaps %xmm2,(%rsp) pxor %xmm2,%xmm2 movq $16,%rcx movq %rsi,%rdi subq %rdx,%rcx leaq (%rsp),%rsi .long 0x9066A4F3 movdqa %xmm2,(%rsp) .Lcbc_dec_ret: xorps %xmm0,%xmm0 pxor %xmm1,%xmm1 leaq (%rbp),%rsp popq %rbp .Lcbc_ret: .byte 0xf3,0xc3 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt .globl aesni_set_decrypt_key .type aesni_set_decrypt_key,@function .align 16 aesni_set_decrypt_key: .byte 0x48,0x83,0xEC,0x08 call __aesni_set_encrypt_key shll $4,%esi testl %eax,%eax jnz .Ldec_key_ret leaq 16(%rdx,%rsi,1),%rdi movups (%rdx),%xmm0 movups (%rdi),%xmm1 movups %xmm0,(%rdi) movups %xmm1,(%rdx) leaq 16(%rdx),%rdx leaq -16(%rdi),%rdi .Ldec_key_inverse: movups (%rdx),%xmm0 movups (%rdi),%xmm1 .byte 102,15,56,219,192 .byte 102,15,56,219,201 leaq 16(%rdx),%rdx leaq -16(%rdi),%rdi movups %xmm0,16(%rdi) movups %xmm1,-16(%rdx) cmpq %rdx,%rdi ja .Ldec_key_inverse movups (%rdx),%xmm0 .byte 102,15,56,219,192 pxor %xmm1,%xmm1 movups %xmm0,(%rdi) pxor %xmm0,%xmm0 .Ldec_key_ret: addq $8,%rsp .byte 0xf3,0xc3 .LSEH_end_set_decrypt_key: .size aesni_set_decrypt_key,.-aesni_set_decrypt_key .globl aesni_set_encrypt_key .type aesni_set_encrypt_key,@function .align 16 aesni_set_encrypt_key: __aesni_set_encrypt_key: .byte 0x48,0x83,0xEC,0x08 movq $-1,%rax testq %rdi,%rdi jz .Lenc_key_ret testq %rdx,%rdx jz .Lenc_key_ret movl $268437504,%r10d movups (%rdi),%xmm0 xorps %xmm4,%xmm4 andl OPENSSL_ia32cap_P+4(%rip),%r10d leaq 16(%rdx),%rax cmpl $256,%esi je .L14rounds cmpl $192,%esi je .L12rounds cmpl $128,%esi jne .Lbad_keybits .L10rounds: movl $9,%esi cmpl $268435456,%r10d je .L10rounds_alt movups %xmm0,(%rdx) .byte 
102,15,58,223,200,1 call .Lkey_expansion_128_cold .byte 102,15,58,223,200,2 call .Lkey_expansion_128 .byte 102,15,58,223,200,4 call .Lkey_expansion_128 .byte 102,15,58,223,200,8 call .Lkey_expansion_128 .byte 102,15,58,223,200,16 call .Lkey_expansion_128 .byte 102,15,58,223,200,32 call .Lkey_expansion_128 .byte 102,15,58,223,200,64 call .Lkey_expansion_128 .byte 102,15,58,223,200,128 call .Lkey_expansion_128 .byte 102,15,58,223,200,27 call .Lkey_expansion_128 .byte 102,15,58,223,200,54 call .Lkey_expansion_128 movups %xmm0,(%rax) movl %esi,80(%rax) xorl %eax,%eax jmp .Lenc_key_ret .align 16 .L10rounds_alt: movdqa .Lkey_rotate(%rip),%xmm5 movl $8,%r10d movdqa .Lkey_rcon1(%rip),%xmm4 movdqa %xmm0,%xmm2 movdqu %xmm0,(%rdx) jmp .Loop_key128 .align 16 .Loop_key128: .byte 102,15,56,0,197 .byte 102,15,56,221,196 pslld $1,%xmm4 leaq 16(%rax),%rax movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,-16(%rax) movdqa %xmm0,%xmm2 decl %r10d jnz .Loop_key128 movdqa .Lkey_rcon1b(%rip),%xmm4 .byte 102,15,56,0,197 .byte 102,15,56,221,196 pslld $1,%xmm4 movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,(%rax) movdqa %xmm0,%xmm2 .byte 102,15,56,0,197 .byte 102,15,56,221,196 movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,16(%rax) movl %esi,96(%rax) xorl %eax,%eax jmp .Lenc_key_ret .align 16 .L12rounds: movq 16(%rdi),%xmm2 movl $11,%esi cmpl $268435456,%r10d je .L12rounds_alt movups %xmm0,(%rdx) .byte 102,15,58,223,202,1 call .Lkey_expansion_192a_cold .byte 102,15,58,223,202,2 call .Lkey_expansion_192b .byte 102,15,58,223,202,4 call .Lkey_expansion_192a .byte 102,15,58,223,202,8 call .Lkey_expansion_192b .byte 102,15,58,223,202,16 call .Lkey_expansion_192a .byte 102,15,58,223,202,32 call .Lkey_expansion_192b .byte 102,15,58,223,202,64 call .Lkey_expansion_192a .byte 102,15,58,223,202,128 call .Lkey_expansion_192b movups %xmm0,(%rax) movl %esi,48(%rax) xorq %rax,%rax jmp .Lenc_key_ret .align 16 .L12rounds_alt: movdqa .Lkey_rotate192(%rip),%xmm5 movdqa .Lkey_rcon1(%rip),%xmm4 movl $8,%r10d movdqu %xmm0,(%rdx) jmp .Loop_key192 .align 16 .Loop_key192: movq %xmm2,0(%rax) movdqa %xmm2,%xmm1 .byte 102,15,56,0,213 .byte 102,15,56,221,212 pslld $1,%xmm4 leaq 24(%rax),%rax movdqa %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm3,%xmm0 pshufd $0xff,%xmm0,%xmm3 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pxor %xmm2,%xmm0 pxor %xmm3,%xmm2 movdqu %xmm0,-16(%rax) decl %r10d jnz .Loop_key192 movl %esi,32(%rax) xorl %eax,%eax jmp .Lenc_key_ret .align 16 .L14rounds: movups 16(%rdi),%xmm2 movl $13,%esi leaq 16(%rax),%rax cmpl $268435456,%r10d je .L14rounds_alt movups %xmm0,(%rdx) movups %xmm2,16(%rdx) .byte 102,15,58,223,202,1 call .Lkey_expansion_256a_cold .byte 102,15,58,223,200,1 call .Lkey_expansion_256b .byte 102,15,58,223,202,2 call .Lkey_expansion_256a .byte 102,15,58,223,200,2 call .Lkey_expansion_256b .byte 102,15,58,223,202,4 call .Lkey_expansion_256a .byte 102,15,58,223,200,4 call .Lkey_expansion_256b .byte 102,15,58,223,202,8 call .Lkey_expansion_256a .byte 102,15,58,223,200,8 call .Lkey_expansion_256b .byte 102,15,58,223,202,16 call .Lkey_expansion_256a .byte 102,15,58,223,200,16 call .Lkey_expansion_256b .byte 102,15,58,223,202,32 call .Lkey_expansion_256a .byte 
102,15,58,223,200,32 call .Lkey_expansion_256b .byte 102,15,58,223,202,64 call .Lkey_expansion_256a movups %xmm0,(%rax) movl %esi,16(%rax) xorq %rax,%rax jmp .Lenc_key_ret .align 16 .L14rounds_alt: movdqa .Lkey_rotate(%rip),%xmm5 movdqa .Lkey_rcon1(%rip),%xmm4 movl $7,%r10d movdqu %xmm0,0(%rdx) movdqa %xmm2,%xmm1 movdqu %xmm2,16(%rdx) jmp .Loop_key256 .align 16 .Loop_key256: .byte 102,15,56,0,213 .byte 102,15,56,221,212 movdqa %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm3,%xmm0 pslld $1,%xmm4 pxor %xmm2,%xmm0 movdqu %xmm0,(%rax) decl %r10d jz .Ldone_key256 pshufd $0xff,%xmm0,%xmm2 pxor %xmm3,%xmm3 .byte 102,15,56,221,211 movdqa %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm3,%xmm1 pxor %xmm1,%xmm2 movdqu %xmm2,16(%rax) leaq 32(%rax),%rax movdqa %xmm2,%xmm1 jmp .Loop_key256 .Ldone_key256: movl %esi,16(%rax) xorl %eax,%eax jmp .Lenc_key_ret .align 16 .Lbad_keybits: movq $-2,%rax .Lenc_key_ret: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 addq $8,%rsp .byte 0xf3,0xc3 .LSEH_end_set_encrypt_key: .align 16 .Lkey_expansion_128: movups %xmm0,(%rax) leaq 16(%rax),%rax .Lkey_expansion_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $255,%xmm1,%xmm1 xorps %xmm1,%xmm0 .byte 0xf3,0xc3 .align 16 .Lkey_expansion_192a: movups %xmm0,(%rax) leaq 16(%rax),%rax .Lkey_expansion_192a_cold: movaps %xmm2,%xmm5 .Lkey_expansion_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 pslldq $4,%xmm3 xorps %xmm4,%xmm0 pshufd $85,%xmm1,%xmm1 pxor %xmm3,%xmm2 pxor %xmm1,%xmm0 pshufd $255,%xmm0,%xmm3 pxor %xmm3,%xmm2 .byte 0xf3,0xc3 .align 16 .Lkey_expansion_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%rax) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%rax) leaq 32(%rax),%rax jmp .Lkey_expansion_192b_warm .align 16 .Lkey_expansion_256a: movups %xmm2,(%rax) leaq 16(%rax),%rax .Lkey_expansion_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $255,%xmm1,%xmm1 xorps %xmm1,%xmm0 .byte 0xf3,0xc3 .align 16 .Lkey_expansion_256b: movups %xmm0,(%rax) leaq 16(%rax),%rax shufps $16,%xmm2,%xmm4 xorps %xmm4,%xmm2 shufps $140,%xmm2,%xmm4 xorps %xmm4,%xmm2 shufps $170,%xmm1,%xmm1 xorps %xmm1,%xmm2 .byte 0xf3,0xc3 .size aesni_set_encrypt_key,.-aesni_set_encrypt_key .size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key .align 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .Lincrement32: .long 6,6,6,0 .Lincrement64: .long 1,0,0,0 .Lxts_magic: .long 0x87,0,1,0 .Lincrement1: .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 .Lkey_rotate: .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d .Lkey_rotate192: .long 0x04070605,0x04070605,0x04070605,0x04070605 .Lkey_rcon1: .long 1,1,1,1 .Lkey_rcon1b: .long 0x1b,0x1b,0x1b,0x1b .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 Index: head/secure/lib/libcrypto/amd64/bsaes-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/bsaes-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/bsaes-x86_64.S (revision 299481) @@ -1,2499 +1,2500 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from bsaes-x86_64.pl. 
.text .type _bsaes_encrypt8,@function .align 64 _bsaes_encrypt8: leaq .LBS0(%rip),%r11 movdqa (%rax),%xmm8 leaq 16(%rax),%rax movdqa 80(%r11),%xmm7 pxor %xmm8,%xmm15 pxor %xmm8,%xmm0 pxor %xmm8,%xmm1 pxor %xmm8,%xmm2 .byte 102,68,15,56,0,255 .byte 102,15,56,0,199 pxor %xmm8,%xmm3 pxor %xmm8,%xmm4 .byte 102,15,56,0,207 .byte 102,15,56,0,215 pxor %xmm8,%xmm5 pxor %xmm8,%xmm6 .byte 102,15,56,0,223 .byte 102,15,56,0,231 .byte 102,15,56,0,239 .byte 102,15,56,0,247 _bsaes_encrypt8_bitslice: movdqa 0(%r11),%xmm7 movdqa 16(%r11),%xmm8 movdqa %xmm5,%xmm9 psrlq $1,%xmm5 movdqa %xmm3,%xmm10 psrlq $1,%xmm3 pxor %xmm6,%xmm5 pxor %xmm4,%xmm3 pand %xmm7,%xmm5 pand %xmm7,%xmm3 pxor %xmm5,%xmm6 psllq $1,%xmm5 pxor %xmm3,%xmm4 psllq $1,%xmm3 pxor %xmm9,%xmm5 pxor %xmm10,%xmm3 movdqa %xmm1,%xmm9 psrlq $1,%xmm1 movdqa %xmm15,%xmm10 psrlq $1,%xmm15 pxor %xmm2,%xmm1 pxor %xmm0,%xmm15 pand %xmm7,%xmm1 pand %xmm7,%xmm15 pxor %xmm1,%xmm2 psllq $1,%xmm1 pxor %xmm15,%xmm0 psllq $1,%xmm15 pxor %xmm9,%xmm1 pxor %xmm10,%xmm15 movdqa 32(%r11),%xmm7 movdqa %xmm4,%xmm9 psrlq $2,%xmm4 movdqa %xmm3,%xmm10 psrlq $2,%xmm3 pxor %xmm6,%xmm4 pxor %xmm5,%xmm3 pand %xmm8,%xmm4 pand %xmm8,%xmm3 pxor %xmm4,%xmm6 psllq $2,%xmm4 pxor %xmm3,%xmm5 psllq $2,%xmm3 pxor %xmm9,%xmm4 pxor %xmm10,%xmm3 movdqa %xmm0,%xmm9 psrlq $2,%xmm0 movdqa %xmm15,%xmm10 psrlq $2,%xmm15 pxor %xmm2,%xmm0 pxor %xmm1,%xmm15 pand %xmm8,%xmm0 pand %xmm8,%xmm15 pxor %xmm0,%xmm2 psllq $2,%xmm0 pxor %xmm15,%xmm1 psllq $2,%xmm15 pxor %xmm9,%xmm0 pxor %xmm10,%xmm15 movdqa %xmm2,%xmm9 psrlq $4,%xmm2 movdqa %xmm1,%xmm10 psrlq $4,%xmm1 pxor %xmm6,%xmm2 pxor %xmm5,%xmm1 pand %xmm7,%xmm2 pand %xmm7,%xmm1 pxor %xmm2,%xmm6 psllq $4,%xmm2 pxor %xmm1,%xmm5 psllq $4,%xmm1 pxor %xmm9,%xmm2 pxor %xmm10,%xmm1 movdqa %xmm0,%xmm9 psrlq $4,%xmm0 movdqa %xmm15,%xmm10 psrlq $4,%xmm15 pxor %xmm4,%xmm0 pxor %xmm3,%xmm15 pand %xmm7,%xmm0 pand %xmm7,%xmm15 pxor %xmm0,%xmm4 psllq $4,%xmm0 pxor %xmm15,%xmm3 psllq $4,%xmm15 pxor %xmm9,%xmm0 pxor %xmm10,%xmm15 decl %r10d jmp .Lenc_sbox .align 16 .Lenc_loop: pxor 0(%rax),%xmm15 pxor 16(%rax),%xmm0 pxor 32(%rax),%xmm1 pxor 48(%rax),%xmm2 .byte 102,68,15,56,0,255 .byte 102,15,56,0,199 pxor 64(%rax),%xmm3 pxor 80(%rax),%xmm4 .byte 102,15,56,0,207 .byte 102,15,56,0,215 pxor 96(%rax),%xmm5 pxor 112(%rax),%xmm6 .byte 102,15,56,0,223 .byte 102,15,56,0,231 .byte 102,15,56,0,239 .byte 102,15,56,0,247 leaq 128(%rax),%rax .Lenc_sbox: pxor %xmm5,%xmm4 pxor %xmm0,%xmm1 pxor %xmm15,%xmm2 pxor %xmm1,%xmm5 pxor %xmm15,%xmm4 pxor %xmm2,%xmm5 pxor %xmm6,%xmm2 pxor %xmm4,%xmm6 pxor %xmm3,%xmm2 pxor %xmm4,%xmm3 pxor %xmm0,%xmm2 pxor %xmm6,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm6,%xmm10 movdqa %xmm0,%xmm9 movdqa %xmm4,%xmm8 movdqa %xmm1,%xmm12 movdqa %xmm5,%xmm11 pxor %xmm3,%xmm10 pxor %xmm1,%xmm9 pxor %xmm2,%xmm8 movdqa %xmm10,%xmm13 pxor %xmm3,%xmm12 movdqa %xmm9,%xmm7 pxor %xmm15,%xmm11 movdqa %xmm10,%xmm14 por %xmm8,%xmm9 por %xmm11,%xmm10 pxor %xmm7,%xmm14 pand %xmm11,%xmm13 pxor %xmm8,%xmm11 pand %xmm8,%xmm7 pand %xmm11,%xmm14 movdqa %xmm2,%xmm11 pxor %xmm15,%xmm11 pand %xmm11,%xmm12 pxor %xmm12,%xmm10 pxor %xmm12,%xmm9 movdqa %xmm6,%xmm12 movdqa %xmm4,%xmm11 pxor %xmm0,%xmm12 pxor %xmm5,%xmm11 movdqa %xmm12,%xmm8 pand %xmm11,%xmm12 por %xmm11,%xmm8 pxor %xmm12,%xmm7 pxor %xmm14,%xmm10 pxor %xmm13,%xmm9 pxor %xmm14,%xmm8 movdqa %xmm1,%xmm11 pxor %xmm13,%xmm7 movdqa %xmm3,%xmm12 pxor %xmm13,%xmm8 movdqa %xmm0,%xmm13 pand %xmm2,%xmm11 movdqa %xmm6,%xmm14 pand %xmm15,%xmm12 pand %xmm4,%xmm13 por %xmm5,%xmm14 pxor %xmm11,%xmm10 pxor %xmm12,%xmm9 pxor %xmm13,%xmm8 
pxor %xmm14,%xmm7 movdqa %xmm10,%xmm11 pand %xmm8,%xmm10 pxor %xmm9,%xmm11 movdqa %xmm7,%xmm13 movdqa %xmm11,%xmm14 pxor %xmm10,%xmm13 pand %xmm13,%xmm14 movdqa %xmm8,%xmm12 pxor %xmm9,%xmm14 pxor %xmm7,%xmm12 pxor %xmm9,%xmm10 pand %xmm10,%xmm12 movdqa %xmm13,%xmm9 pxor %xmm7,%xmm12 pxor %xmm12,%xmm9 pxor %xmm12,%xmm8 pand %xmm7,%xmm9 pxor %xmm9,%xmm13 pxor %xmm9,%xmm8 pand %xmm14,%xmm13 pxor %xmm11,%xmm13 movdqa %xmm5,%xmm11 movdqa %xmm4,%xmm7 movdqa %xmm14,%xmm9 pxor %xmm13,%xmm9 pand %xmm5,%xmm9 pxor %xmm4,%xmm5 pand %xmm14,%xmm4 pand %xmm13,%xmm5 pxor %xmm4,%xmm5 pxor %xmm9,%xmm4 pxor %xmm15,%xmm11 pxor %xmm2,%xmm7 pxor %xmm12,%xmm14 pxor %xmm8,%xmm13 movdqa %xmm14,%xmm10 movdqa %xmm12,%xmm9 pxor %xmm13,%xmm10 pxor %xmm8,%xmm9 pand %xmm11,%xmm10 pand %xmm15,%xmm9 pxor %xmm7,%xmm11 pxor %xmm2,%xmm15 pand %xmm14,%xmm7 pand %xmm12,%xmm2 pand %xmm13,%xmm11 pand %xmm8,%xmm15 pxor %xmm11,%xmm7 pxor %xmm2,%xmm15 pxor %xmm10,%xmm11 pxor %xmm9,%xmm2 pxor %xmm11,%xmm5 pxor %xmm11,%xmm15 pxor %xmm7,%xmm4 pxor %xmm7,%xmm2 movdqa %xmm6,%xmm11 movdqa %xmm0,%xmm7 pxor %xmm3,%xmm11 pxor %xmm1,%xmm7 movdqa %xmm14,%xmm10 movdqa %xmm12,%xmm9 pxor %xmm13,%xmm10 pxor %xmm8,%xmm9 pand %xmm11,%xmm10 pand %xmm3,%xmm9 pxor %xmm7,%xmm11 pxor %xmm1,%xmm3 pand %xmm14,%xmm7 pand %xmm12,%xmm1 pand %xmm13,%xmm11 pand %xmm8,%xmm3 pxor %xmm11,%xmm7 pxor %xmm1,%xmm3 pxor %xmm10,%xmm11 pxor %xmm9,%xmm1 pxor %xmm12,%xmm14 pxor %xmm8,%xmm13 movdqa %xmm14,%xmm10 pxor %xmm13,%xmm10 pand %xmm6,%xmm10 pxor %xmm0,%xmm6 pand %xmm14,%xmm0 pand %xmm13,%xmm6 pxor %xmm0,%xmm6 pxor %xmm10,%xmm0 pxor %xmm11,%xmm6 pxor %xmm11,%xmm3 pxor %xmm7,%xmm0 pxor %xmm7,%xmm1 pxor %xmm15,%xmm6 pxor %xmm5,%xmm0 pxor %xmm6,%xmm3 pxor %xmm15,%xmm5 pxor %xmm0,%xmm15 pxor %xmm4,%xmm0 pxor %xmm1,%xmm4 pxor %xmm2,%xmm1 pxor %xmm4,%xmm2 pxor %xmm4,%xmm3 pxor %xmm2,%xmm5 decl %r10d jl .Lenc_done pshufd $0x93,%xmm15,%xmm7 pshufd $0x93,%xmm0,%xmm8 pxor %xmm7,%xmm15 pshufd $0x93,%xmm3,%xmm9 pxor %xmm8,%xmm0 pshufd $0x93,%xmm5,%xmm10 pxor %xmm9,%xmm3 pshufd $0x93,%xmm2,%xmm11 pxor %xmm10,%xmm5 pshufd $0x93,%xmm6,%xmm12 pxor %xmm11,%xmm2 pshufd $0x93,%xmm1,%xmm13 pxor %xmm12,%xmm6 pshufd $0x93,%xmm4,%xmm14 pxor %xmm13,%xmm1 pxor %xmm14,%xmm4 pxor %xmm15,%xmm8 pxor %xmm4,%xmm7 pxor %xmm4,%xmm8 pshufd $0x4E,%xmm15,%xmm15 pxor %xmm0,%xmm9 pshufd $0x4E,%xmm0,%xmm0 pxor %xmm2,%xmm12 pxor %xmm7,%xmm15 pxor %xmm6,%xmm13 pxor %xmm8,%xmm0 pxor %xmm5,%xmm11 pshufd $0x4E,%xmm2,%xmm7 pxor %xmm1,%xmm14 pshufd $0x4E,%xmm6,%xmm8 pxor %xmm3,%xmm10 pshufd $0x4E,%xmm5,%xmm2 pxor %xmm4,%xmm10 pshufd $0x4E,%xmm4,%xmm6 pxor %xmm4,%xmm11 pshufd $0x4E,%xmm1,%xmm5 pxor %xmm11,%xmm7 pshufd $0x4E,%xmm3,%xmm1 pxor %xmm12,%xmm8 pxor %xmm10,%xmm2 pxor %xmm14,%xmm6 pxor %xmm13,%xmm5 movdqa %xmm7,%xmm3 pxor %xmm9,%xmm1 movdqa %xmm8,%xmm4 movdqa 48(%r11),%xmm7 jnz .Lenc_loop movdqa 64(%r11),%xmm7 jmp .Lenc_loop .align 16 .Lenc_done: movdqa 0(%r11),%xmm7 movdqa 16(%r11),%xmm8 movdqa %xmm1,%xmm9 psrlq $1,%xmm1 movdqa %xmm2,%xmm10 psrlq $1,%xmm2 pxor %xmm4,%xmm1 pxor %xmm6,%xmm2 pand %xmm7,%xmm1 pand %xmm7,%xmm2 pxor %xmm1,%xmm4 psllq $1,%xmm1 pxor %xmm2,%xmm6 psllq $1,%xmm2 pxor %xmm9,%xmm1 pxor %xmm10,%xmm2 movdqa %xmm3,%xmm9 psrlq $1,%xmm3 movdqa %xmm15,%xmm10 psrlq $1,%xmm15 pxor %xmm5,%xmm3 pxor %xmm0,%xmm15 pand %xmm7,%xmm3 pand %xmm7,%xmm15 pxor %xmm3,%xmm5 psllq $1,%xmm3 pxor %xmm15,%xmm0 psllq $1,%xmm15 pxor %xmm9,%xmm3 pxor %xmm10,%xmm15 movdqa 32(%r11),%xmm7 movdqa %xmm6,%xmm9 psrlq $2,%xmm6 movdqa %xmm2,%xmm10 psrlq $2,%xmm2 pxor %xmm4,%xmm6 pxor %xmm1,%xmm2 pand %xmm8,%xmm6 pand 
%xmm8,%xmm2 pxor %xmm6,%xmm4 psllq $2,%xmm6 pxor %xmm2,%xmm1 psllq $2,%xmm2 pxor %xmm9,%xmm6 pxor %xmm10,%xmm2 movdqa %xmm0,%xmm9 psrlq $2,%xmm0 movdqa %xmm15,%xmm10 psrlq $2,%xmm15 pxor %xmm5,%xmm0 pxor %xmm3,%xmm15 pand %xmm8,%xmm0 pand %xmm8,%xmm15 pxor %xmm0,%xmm5 psllq $2,%xmm0 pxor %xmm15,%xmm3 psllq $2,%xmm15 pxor %xmm9,%xmm0 pxor %xmm10,%xmm15 movdqa %xmm5,%xmm9 psrlq $4,%xmm5 movdqa %xmm3,%xmm10 psrlq $4,%xmm3 pxor %xmm4,%xmm5 pxor %xmm1,%xmm3 pand %xmm7,%xmm5 pand %xmm7,%xmm3 pxor %xmm5,%xmm4 psllq $4,%xmm5 pxor %xmm3,%xmm1 psllq $4,%xmm3 pxor %xmm9,%xmm5 pxor %xmm10,%xmm3 movdqa %xmm0,%xmm9 psrlq $4,%xmm0 movdqa %xmm15,%xmm10 psrlq $4,%xmm15 pxor %xmm6,%xmm0 pxor %xmm2,%xmm15 pand %xmm7,%xmm0 pand %xmm7,%xmm15 pxor %xmm0,%xmm6 psllq $4,%xmm0 pxor %xmm15,%xmm2 psllq $4,%xmm15 pxor %xmm9,%xmm0 pxor %xmm10,%xmm15 movdqa (%rax),%xmm7 pxor %xmm7,%xmm3 pxor %xmm7,%xmm5 pxor %xmm7,%xmm2 pxor %xmm7,%xmm6 pxor %xmm7,%xmm1 pxor %xmm7,%xmm4 pxor %xmm7,%xmm15 pxor %xmm7,%xmm0 .byte 0xf3,0xc3 .size _bsaes_encrypt8,.-_bsaes_encrypt8 .type _bsaes_decrypt8,@function .align 64 _bsaes_decrypt8: leaq .LBS0(%rip),%r11 movdqa (%rax),%xmm8 leaq 16(%rax),%rax movdqa -48(%r11),%xmm7 pxor %xmm8,%xmm15 pxor %xmm8,%xmm0 pxor %xmm8,%xmm1 pxor %xmm8,%xmm2 .byte 102,68,15,56,0,255 .byte 102,15,56,0,199 pxor %xmm8,%xmm3 pxor %xmm8,%xmm4 .byte 102,15,56,0,207 .byte 102,15,56,0,215 pxor %xmm8,%xmm5 pxor %xmm8,%xmm6 .byte 102,15,56,0,223 .byte 102,15,56,0,231 .byte 102,15,56,0,239 .byte 102,15,56,0,247 movdqa 0(%r11),%xmm7 movdqa 16(%r11),%xmm8 movdqa %xmm5,%xmm9 psrlq $1,%xmm5 movdqa %xmm3,%xmm10 psrlq $1,%xmm3 pxor %xmm6,%xmm5 pxor %xmm4,%xmm3 pand %xmm7,%xmm5 pand %xmm7,%xmm3 pxor %xmm5,%xmm6 psllq $1,%xmm5 pxor %xmm3,%xmm4 psllq $1,%xmm3 pxor %xmm9,%xmm5 pxor %xmm10,%xmm3 movdqa %xmm1,%xmm9 psrlq $1,%xmm1 movdqa %xmm15,%xmm10 psrlq $1,%xmm15 pxor %xmm2,%xmm1 pxor %xmm0,%xmm15 pand %xmm7,%xmm1 pand %xmm7,%xmm15 pxor %xmm1,%xmm2 psllq $1,%xmm1 pxor %xmm15,%xmm0 psllq $1,%xmm15 pxor %xmm9,%xmm1 pxor %xmm10,%xmm15 movdqa 32(%r11),%xmm7 movdqa %xmm4,%xmm9 psrlq $2,%xmm4 movdqa %xmm3,%xmm10 psrlq $2,%xmm3 pxor %xmm6,%xmm4 pxor %xmm5,%xmm3 pand %xmm8,%xmm4 pand %xmm8,%xmm3 pxor %xmm4,%xmm6 psllq $2,%xmm4 pxor %xmm3,%xmm5 psllq $2,%xmm3 pxor %xmm9,%xmm4 pxor %xmm10,%xmm3 movdqa %xmm0,%xmm9 psrlq $2,%xmm0 movdqa %xmm15,%xmm10 psrlq $2,%xmm15 pxor %xmm2,%xmm0 pxor %xmm1,%xmm15 pand %xmm8,%xmm0 pand %xmm8,%xmm15 pxor %xmm0,%xmm2 psllq $2,%xmm0 pxor %xmm15,%xmm1 psllq $2,%xmm15 pxor %xmm9,%xmm0 pxor %xmm10,%xmm15 movdqa %xmm2,%xmm9 psrlq $4,%xmm2 movdqa %xmm1,%xmm10 psrlq $4,%xmm1 pxor %xmm6,%xmm2 pxor %xmm5,%xmm1 pand %xmm7,%xmm2 pand %xmm7,%xmm1 pxor %xmm2,%xmm6 psllq $4,%xmm2 pxor %xmm1,%xmm5 psllq $4,%xmm1 pxor %xmm9,%xmm2 pxor %xmm10,%xmm1 movdqa %xmm0,%xmm9 psrlq $4,%xmm0 movdqa %xmm15,%xmm10 psrlq $4,%xmm15 pxor %xmm4,%xmm0 pxor %xmm3,%xmm15 pand %xmm7,%xmm0 pand %xmm7,%xmm15 pxor %xmm0,%xmm4 psllq $4,%xmm0 pxor %xmm15,%xmm3 psllq $4,%xmm15 pxor %xmm9,%xmm0 pxor %xmm10,%xmm15 decl %r10d jmp .Ldec_sbox .align 16 .Ldec_loop: pxor 0(%rax),%xmm15 pxor 16(%rax),%xmm0 pxor 32(%rax),%xmm1 pxor 48(%rax),%xmm2 .byte 102,68,15,56,0,255 .byte 102,15,56,0,199 pxor 64(%rax),%xmm3 pxor 80(%rax),%xmm4 .byte 102,15,56,0,207 .byte 102,15,56,0,215 pxor 96(%rax),%xmm5 pxor 112(%rax),%xmm6 .byte 102,15,56,0,223 .byte 102,15,56,0,231 .byte 102,15,56,0,239 .byte 102,15,56,0,247 leaq 128(%rax),%rax .Ldec_sbox: pxor %xmm3,%xmm2 pxor %xmm6,%xmm3 pxor %xmm6,%xmm1 pxor %xmm3,%xmm5 pxor %xmm5,%xmm6 pxor %xmm6,%xmm0 pxor %xmm0,%xmm15 pxor 
%xmm4,%xmm1 pxor %xmm15,%xmm2 pxor %xmm15,%xmm4 pxor %xmm2,%xmm0 movdqa %xmm2,%xmm10 movdqa %xmm6,%xmm9 movdqa %xmm0,%xmm8 movdqa %xmm3,%xmm12 movdqa %xmm4,%xmm11 pxor %xmm15,%xmm10 pxor %xmm3,%xmm9 pxor %xmm5,%xmm8 movdqa %xmm10,%xmm13 pxor %xmm15,%xmm12 movdqa %xmm9,%xmm7 pxor %xmm1,%xmm11 movdqa %xmm10,%xmm14 por %xmm8,%xmm9 por %xmm11,%xmm10 pxor %xmm7,%xmm14 pand %xmm11,%xmm13 pxor %xmm8,%xmm11 pand %xmm8,%xmm7 pand %xmm11,%xmm14 movdqa %xmm5,%xmm11 pxor %xmm1,%xmm11 pand %xmm11,%xmm12 pxor %xmm12,%xmm10 pxor %xmm12,%xmm9 movdqa %xmm2,%xmm12 movdqa %xmm0,%xmm11 pxor %xmm6,%xmm12 pxor %xmm4,%xmm11 movdqa %xmm12,%xmm8 pand %xmm11,%xmm12 por %xmm11,%xmm8 pxor %xmm12,%xmm7 pxor %xmm14,%xmm10 pxor %xmm13,%xmm9 pxor %xmm14,%xmm8 movdqa %xmm3,%xmm11 pxor %xmm13,%xmm7 movdqa %xmm15,%xmm12 pxor %xmm13,%xmm8 movdqa %xmm6,%xmm13 pand %xmm5,%xmm11 movdqa %xmm2,%xmm14 pand %xmm1,%xmm12 pand %xmm0,%xmm13 por %xmm4,%xmm14 pxor %xmm11,%xmm10 pxor %xmm12,%xmm9 pxor %xmm13,%xmm8 pxor %xmm14,%xmm7 movdqa %xmm10,%xmm11 pand %xmm8,%xmm10 pxor %xmm9,%xmm11 movdqa %xmm7,%xmm13 movdqa %xmm11,%xmm14 pxor %xmm10,%xmm13 pand %xmm13,%xmm14 movdqa %xmm8,%xmm12 pxor %xmm9,%xmm14 pxor %xmm7,%xmm12 pxor %xmm9,%xmm10 pand %xmm10,%xmm12 movdqa %xmm13,%xmm9 pxor %xmm7,%xmm12 pxor %xmm12,%xmm9 pxor %xmm12,%xmm8 pand %xmm7,%xmm9 pxor %xmm9,%xmm13 pxor %xmm9,%xmm8 pand %xmm14,%xmm13 pxor %xmm11,%xmm13 movdqa %xmm4,%xmm11 movdqa %xmm0,%xmm7 movdqa %xmm14,%xmm9 pxor %xmm13,%xmm9 pand %xmm4,%xmm9 pxor %xmm0,%xmm4 pand %xmm14,%xmm0 pand %xmm13,%xmm4 pxor %xmm0,%xmm4 pxor %xmm9,%xmm0 pxor %xmm1,%xmm11 pxor %xmm5,%xmm7 pxor %xmm12,%xmm14 pxor %xmm8,%xmm13 movdqa %xmm14,%xmm10 movdqa %xmm12,%xmm9 pxor %xmm13,%xmm10 pxor %xmm8,%xmm9 pand %xmm11,%xmm10 pand %xmm1,%xmm9 pxor %xmm7,%xmm11 pxor %xmm5,%xmm1 pand %xmm14,%xmm7 pand %xmm12,%xmm5 pand %xmm13,%xmm11 pand %xmm8,%xmm1 pxor %xmm11,%xmm7 pxor %xmm5,%xmm1 pxor %xmm10,%xmm11 pxor %xmm9,%xmm5 pxor %xmm11,%xmm4 pxor %xmm11,%xmm1 pxor %xmm7,%xmm0 pxor %xmm7,%xmm5 movdqa %xmm2,%xmm11 movdqa %xmm6,%xmm7 pxor %xmm15,%xmm11 pxor %xmm3,%xmm7 movdqa %xmm14,%xmm10 movdqa %xmm12,%xmm9 pxor %xmm13,%xmm10 pxor %xmm8,%xmm9 pand %xmm11,%xmm10 pand %xmm15,%xmm9 pxor %xmm7,%xmm11 pxor %xmm3,%xmm15 pand %xmm14,%xmm7 pand %xmm12,%xmm3 pand %xmm13,%xmm11 pand %xmm8,%xmm15 pxor %xmm11,%xmm7 pxor %xmm3,%xmm15 pxor %xmm10,%xmm11 pxor %xmm9,%xmm3 pxor %xmm12,%xmm14 pxor %xmm8,%xmm13 movdqa %xmm14,%xmm10 pxor %xmm13,%xmm10 pand %xmm2,%xmm10 pxor %xmm6,%xmm2 pand %xmm14,%xmm6 pand %xmm13,%xmm2 pxor %xmm6,%xmm2 pxor %xmm10,%xmm6 pxor %xmm11,%xmm2 pxor %xmm11,%xmm15 pxor %xmm7,%xmm6 pxor %xmm7,%xmm3 pxor %xmm6,%xmm0 pxor %xmm4,%xmm5 pxor %xmm0,%xmm3 pxor %xmm6,%xmm1 pxor %xmm6,%xmm4 pxor %xmm1,%xmm3 pxor %xmm15,%xmm6 pxor %xmm4,%xmm3 pxor %xmm5,%xmm2 pxor %xmm0,%xmm5 pxor %xmm3,%xmm2 pxor %xmm15,%xmm3 pxor %xmm2,%xmm6 decl %r10d jl .Ldec_done pshufd $0x4E,%xmm15,%xmm7 pshufd $0x4E,%xmm2,%xmm13 pxor %xmm15,%xmm7 pshufd $0x4E,%xmm4,%xmm14 pxor %xmm2,%xmm13 pshufd $0x4E,%xmm0,%xmm8 pxor %xmm4,%xmm14 pshufd $0x4E,%xmm5,%xmm9 pxor %xmm0,%xmm8 pshufd $0x4E,%xmm3,%xmm10 pxor %xmm5,%xmm9 pxor %xmm13,%xmm15 pxor %xmm13,%xmm0 pshufd $0x4E,%xmm1,%xmm11 pxor %xmm3,%xmm10 pxor %xmm7,%xmm5 pxor %xmm8,%xmm3 pshufd $0x4E,%xmm6,%xmm12 pxor %xmm1,%xmm11 pxor %xmm14,%xmm0 pxor %xmm9,%xmm1 pxor %xmm6,%xmm12 pxor %xmm14,%xmm5 pxor %xmm13,%xmm3 pxor %xmm13,%xmm1 pxor %xmm10,%xmm6 pxor %xmm11,%xmm2 pxor %xmm14,%xmm1 pxor %xmm14,%xmm6 pxor %xmm12,%xmm4 pshufd $0x93,%xmm15,%xmm7 pshufd $0x93,%xmm0,%xmm8 pxor %xmm7,%xmm15 pshufd 
$0x93,%xmm5,%xmm9 pxor %xmm8,%xmm0 pshufd $0x93,%xmm3,%xmm10 pxor %xmm9,%xmm5 pshufd $0x93,%xmm1,%xmm11 pxor %xmm10,%xmm3 pshufd $0x93,%xmm6,%xmm12 pxor %xmm11,%xmm1 pshufd $0x93,%xmm2,%xmm13 pxor %xmm12,%xmm6 pshufd $0x93,%xmm4,%xmm14 pxor %xmm13,%xmm2 pxor %xmm14,%xmm4 pxor %xmm15,%xmm8 pxor %xmm4,%xmm7 pxor %xmm4,%xmm8 pshufd $0x4E,%xmm15,%xmm15 pxor %xmm0,%xmm9 pshufd $0x4E,%xmm0,%xmm0 pxor %xmm1,%xmm12 pxor %xmm7,%xmm15 pxor %xmm6,%xmm13 pxor %xmm8,%xmm0 pxor %xmm3,%xmm11 pshufd $0x4E,%xmm1,%xmm7 pxor %xmm2,%xmm14 pshufd $0x4E,%xmm6,%xmm8 pxor %xmm5,%xmm10 pshufd $0x4E,%xmm3,%xmm1 pxor %xmm4,%xmm10 pshufd $0x4E,%xmm4,%xmm6 pxor %xmm4,%xmm11 pshufd $0x4E,%xmm2,%xmm3 pxor %xmm11,%xmm7 pshufd $0x4E,%xmm5,%xmm2 pxor %xmm12,%xmm8 pxor %xmm1,%xmm10 pxor %xmm14,%xmm6 pxor %xmm3,%xmm13 movdqa %xmm7,%xmm3 pxor %xmm9,%xmm2 movdqa %xmm13,%xmm5 movdqa %xmm8,%xmm4 movdqa %xmm2,%xmm1 movdqa %xmm10,%xmm2 movdqa -16(%r11),%xmm7 jnz .Ldec_loop movdqa -32(%r11),%xmm7 jmp .Ldec_loop .align 16 .Ldec_done: movdqa 0(%r11),%xmm7 movdqa 16(%r11),%xmm8 movdqa %xmm2,%xmm9 psrlq $1,%xmm2 movdqa %xmm1,%xmm10 psrlq $1,%xmm1 pxor %xmm4,%xmm2 pxor %xmm6,%xmm1 pand %xmm7,%xmm2 pand %xmm7,%xmm1 pxor %xmm2,%xmm4 psllq $1,%xmm2 pxor %xmm1,%xmm6 psllq $1,%xmm1 pxor %xmm9,%xmm2 pxor %xmm10,%xmm1 movdqa %xmm5,%xmm9 psrlq $1,%xmm5 movdqa %xmm15,%xmm10 psrlq $1,%xmm15 pxor %xmm3,%xmm5 pxor %xmm0,%xmm15 pand %xmm7,%xmm5 pand %xmm7,%xmm15 pxor %xmm5,%xmm3 psllq $1,%xmm5 pxor %xmm15,%xmm0 psllq $1,%xmm15 pxor %xmm9,%xmm5 pxor %xmm10,%xmm15 movdqa 32(%r11),%xmm7 movdqa %xmm6,%xmm9 psrlq $2,%xmm6 movdqa %xmm1,%xmm10 psrlq $2,%xmm1 pxor %xmm4,%xmm6 pxor %xmm2,%xmm1 pand %xmm8,%xmm6 pand %xmm8,%xmm1 pxor %xmm6,%xmm4 psllq $2,%xmm6 pxor %xmm1,%xmm2 psllq $2,%xmm1 pxor %xmm9,%xmm6 pxor %xmm10,%xmm1 movdqa %xmm0,%xmm9 psrlq $2,%xmm0 movdqa %xmm15,%xmm10 psrlq $2,%xmm15 pxor %xmm3,%xmm0 pxor %xmm5,%xmm15 pand %xmm8,%xmm0 pand %xmm8,%xmm15 pxor %xmm0,%xmm3 psllq $2,%xmm0 pxor %xmm15,%xmm5 psllq $2,%xmm15 pxor %xmm9,%xmm0 pxor %xmm10,%xmm15 movdqa %xmm3,%xmm9 psrlq $4,%xmm3 movdqa %xmm5,%xmm10 psrlq $4,%xmm5 pxor %xmm4,%xmm3 pxor %xmm2,%xmm5 pand %xmm7,%xmm3 pand %xmm7,%xmm5 pxor %xmm3,%xmm4 psllq $4,%xmm3 pxor %xmm5,%xmm2 psllq $4,%xmm5 pxor %xmm9,%xmm3 pxor %xmm10,%xmm5 movdqa %xmm0,%xmm9 psrlq $4,%xmm0 movdqa %xmm15,%xmm10 psrlq $4,%xmm15 pxor %xmm6,%xmm0 pxor %xmm1,%xmm15 pand %xmm7,%xmm0 pand %xmm7,%xmm15 pxor %xmm0,%xmm6 psllq $4,%xmm0 pxor %xmm15,%xmm1 psllq $4,%xmm15 pxor %xmm9,%xmm0 pxor %xmm10,%xmm15 movdqa (%rax),%xmm7 pxor %xmm7,%xmm5 pxor %xmm7,%xmm3 pxor %xmm7,%xmm1 pxor %xmm7,%xmm6 pxor %xmm7,%xmm2 pxor %xmm7,%xmm4 pxor %xmm7,%xmm15 pxor %xmm7,%xmm0 .byte 0xf3,0xc3 .size _bsaes_decrypt8,.-_bsaes_decrypt8 .type _bsaes_key_convert,@function .align 16 _bsaes_key_convert: leaq .Lmasks(%rip),%r11 movdqu (%rcx),%xmm7 leaq 16(%rcx),%rcx movdqa 0(%r11),%xmm0 movdqa 16(%r11),%xmm1 movdqa 32(%r11),%xmm2 movdqa 48(%r11),%xmm3 movdqa 64(%r11),%xmm4 pcmpeqd %xmm5,%xmm5 movdqu (%rcx),%xmm6 movdqa %xmm7,(%rax) leaq 16(%rax),%rax decl %r10d jmp .Lkey_loop .align 16 .Lkey_loop: .byte 102,15,56,0,244 movdqa %xmm0,%xmm8 movdqa %xmm1,%xmm9 pand %xmm6,%xmm8 pand %xmm6,%xmm9 movdqa %xmm2,%xmm10 pcmpeqb %xmm0,%xmm8 psllq $4,%xmm0 movdqa %xmm3,%xmm11 pcmpeqb %xmm1,%xmm9 psllq $4,%xmm1 pand %xmm6,%xmm10 pand %xmm6,%xmm11 movdqa %xmm0,%xmm12 pcmpeqb %xmm2,%xmm10 psllq $4,%xmm2 movdqa %xmm1,%xmm13 pcmpeqb %xmm3,%xmm11 psllq $4,%xmm3 movdqa %xmm2,%xmm14 movdqa %xmm3,%xmm15 pxor %xmm5,%xmm8 pxor %xmm5,%xmm9 pand %xmm6,%xmm12 pand %xmm6,%xmm13 movdqa 
%xmm8,0(%rax) pcmpeqb %xmm0,%xmm12 psrlq $4,%xmm0 movdqa %xmm9,16(%rax) pcmpeqb %xmm1,%xmm13 psrlq $4,%xmm1 leaq 16(%rcx),%rcx pand %xmm6,%xmm14 pand %xmm6,%xmm15 movdqa %xmm10,32(%rax) pcmpeqb %xmm2,%xmm14 psrlq $4,%xmm2 movdqa %xmm11,48(%rax) pcmpeqb %xmm3,%xmm15 psrlq $4,%xmm3 movdqu (%rcx),%xmm6 pxor %xmm5,%xmm13 pxor %xmm5,%xmm14 movdqa %xmm12,64(%rax) movdqa %xmm13,80(%rax) movdqa %xmm14,96(%rax) movdqa %xmm15,112(%rax) leaq 128(%rax),%rax decl %r10d jnz .Lkey_loop movdqa 80(%r11),%xmm7 .byte 0xf3,0xc3 .size _bsaes_key_convert,.-_bsaes_key_convert .globl bsaes_cbc_encrypt .type bsaes_cbc_encrypt,@function .align 16 bsaes_cbc_encrypt: cmpl $0,%r9d jne asm_AES_cbc_encrypt cmpq $128,%rdx jb asm_AES_cbc_encrypt movq %rsp,%rax .Lcbc_dec_prologue: pushq %rbp pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 leaq -72(%rsp),%rsp movq %rsp,%rbp movl 240(%rcx),%eax movq %rdi,%r12 movq %rsi,%r13 movq %rdx,%r14 movq %rcx,%r15 movq %r8,%rbx shrq $4,%r14 movl %eax,%edx shlq $7,%rax subq $96,%rax subq %rax,%rsp movq %rsp,%rax movq %r15,%rcx movl %edx,%r10d call _bsaes_key_convert pxor (%rsp),%xmm7 movdqa %xmm6,(%rax) movdqa %xmm7,(%rsp) movdqu (%rbx),%xmm14 subq $8,%r14 .Lcbc_dec_loop: movdqu 0(%r12),%xmm15 movdqu 16(%r12),%xmm0 movdqu 32(%r12),%xmm1 movdqu 48(%r12),%xmm2 movdqu 64(%r12),%xmm3 movdqu 80(%r12),%xmm4 movq %rsp,%rax movdqu 96(%r12),%xmm5 movl %edx,%r10d movdqu 112(%r12),%xmm6 movdqa %xmm14,32(%rbp) call _bsaes_decrypt8 pxor 32(%rbp),%xmm15 movdqu 0(%r12),%xmm7 movdqu 16(%r12),%xmm8 pxor %xmm7,%xmm0 movdqu 32(%r12),%xmm9 pxor %xmm8,%xmm5 movdqu 48(%r12),%xmm10 pxor %xmm9,%xmm3 movdqu 64(%r12),%xmm11 pxor %xmm10,%xmm1 movdqu 80(%r12),%xmm12 pxor %xmm11,%xmm6 movdqu 96(%r12),%xmm13 pxor %xmm12,%xmm2 movdqu 112(%r12),%xmm14 pxor %xmm13,%xmm4 movdqu %xmm15,0(%r13) leaq 128(%r12),%r12 movdqu %xmm0,16(%r13) movdqu %xmm5,32(%r13) movdqu %xmm3,48(%r13) movdqu %xmm1,64(%r13) movdqu %xmm6,80(%r13) movdqu %xmm2,96(%r13) movdqu %xmm4,112(%r13) leaq 128(%r13),%r13 subq $8,%r14 jnc .Lcbc_dec_loop addq $8,%r14 jz .Lcbc_dec_done movdqu 0(%r12),%xmm15 movq %rsp,%rax movl %edx,%r10d cmpq $2,%r14 jb .Lcbc_dec_one movdqu 16(%r12),%xmm0 je .Lcbc_dec_two movdqu 32(%r12),%xmm1 cmpq $4,%r14 jb .Lcbc_dec_three movdqu 48(%r12),%xmm2 je .Lcbc_dec_four movdqu 64(%r12),%xmm3 cmpq $6,%r14 jb .Lcbc_dec_five movdqu 80(%r12),%xmm4 je .Lcbc_dec_six movdqu 96(%r12),%xmm5 movdqa %xmm14,32(%rbp) call _bsaes_decrypt8 pxor 32(%rbp),%xmm15 movdqu 0(%r12),%xmm7 movdqu 16(%r12),%xmm8 pxor %xmm7,%xmm0 movdqu 32(%r12),%xmm9 pxor %xmm8,%xmm5 movdqu 48(%r12),%xmm10 pxor %xmm9,%xmm3 movdqu 64(%r12),%xmm11 pxor %xmm10,%xmm1 movdqu 80(%r12),%xmm12 pxor %xmm11,%xmm6 movdqu 96(%r12),%xmm14 pxor %xmm12,%xmm2 movdqu %xmm15,0(%r13) movdqu %xmm0,16(%r13) movdqu %xmm5,32(%r13) movdqu %xmm3,48(%r13) movdqu %xmm1,64(%r13) movdqu %xmm6,80(%r13) movdqu %xmm2,96(%r13) jmp .Lcbc_dec_done .align 16 .Lcbc_dec_six: movdqa %xmm14,32(%rbp) call _bsaes_decrypt8 pxor 32(%rbp),%xmm15 movdqu 0(%r12),%xmm7 movdqu 16(%r12),%xmm8 pxor %xmm7,%xmm0 movdqu 32(%r12),%xmm9 pxor %xmm8,%xmm5 movdqu 48(%r12),%xmm10 pxor %xmm9,%xmm3 movdqu 64(%r12),%xmm11 pxor %xmm10,%xmm1 movdqu 80(%r12),%xmm14 pxor %xmm11,%xmm6 movdqu %xmm15,0(%r13) movdqu %xmm0,16(%r13) movdqu %xmm5,32(%r13) movdqu %xmm3,48(%r13) movdqu %xmm1,64(%r13) movdqu %xmm6,80(%r13) jmp .Lcbc_dec_done .align 16 .Lcbc_dec_five: movdqa %xmm14,32(%rbp) call _bsaes_decrypt8 pxor 32(%rbp),%xmm15 movdqu 0(%r12),%xmm7 movdqu 16(%r12),%xmm8 pxor %xmm7,%xmm0 movdqu 32(%r12),%xmm9 pxor %xmm8,%xmm5 movdqu 
48(%r12),%xmm10 pxor %xmm9,%xmm3 movdqu 64(%r12),%xmm14 pxor %xmm10,%xmm1 movdqu %xmm15,0(%r13) movdqu %xmm0,16(%r13) movdqu %xmm5,32(%r13) movdqu %xmm3,48(%r13) movdqu %xmm1,64(%r13) jmp .Lcbc_dec_done .align 16 .Lcbc_dec_four: movdqa %xmm14,32(%rbp) call _bsaes_decrypt8 pxor 32(%rbp),%xmm15 movdqu 0(%r12),%xmm7 movdqu 16(%r12),%xmm8 pxor %xmm7,%xmm0 movdqu 32(%r12),%xmm9 pxor %xmm8,%xmm5 movdqu 48(%r12),%xmm14 pxor %xmm9,%xmm3 movdqu %xmm15,0(%r13) movdqu %xmm0,16(%r13) movdqu %xmm5,32(%r13) movdqu %xmm3,48(%r13) jmp .Lcbc_dec_done .align 16 .Lcbc_dec_three: movdqa %xmm14,32(%rbp) call _bsaes_decrypt8 pxor 32(%rbp),%xmm15 movdqu 0(%r12),%xmm7 movdqu 16(%r12),%xmm8 pxor %xmm7,%xmm0 movdqu 32(%r12),%xmm14 pxor %xmm8,%xmm5 movdqu %xmm15,0(%r13) movdqu %xmm0,16(%r13) movdqu %xmm5,32(%r13) jmp .Lcbc_dec_done .align 16 .Lcbc_dec_two: movdqa %xmm14,32(%rbp) call _bsaes_decrypt8 pxor 32(%rbp),%xmm15 movdqu 0(%r12),%xmm7 movdqu 16(%r12),%xmm14 pxor %xmm7,%xmm0 movdqu %xmm15,0(%r13) movdqu %xmm0,16(%r13) jmp .Lcbc_dec_done .align 16 .Lcbc_dec_one: leaq (%r12),%rdi leaq 32(%rbp),%rsi leaq (%r15),%rdx call asm_AES_decrypt pxor 32(%rbp),%xmm14 movdqu %xmm14,(%r13) movdqa %xmm15,%xmm14 .Lcbc_dec_done: movdqu %xmm14,(%rbx) leaq (%rsp),%rax pxor %xmm0,%xmm0 .Lcbc_dec_bzero: movdqa %xmm0,0(%rax) movdqa %xmm0,16(%rax) leaq 32(%rax),%rax cmpq %rax,%rbp ja .Lcbc_dec_bzero leaq (%rbp),%rsp movq 72(%rsp),%r15 movq 80(%rsp),%r14 movq 88(%rsp),%r13 movq 96(%rsp),%r12 movq 104(%rsp),%rbx movq 112(%rsp),%rax leaq 120(%rsp),%rsp movq %rax,%rbp .Lcbc_dec_epilogue: .byte 0xf3,0xc3 .size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt .globl bsaes_ctr32_encrypt_blocks .type bsaes_ctr32_encrypt_blocks,@function .align 16 bsaes_ctr32_encrypt_blocks: movq %rsp,%rax .Lctr_enc_prologue: pushq %rbp pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 leaq -72(%rsp),%rsp movq %rsp,%rbp movdqu (%r8),%xmm0 movl 240(%rcx),%eax movq %rdi,%r12 movq %rsi,%r13 movq %rdx,%r14 movq %rcx,%r15 movdqa %xmm0,32(%rbp) cmpq $8,%rdx jb .Lctr_enc_short movl %eax,%ebx shlq $7,%rax subq $96,%rax subq %rax,%rsp movq %rsp,%rax movq %r15,%rcx movl %ebx,%r10d call _bsaes_key_convert pxor %xmm6,%xmm7 movdqa %xmm7,(%rax) movdqa (%rsp),%xmm8 leaq .LADD1(%rip),%r11 movdqa 32(%rbp),%xmm15 movdqa -32(%r11),%xmm7 .byte 102,68,15,56,0,199 .byte 102,68,15,56,0,255 movdqa %xmm8,(%rsp) jmp .Lctr_enc_loop .align 16 .Lctr_enc_loop: movdqa %xmm15,32(%rbp) movdqa %xmm15,%xmm0 movdqa %xmm15,%xmm1 paddd 0(%r11),%xmm0 movdqa %xmm15,%xmm2 paddd 16(%r11),%xmm1 movdqa %xmm15,%xmm3 paddd 32(%r11),%xmm2 movdqa %xmm15,%xmm4 paddd 48(%r11),%xmm3 movdqa %xmm15,%xmm5 paddd 64(%r11),%xmm4 movdqa %xmm15,%xmm6 paddd 80(%r11),%xmm5 paddd 96(%r11),%xmm6 movdqa (%rsp),%xmm8 leaq 16(%rsp),%rax movdqa -16(%r11),%xmm7 pxor %xmm8,%xmm15 pxor %xmm8,%xmm0 pxor %xmm8,%xmm1 pxor %xmm8,%xmm2 .byte 102,68,15,56,0,255 .byte 102,15,56,0,199 pxor %xmm8,%xmm3 pxor %xmm8,%xmm4 .byte 102,15,56,0,207 .byte 102,15,56,0,215 pxor %xmm8,%xmm5 pxor %xmm8,%xmm6 .byte 102,15,56,0,223 .byte 102,15,56,0,231 .byte 102,15,56,0,239 .byte 102,15,56,0,247 leaq .LBS0(%rip),%r11 movl %ebx,%r10d call _bsaes_encrypt8_bitslice subq $8,%r14 jc .Lctr_enc_loop_done movdqu 0(%r12),%xmm7 movdqu 16(%r12),%xmm8 movdqu 32(%r12),%xmm9 movdqu 48(%r12),%xmm10 movdqu 64(%r12),%xmm11 movdqu 80(%r12),%xmm12 movdqu 96(%r12),%xmm13 movdqu 112(%r12),%xmm14 leaq 128(%r12),%r12 pxor %xmm15,%xmm7 movdqa 32(%rbp),%xmm15 pxor %xmm8,%xmm0 movdqu %xmm7,0(%r13) pxor %xmm9,%xmm3 movdqu %xmm0,16(%r13) pxor %xmm10,%xmm5 movdqu %xmm3,32(%r13) pxor 
%xmm11,%xmm2 movdqu %xmm5,48(%r13) pxor %xmm12,%xmm6 movdqu %xmm2,64(%r13) pxor %xmm13,%xmm1 movdqu %xmm6,80(%r13) pxor %xmm14,%xmm4 movdqu %xmm1,96(%r13) leaq .LADD1(%rip),%r11 movdqu %xmm4,112(%r13) leaq 128(%r13),%r13 paddd 112(%r11),%xmm15 jnz .Lctr_enc_loop jmp .Lctr_enc_done .align 16 .Lctr_enc_loop_done: addq $8,%r14 movdqu 0(%r12),%xmm7 pxor %xmm7,%xmm15 movdqu %xmm15,0(%r13) cmpq $2,%r14 jb .Lctr_enc_done movdqu 16(%r12),%xmm8 pxor %xmm8,%xmm0 movdqu %xmm0,16(%r13) je .Lctr_enc_done movdqu 32(%r12),%xmm9 pxor %xmm9,%xmm3 movdqu %xmm3,32(%r13) cmpq $4,%r14 jb .Lctr_enc_done movdqu 48(%r12),%xmm10 pxor %xmm10,%xmm5 movdqu %xmm5,48(%r13) je .Lctr_enc_done movdqu 64(%r12),%xmm11 pxor %xmm11,%xmm2 movdqu %xmm2,64(%r13) cmpq $6,%r14 jb .Lctr_enc_done movdqu 80(%r12),%xmm12 pxor %xmm12,%xmm6 movdqu %xmm6,80(%r13) je .Lctr_enc_done movdqu 96(%r12),%xmm13 pxor %xmm13,%xmm1 movdqu %xmm1,96(%r13) jmp .Lctr_enc_done .align 16 .Lctr_enc_short: leaq 32(%rbp),%rdi leaq 48(%rbp),%rsi leaq (%r15),%rdx call asm_AES_encrypt movdqu (%r12),%xmm0 leaq 16(%r12),%r12 movl 44(%rbp),%eax bswapl %eax pxor 48(%rbp),%xmm0 incl %eax movdqu %xmm0,(%r13) bswapl %eax leaq 16(%r13),%r13 movl %eax,44(%rsp) decq %r14 jnz .Lctr_enc_short .Lctr_enc_done: leaq (%rsp),%rax pxor %xmm0,%xmm0 .Lctr_enc_bzero: movdqa %xmm0,0(%rax) movdqa %xmm0,16(%rax) leaq 32(%rax),%rax cmpq %rax,%rbp ja .Lctr_enc_bzero leaq (%rbp),%rsp movq 72(%rsp),%r15 movq 80(%rsp),%r14 movq 88(%rsp),%r13 movq 96(%rsp),%r12 movq 104(%rsp),%rbx movq 112(%rsp),%rax leaq 120(%rsp),%rsp movq %rax,%rbp .Lctr_enc_epilogue: .byte 0xf3,0xc3 .size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks .globl bsaes_xts_encrypt .type bsaes_xts_encrypt,@function .align 16 bsaes_xts_encrypt: movq %rsp,%rax .Lxts_enc_prologue: pushq %rbp pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 leaq -72(%rsp),%rsp movq %rsp,%rbp movq %rdi,%r12 movq %rsi,%r13 movq %rdx,%r14 movq %rcx,%r15 leaq (%r9),%rdi leaq 32(%rbp),%rsi leaq (%r8),%rdx call asm_AES_encrypt movl 240(%r15),%eax movq %r14,%rbx movl %eax,%edx shlq $7,%rax subq $96,%rax subq %rax,%rsp movq %rsp,%rax movq %r15,%rcx movl %edx,%r10d call _bsaes_key_convert pxor %xmm6,%xmm7 movdqa %xmm7,(%rax) andq $-16,%r14 subq $0x80,%rsp movdqa 32(%rbp),%xmm6 pxor %xmm14,%xmm14 movdqa .Lxts_magic(%rip),%xmm12 pcmpgtd %xmm6,%xmm14 subq $0x80,%r14 jc .Lxts_enc_short jmp .Lxts_enc_loop .align 16 .Lxts_enc_loop: pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm15 movdqa %xmm6,0(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm0 movdqa %xmm6,16(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 0(%r12),%xmm7 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm1 movdqa %xmm6,32(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 16(%r12),%xmm8 pxor %xmm7,%xmm15 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm2 movdqa %xmm6,48(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 32(%r12),%xmm9 pxor %xmm8,%xmm0 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm3 movdqa %xmm6,64(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 48(%r12),%xmm10 pxor %xmm9,%xmm1 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm4 movdqa %xmm6,80(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 
64(%r12),%xmm11 pxor %xmm10,%xmm2 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm5 movdqa %xmm6,96(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 80(%r12),%xmm12 pxor %xmm11,%xmm3 movdqu 96(%r12),%xmm13 pxor %xmm12,%xmm4 movdqu 112(%r12),%xmm14 leaq 128(%r12),%r12 movdqa %xmm6,112(%rsp) pxor %xmm13,%xmm5 leaq 128(%rsp),%rax pxor %xmm14,%xmm6 movl %edx,%r10d call _bsaes_encrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm3 movdqu %xmm0,16(%r13) pxor 48(%rsp),%xmm5 movdqu %xmm3,32(%r13) pxor 64(%rsp),%xmm2 movdqu %xmm5,48(%r13) pxor 80(%rsp),%xmm6 movdqu %xmm2,64(%r13) pxor 96(%rsp),%xmm1 movdqu %xmm6,80(%r13) pxor 112(%rsp),%xmm4 movdqu %xmm1,96(%r13) movdqu %xmm4,112(%r13) leaq 128(%r13),%r13 movdqa 112(%rsp),%xmm6 pxor %xmm14,%xmm14 movdqa .Lxts_magic(%rip),%xmm12 pcmpgtd %xmm6,%xmm14 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 subq $0x80,%r14 jnc .Lxts_enc_loop .Lxts_enc_short: addq $0x80,%r14 jz .Lxts_enc_done pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm15 movdqa %xmm6,0(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm0 movdqa %xmm6,16(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 0(%r12),%xmm7 cmpq $16,%r14 je .Lxts_enc_1 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm1 movdqa %xmm6,32(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 16(%r12),%xmm8 cmpq $32,%r14 je .Lxts_enc_2 pxor %xmm7,%xmm15 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm2 movdqa %xmm6,48(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 32(%r12),%xmm9 cmpq $48,%r14 je .Lxts_enc_3 pxor %xmm8,%xmm0 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm3 movdqa %xmm6,64(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 48(%r12),%xmm10 cmpq $64,%r14 je .Lxts_enc_4 pxor %xmm9,%xmm1 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm4 movdqa %xmm6,80(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 64(%r12),%xmm11 cmpq $80,%r14 je .Lxts_enc_5 pxor %xmm10,%xmm2 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm5 movdqa %xmm6,96(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 80(%r12),%xmm12 cmpq $96,%r14 je .Lxts_enc_6 pxor %xmm11,%xmm3 movdqu 96(%r12),%xmm13 pxor %xmm12,%xmm4 movdqa %xmm6,112(%rsp) leaq 112(%r12),%r12 pxor %xmm13,%xmm5 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_encrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm3 movdqu %xmm0,16(%r13) pxor 48(%rsp),%xmm5 movdqu %xmm3,32(%r13) pxor 64(%rsp),%xmm2 movdqu %xmm5,48(%r13) pxor 80(%rsp),%xmm6 movdqu %xmm2,64(%r13) pxor 96(%rsp),%xmm1 movdqu %xmm6,80(%r13) movdqu %xmm1,96(%r13) leaq 112(%r13),%r13 movdqa 112(%rsp),%xmm6 jmp .Lxts_enc_done .align 16 .Lxts_enc_6: pxor %xmm11,%xmm3 leaq 96(%r12),%r12 pxor %xmm12,%xmm4 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_encrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm3 movdqu %xmm0,16(%r13) pxor 48(%rsp),%xmm5 movdqu %xmm3,32(%r13) pxor 64(%rsp),%xmm2 movdqu %xmm5,48(%r13) pxor 80(%rsp),%xmm6 movdqu %xmm2,64(%r13) movdqu %xmm6,80(%r13) leaq 
96(%r13),%r13 movdqa 96(%rsp),%xmm6 jmp .Lxts_enc_done .align 16 .Lxts_enc_5: pxor %xmm10,%xmm2 leaq 80(%r12),%r12 pxor %xmm11,%xmm3 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_encrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm3 movdqu %xmm0,16(%r13) pxor 48(%rsp),%xmm5 movdqu %xmm3,32(%r13) pxor 64(%rsp),%xmm2 movdqu %xmm5,48(%r13) movdqu %xmm2,64(%r13) leaq 80(%r13),%r13 movdqa 80(%rsp),%xmm6 jmp .Lxts_enc_done .align 16 .Lxts_enc_4: pxor %xmm9,%xmm1 leaq 64(%r12),%r12 pxor %xmm10,%xmm2 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_encrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm3 movdqu %xmm0,16(%r13) pxor 48(%rsp),%xmm5 movdqu %xmm3,32(%r13) movdqu %xmm5,48(%r13) leaq 64(%r13),%r13 movdqa 64(%rsp),%xmm6 jmp .Lxts_enc_done .align 16 .Lxts_enc_3: pxor %xmm8,%xmm0 leaq 48(%r12),%r12 pxor %xmm9,%xmm1 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_encrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm3 movdqu %xmm0,16(%r13) movdqu %xmm3,32(%r13) leaq 48(%r13),%r13 movdqa 48(%rsp),%xmm6 jmp .Lxts_enc_done .align 16 .Lxts_enc_2: pxor %xmm7,%xmm15 leaq 32(%r12),%r12 pxor %xmm8,%xmm0 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_encrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) movdqu %xmm0,16(%r13) leaq 32(%r13),%r13 movdqa 32(%rsp),%xmm6 jmp .Lxts_enc_done .align 16 .Lxts_enc_1: pxor %xmm15,%xmm7 leaq 16(%r12),%r12 movdqa %xmm7,32(%rbp) leaq 32(%rbp),%rdi leaq 32(%rbp),%rsi leaq (%r15),%rdx call asm_AES_encrypt pxor 32(%rbp),%xmm15 movdqu %xmm15,0(%r13) leaq 16(%r13),%r13 movdqa 16(%rsp),%xmm6 .Lxts_enc_done: andl $15,%ebx jz .Lxts_enc_ret movq %r13,%rdx .Lxts_enc_steal: movzbl (%r12),%eax movzbl -16(%rdx),%ecx leaq 1(%r12),%r12 movb %al,-16(%rdx) movb %cl,0(%rdx) leaq 1(%rdx),%rdx subl $1,%ebx jnz .Lxts_enc_steal movdqu -16(%r13),%xmm15 leaq 32(%rbp),%rdi pxor %xmm6,%xmm15 leaq 32(%rbp),%rsi movdqa %xmm15,32(%rbp) leaq (%r15),%rdx call asm_AES_encrypt pxor 32(%rbp),%xmm6 movdqu %xmm6,-16(%r13) .Lxts_enc_ret: leaq (%rsp),%rax pxor %xmm0,%xmm0 .Lxts_enc_bzero: movdqa %xmm0,0(%rax) movdqa %xmm0,16(%rax) leaq 32(%rax),%rax cmpq %rax,%rbp ja .Lxts_enc_bzero leaq (%rbp),%rsp movq 72(%rsp),%r15 movq 80(%rsp),%r14 movq 88(%rsp),%r13 movq 96(%rsp),%r12 movq 104(%rsp),%rbx movq 112(%rsp),%rax leaq 120(%rsp),%rsp movq %rax,%rbp .Lxts_enc_epilogue: .byte 0xf3,0xc3 .size bsaes_xts_encrypt,.-bsaes_xts_encrypt .globl bsaes_xts_decrypt .type bsaes_xts_decrypt,@function .align 16 bsaes_xts_decrypt: movq %rsp,%rax .Lxts_dec_prologue: pushq %rbp pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 leaq -72(%rsp),%rsp movq %rsp,%rbp movq %rdi,%r12 movq %rsi,%r13 movq %rdx,%r14 movq %rcx,%r15 leaq (%r9),%rdi leaq 32(%rbp),%rsi leaq (%r8),%rdx call asm_AES_encrypt movl 240(%r15),%eax movq %r14,%rbx movl %eax,%edx shlq $7,%rax subq $96,%rax subq %rax,%rsp movq %rsp,%rax movq %r15,%rcx movl %edx,%r10d call _bsaes_key_convert pxor (%rsp),%xmm7 movdqa %xmm6,(%rax) movdqa %xmm7,(%rsp) xorl %eax,%eax andq $-16,%r14 testl $15,%ebx setnz %al shlq $4,%rax subq %rax,%r14 subq $0x80,%rsp movdqa 32(%rbp),%xmm6 pxor %xmm14,%xmm14 movdqa .Lxts_magic(%rip),%xmm12 pcmpgtd %xmm6,%xmm14 subq $0x80,%r14 jc .Lxts_dec_short jmp .Lxts_dec_loop .align 16 .Lxts_dec_loop: pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm15 movdqa %xmm6,0(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa 
%xmm6,%xmm0 movdqa %xmm6,16(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 0(%r12),%xmm7 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm1 movdqa %xmm6,32(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 16(%r12),%xmm8 pxor %xmm7,%xmm15 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm2 movdqa %xmm6,48(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 32(%r12),%xmm9 pxor %xmm8,%xmm0 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm3 movdqa %xmm6,64(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 48(%r12),%xmm10 pxor %xmm9,%xmm1 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm4 movdqa %xmm6,80(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 64(%r12),%xmm11 pxor %xmm10,%xmm2 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm5 movdqa %xmm6,96(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 80(%r12),%xmm12 pxor %xmm11,%xmm3 movdqu 96(%r12),%xmm13 pxor %xmm12,%xmm4 movdqu 112(%r12),%xmm14 leaq 128(%r12),%r12 movdqa %xmm6,112(%rsp) pxor %xmm13,%xmm5 leaq 128(%rsp),%rax pxor %xmm14,%xmm6 movl %edx,%r10d call _bsaes_decrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm5 movdqu %xmm0,16(%r13) pxor 48(%rsp),%xmm3 movdqu %xmm5,32(%r13) pxor 64(%rsp),%xmm1 movdqu %xmm3,48(%r13) pxor 80(%rsp),%xmm6 movdqu %xmm1,64(%r13) pxor 96(%rsp),%xmm2 movdqu %xmm6,80(%r13) pxor 112(%rsp),%xmm4 movdqu %xmm2,96(%r13) movdqu %xmm4,112(%r13) leaq 128(%r13),%r13 movdqa 112(%rsp),%xmm6 pxor %xmm14,%xmm14 movdqa .Lxts_magic(%rip),%xmm12 pcmpgtd %xmm6,%xmm14 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 subq $0x80,%r14 jnc .Lxts_dec_loop .Lxts_dec_short: addq $0x80,%r14 jz .Lxts_dec_done pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm15 movdqa %xmm6,0(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm0 movdqa %xmm6,16(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 0(%r12),%xmm7 cmpq $16,%r14 je .Lxts_dec_1 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm1 movdqa %xmm6,32(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 16(%r12),%xmm8 cmpq $32,%r14 je .Lxts_dec_2 pxor %xmm7,%xmm15 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm2 movdqa %xmm6,48(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 32(%r12),%xmm9 cmpq $48,%r14 je .Lxts_dec_3 pxor %xmm8,%xmm0 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm3 movdqa %xmm6,64(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 48(%r12),%xmm10 cmpq $64,%r14 je .Lxts_dec_4 pxor %xmm9,%xmm1 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm4 movdqa %xmm6,80(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 64(%r12),%xmm11 cmpq $80,%r14 je .Lxts_dec_5 pxor %xmm10,%xmm2 pshufd $0x13,%xmm14,%xmm13 pxor %xmm14,%xmm14 movdqa %xmm6,%xmm5 movdqa %xmm6,96(%rsp) paddq %xmm6,%xmm6 pand %xmm12,%xmm13 pcmpgtd %xmm6,%xmm14 pxor %xmm13,%xmm6 movdqu 80(%r12),%xmm12 cmpq $96,%r14 je .Lxts_dec_6 pxor %xmm11,%xmm3 movdqu 
96(%r12),%xmm13 pxor %xmm12,%xmm4 movdqa %xmm6,112(%rsp) leaq 112(%r12),%r12 pxor %xmm13,%xmm5 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_decrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm5 movdqu %xmm0,16(%r13) pxor 48(%rsp),%xmm3 movdqu %xmm5,32(%r13) pxor 64(%rsp),%xmm1 movdqu %xmm3,48(%r13) pxor 80(%rsp),%xmm6 movdqu %xmm1,64(%r13) pxor 96(%rsp),%xmm2 movdqu %xmm6,80(%r13) movdqu %xmm2,96(%r13) leaq 112(%r13),%r13 movdqa 112(%rsp),%xmm6 jmp .Lxts_dec_done .align 16 .Lxts_dec_6: pxor %xmm11,%xmm3 leaq 96(%r12),%r12 pxor %xmm12,%xmm4 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_decrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm5 movdqu %xmm0,16(%r13) pxor 48(%rsp),%xmm3 movdqu %xmm5,32(%r13) pxor 64(%rsp),%xmm1 movdqu %xmm3,48(%r13) pxor 80(%rsp),%xmm6 movdqu %xmm1,64(%r13) movdqu %xmm6,80(%r13) leaq 96(%r13),%r13 movdqa 96(%rsp),%xmm6 jmp .Lxts_dec_done .align 16 .Lxts_dec_5: pxor %xmm10,%xmm2 leaq 80(%r12),%r12 pxor %xmm11,%xmm3 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_decrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm5 movdqu %xmm0,16(%r13) pxor 48(%rsp),%xmm3 movdqu %xmm5,32(%r13) pxor 64(%rsp),%xmm1 movdqu %xmm3,48(%r13) movdqu %xmm1,64(%r13) leaq 80(%r13),%r13 movdqa 80(%rsp),%xmm6 jmp .Lxts_dec_done .align 16 .Lxts_dec_4: pxor %xmm9,%xmm1 leaq 64(%r12),%r12 pxor %xmm10,%xmm2 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_decrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm5 movdqu %xmm0,16(%r13) pxor 48(%rsp),%xmm3 movdqu %xmm5,32(%r13) movdqu %xmm3,48(%r13) leaq 64(%r13),%r13 movdqa 64(%rsp),%xmm6 jmp .Lxts_dec_done .align 16 .Lxts_dec_3: pxor %xmm8,%xmm0 leaq 48(%r12),%r12 pxor %xmm9,%xmm1 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_decrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) pxor 32(%rsp),%xmm5 movdqu %xmm0,16(%r13) movdqu %xmm5,32(%r13) leaq 48(%r13),%r13 movdqa 48(%rsp),%xmm6 jmp .Lxts_dec_done .align 16 .Lxts_dec_2: pxor %xmm7,%xmm15 leaq 32(%r12),%r12 pxor %xmm8,%xmm0 leaq 128(%rsp),%rax movl %edx,%r10d call _bsaes_decrypt8 pxor 0(%rsp),%xmm15 pxor 16(%rsp),%xmm0 movdqu %xmm15,0(%r13) movdqu %xmm0,16(%r13) leaq 32(%r13),%r13 movdqa 32(%rsp),%xmm6 jmp .Lxts_dec_done .align 16 .Lxts_dec_1: pxor %xmm15,%xmm7 leaq 16(%r12),%r12 movdqa %xmm7,32(%rbp) leaq 32(%rbp),%rdi leaq 32(%rbp),%rsi leaq (%r15),%rdx call asm_AES_decrypt pxor 32(%rbp),%xmm15 movdqu %xmm15,0(%r13) leaq 16(%r13),%r13 movdqa 16(%rsp),%xmm6 .Lxts_dec_done: andl $15,%ebx jz .Lxts_dec_ret pxor %xmm14,%xmm14 movdqa .Lxts_magic(%rip),%xmm12 pcmpgtd %xmm6,%xmm14 pshufd $0x13,%xmm14,%xmm13 movdqa %xmm6,%xmm5 paddq %xmm6,%xmm6 pand %xmm12,%xmm13 movdqu (%r12),%xmm15 pxor %xmm13,%xmm6 leaq 32(%rbp),%rdi pxor %xmm6,%xmm15 leaq 32(%rbp),%rsi movdqa %xmm15,32(%rbp) leaq (%r15),%rdx call asm_AES_decrypt pxor 32(%rbp),%xmm6 movq %r13,%rdx movdqu %xmm6,(%r13) .Lxts_dec_steal: movzbl 16(%r12),%eax movzbl (%rdx),%ecx leaq 1(%r12),%r12 movb %al,(%rdx) movb %cl,16(%rdx) leaq 1(%rdx),%rdx subl $1,%ebx jnz .Lxts_dec_steal movdqu (%r13),%xmm15 leaq 32(%rbp),%rdi pxor %xmm5,%xmm15 leaq 32(%rbp),%rsi movdqa %xmm15,32(%rbp) leaq (%r15),%rdx call asm_AES_decrypt pxor 32(%rbp),%xmm5 movdqu %xmm5,(%r13) .Lxts_dec_ret: leaq (%rsp),%rax pxor %xmm0,%xmm0 .Lxts_dec_bzero: movdqa %xmm0,0(%rax) movdqa %xmm0,16(%rax) leaq 32(%rax),%rax cmpq %rax,%rbp ja .Lxts_dec_bzero leaq (%rbp),%rsp movq 72(%rsp),%r15 movq 80(%rsp),%r14 movq 88(%rsp),%r13 movq 
96(%rsp),%r12 movq 104(%rsp),%rbx movq 112(%rsp),%rax leaq 120(%rsp),%rsp movq %rax,%rbp .Lxts_dec_epilogue: .byte 0xf3,0xc3 .size bsaes_xts_decrypt,.-bsaes_xts_decrypt .type _bsaes_const,@object .align 64 _bsaes_const: .LM0ISR: .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 .LISRM0: .quad 0x01040b0e0205080f, 0x0306090c00070a0d .LISR: .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 .LBS0: .quad 0x5555555555555555, 0x5555555555555555 .LBS1: .quad 0x3333333333333333, 0x3333333333333333 .LBS2: .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f .LSR: .quad 0x0504070600030201, 0x0f0e0d0c0a09080b .LSRM0: .quad 0x0304090e00050a0f, 0x01060b0c0207080d .LM0SR: .quad 0x0a0e02060f03070b, 0x0004080c05090d01 .LSWPUP: .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 .LSWPUPM0SR: .quad 0x0a0d02060c03070b, 0x0004080f05090e01 .LADD1: .quad 0x0000000000000000, 0x0000000100000000 .LADD2: .quad 0x0000000000000000, 0x0000000200000000 .LADD3: .quad 0x0000000000000000, 0x0000000300000000 .LADD4: .quad 0x0000000000000000, 0x0000000400000000 .LADD5: .quad 0x0000000000000000, 0x0000000500000000 .LADD6: .quad 0x0000000000000000, 0x0000000600000000 .LADD7: .quad 0x0000000000000000, 0x0000000700000000 .LADD8: .quad 0x0000000000000000, 0x0000000800000000 .Lxts_magic: .long 0x87,0,1,0 .Lmasks: .quad 0x0101010101010101, 0x0101010101010101 .quad 0x0202020202020202, 0x0202020202020202 .quad 0x0404040404040404, 0x0404040404040404 .quad 0x0808080808080808, 0x0808080808080808 .LM0: .quad 0x02060a0e03070b0f, 0x0004080c0105090d .L63: .quad 0x6363636363636363, 0x6363636363636363 .byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 .align 64 .size _bsaes_const,.-_bsaes_const Index: head/secure/lib/libcrypto/amd64/cmll-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/cmll-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/cmll-x86_64.S (revision 299481) @@ -1,1839 +1,1840 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from cmll-x86_64.pl. 
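#
# The Camellia routines below are a table-driven implementation built
# around the interleaved S-box table at .LCamellia_SBOX (four 32-bit
# sub-tables at byte offsets 0, 4, 2048 and 2052).  Camellia_EncryptBlock
# and Camellia_DecryptBlock derive the grand-round count from the key
# length passed in (3 for 128-bit keys, 4 for 192/256-bit keys) and
# branch into the corresponding *_Rounds entry points; Camellia_Ekeygen
# expands a 128/192/256-bit key into the round-key schedule and returns
# that same count; Camellia_cbc_encrypt layers CBC mode on top of the
# block primitives.
#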
.text .globl Camellia_EncryptBlock .type Camellia_EncryptBlock,@function .align 16 Camellia_EncryptBlock: movl $128,%eax subl %edi,%eax movl $3,%edi adcl $0,%edi jmp .Lenc_rounds .size Camellia_EncryptBlock,.-Camellia_EncryptBlock .globl Camellia_EncryptBlock_Rounds .type Camellia_EncryptBlock_Rounds,@function .align 16 .Lenc_rounds: Camellia_EncryptBlock_Rounds: pushq %rbx pushq %rbp pushq %r13 pushq %r14 pushq %r15 .Lenc_prologue: movq %rcx,%r13 movq %rdx,%r14 shll $6,%edi leaq .LCamellia_SBOX(%rip),%rbp leaq (%r14,%rdi,1),%r15 movl 0(%rsi),%r8d movl 4(%rsi),%r9d movl 8(%rsi),%r10d bswapl %r8d movl 12(%rsi),%r11d bswapl %r9d bswapl %r10d bswapl %r11d call _x86_64_Camellia_encrypt bswapl %r8d bswapl %r9d bswapl %r10d movl %r8d,0(%r13) bswapl %r11d movl %r9d,4(%r13) movl %r10d,8(%r13) movl %r11d,12(%r13) movq 0(%rsp),%r15 movq 8(%rsp),%r14 movq 16(%rsp),%r13 movq 24(%rsp),%rbp movq 32(%rsp),%rbx leaq 40(%rsp),%rsp .Lenc_epilogue: .byte 0xf3,0xc3 .size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds .type _x86_64_Camellia_encrypt,@function .align 16 _x86_64_Camellia_encrypt: xorl 0(%r14),%r9d xorl 4(%r14),%r8d xorl 8(%r14),%r11d xorl 12(%r14),%r10d .align 16 .Leloop: movl 16(%r14),%ebx movl 20(%r14),%eax xorl %r8d,%eax xorl %r9d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 24(%r14),%ebx movl 28(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r10d xorl %ecx,%r11d xorl %edx,%r11d xorl %r10d,%eax xorl %r11d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 32(%r14),%ebx movl 36(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r8d xorl %ecx,%r9d xorl %edx,%r9d xorl %r8d,%eax xorl %r9d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 40(%r14),%ebx movl 44(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r10d xorl %ecx,%r11d xorl %edx,%r11d xorl %r10d,%eax xorl %r11d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 48(%r14),%ebx movl 52(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r8d xorl %ecx,%r9d xorl %edx,%r9d xorl %r8d,%eax xorl %r9d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 
0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 56(%r14),%ebx movl 60(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r10d xorl %ecx,%r11d xorl %edx,%r11d xorl %r10d,%eax xorl %r11d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 64(%r14),%ebx movl 68(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r8d xorl %ecx,%r9d xorl %edx,%r9d leaq 64(%r14),%r14 cmpq %r15,%r14 movl 8(%r14),%edx movl 12(%r14),%ecx je .Ledone andl %r8d,%eax orl %r11d,%edx roll $1,%eax xorl %edx,%r10d xorl %eax,%r9d andl %r10d,%ecx orl %r9d,%ebx roll $1,%ecx xorl %ebx,%r8d xorl %ecx,%r11d jmp .Leloop .align 16 .Ledone: xorl %r10d,%eax xorl %r11d,%ebx xorl %r8d,%ecx xorl %r9d,%edx movl %eax,%r8d movl %ebx,%r9d movl %ecx,%r10d movl %edx,%r11d .byte 0xf3,0xc3 .size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt .globl Camellia_DecryptBlock .type Camellia_DecryptBlock,@function .align 16 Camellia_DecryptBlock: movl $128,%eax subl %edi,%eax movl $3,%edi adcl $0,%edi jmp .Ldec_rounds .size Camellia_DecryptBlock,.-Camellia_DecryptBlock .globl Camellia_DecryptBlock_Rounds .type Camellia_DecryptBlock_Rounds,@function .align 16 .Ldec_rounds: Camellia_DecryptBlock_Rounds: pushq %rbx pushq %rbp pushq %r13 pushq %r14 pushq %r15 .Ldec_prologue: movq %rcx,%r13 movq %rdx,%r15 shll $6,%edi leaq .LCamellia_SBOX(%rip),%rbp leaq (%r15,%rdi,1),%r14 movl 0(%rsi),%r8d movl 4(%rsi),%r9d movl 8(%rsi),%r10d bswapl %r8d movl 12(%rsi),%r11d bswapl %r9d bswapl %r10d bswapl %r11d call _x86_64_Camellia_decrypt bswapl %r8d bswapl %r9d bswapl %r10d movl %r8d,0(%r13) bswapl %r11d movl %r9d,4(%r13) movl %r10d,8(%r13) movl %r11d,12(%r13) movq 0(%rsp),%r15 movq 8(%rsp),%r14 movq 16(%rsp),%r13 movq 24(%rsp),%rbp movq 32(%rsp),%rbx leaq 40(%rsp),%rsp .Ldec_epilogue: .byte 0xf3,0xc3 .size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds .type _x86_64_Camellia_decrypt,@function .align 16 _x86_64_Camellia_decrypt: xorl 0(%r14),%r9d xorl 4(%r14),%r8d xorl 8(%r14),%r11d xorl 12(%r14),%r10d .align 16 .Ldloop: movl -8(%r14),%ebx movl -4(%r14),%eax xorl %r8d,%eax xorl %r9d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl -16(%r14),%ebx movl -12(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r10d xorl %ecx,%r11d xorl %edx,%r11d xorl %r10d,%eax xorl %r11d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl -24(%r14),%ebx movl -20(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r8d xorl %ecx,%r9d xorl %edx,%r9d xorl %r8d,%eax xorl %r9d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 
2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl -32(%r14),%ebx movl -28(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r10d xorl %ecx,%r11d xorl %edx,%r11d xorl %r10d,%eax xorl %r11d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl -40(%r14),%ebx movl -36(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r8d xorl %ecx,%r9d xorl %edx,%r9d xorl %r8d,%eax xorl %r9d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl -48(%r14),%ebx movl -44(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r10d xorl %ecx,%r11d xorl %edx,%r11d xorl %r10d,%eax xorl %r11d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl -56(%r14),%ebx movl -52(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r8d xorl %ecx,%r9d xorl %edx,%r9d leaq -64(%r14),%r14 cmpq %r15,%r14 movl 0(%r14),%edx movl 4(%r14),%ecx je .Lddone andl %r8d,%eax orl %r11d,%edx roll $1,%eax xorl %edx,%r10d xorl %eax,%r9d andl %r10d,%ecx orl %r9d,%ebx roll $1,%ecx xorl %ebx,%r8d xorl %ecx,%r11d jmp .Ldloop .align 16 .Lddone: xorl %r10d,%ecx xorl %r11d,%edx xorl %r8d,%eax xorl %r9d,%ebx movl %ecx,%r8d movl %edx,%r9d movl %eax,%r10d movl %ebx,%r11d .byte 0xf3,0xc3 .size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt .globl Camellia_Ekeygen .type Camellia_Ekeygen,@function .align 16 Camellia_Ekeygen: pushq %rbx pushq %rbp pushq %r13 pushq %r14 pushq %r15 .Lkey_prologue: movl %edi,%r15d movq %rdx,%r13 movl 0(%rsi),%r8d movl 4(%rsi),%r9d movl 8(%rsi),%r10d movl 12(%rsi),%r11d bswapl %r8d bswapl %r9d bswapl %r10d bswapl %r11d movl %r9d,0(%r13) movl %r8d,4(%r13) movl %r11d,8(%r13) movl %r10d,12(%r13) cmpq $128,%r15 je .L1st128 movl 16(%rsi),%r8d movl 20(%rsi),%r9d cmpq $192,%r15 je .L1st192 movl 24(%rsi),%r10d movl 28(%rsi),%r11d jmp .L1st256 .L1st192: movl %r8d,%r10d movl %r9d,%r11d notl %r10d notl %r11d .L1st256: bswapl %r8d bswapl %r9d bswapl %r10d bswapl %r11d movl %r9d,32(%r13) movl %r8d,36(%r13) movl %r11d,40(%r13) movl %r10d,44(%r13) xorl 0(%r13),%r9d xorl 4(%r13),%r8d xorl 8(%r13),%r11d xorl 12(%r13),%r10d .L1st128: leaq .LCamellia_SIGMA(%rip),%r14 leaq .LCamellia_SBOX(%rip),%rbp movl 0(%r14),%ebx movl 4(%r14),%eax xorl %r8d,%eax xorl %r9d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx 
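# (key-schedule F-function continues: the remaining S-box lookups fold the
# .LCamellia_SIGMA constants into the key state using the same four
# .LCamellia_SBOX sub-tables that the encrypt/decrypt round loops use)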
shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 8(%r14),%ebx movl 12(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r10d xorl %ecx,%r11d xorl %edx,%r11d xorl %r10d,%eax xorl %r11d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 16(%r14),%ebx movl 20(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r8d xorl %ecx,%r9d xorl %edx,%r9d xorl 0(%r13),%r9d xorl 4(%r13),%r8d xorl 8(%r13),%r11d xorl 12(%r13),%r10d xorl %r8d,%eax xorl %r9d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 24(%r14),%ebx movl 28(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r10d xorl %ecx,%r11d xorl %edx,%r11d xorl %r10d,%eax xorl %r11d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 32(%r14),%ebx movl 36(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r8d xorl %ecx,%r9d xorl %edx,%r9d cmpq $128,%r15 jne .L2nd256 leaq 128(%r13),%r13 shlq $32,%r8 shlq $32,%r10 orq %r9,%r8 orq %r11,%r10 movq -128(%r13),%rax movq -120(%r13),%rbx movq %r8,-112(%r13) movq %r10,-104(%r13) movq %rax,%r11 shlq $15,%rax movq %rbx,%r9 shrq $49,%r9 shrq $49,%r11 orq %r9,%rax shlq $15,%rbx orq %r11,%rbx movq %rax,-96(%r13) movq %rbx,-88(%r13) movq %r8,%r11 shlq $15,%r8 movq %r10,%r9 shrq $49,%r9 shrq $49,%r11 orq %r9,%r8 shlq $15,%r10 orq %r11,%r10 movq %r8,-80(%r13) movq %r10,-72(%r13) movq %r8,%r11 shlq $15,%r8 movq %r10,%r9 shrq $49,%r9 shrq $49,%r11 orq %r9,%r8 shlq $15,%r10 orq %r11,%r10 movq %r8,-64(%r13) movq %r10,-56(%r13) movq %rax,%r11 shlq $30,%rax movq %rbx,%r9 shrq $34,%r9 shrq $34,%r11 orq %r9,%rax shlq $30,%rbx orq %r11,%rbx movq %rax,-48(%r13) movq %rbx,-40(%r13) movq %r8,%r11 shlq $15,%r8 movq %r10,%r9 shrq $49,%r9 shrq $49,%r11 orq %r9,%r8 shlq $15,%r10 orq %r11,%r10 movq %r8,-32(%r13) movq %rax,%r11 shlq $15,%rax movq %rbx,%r9 shrq $49,%r9 shrq $49,%r11 orq %r9,%rax shlq $15,%rbx orq %r11,%rbx movq %rbx,-24(%r13) movq %r8,%r11 shlq $15,%r8 movq %r10,%r9 shrq $49,%r9 shrq $49,%r11 orq %r9,%r8 shlq $15,%r10 orq %r11,%r10 movq %r8,-16(%r13) movq %r10,-8(%r13) movq %rax,%r11 shlq $17,%rax movq %rbx,%r9 shrq $47,%r9 shrq $47,%r11 orq %r9,%rax shlq $17,%rbx orq %r11,%rbx movq %rax,0(%r13) movq %rbx,8(%r13) movq %rax,%r11 shlq $17,%rax movq %rbx,%r9 shrq $47,%r9 shrq $47,%r11 orq %r9,%rax shlq $17,%rbx orq %r11,%rbx movq %rax,16(%r13) movq %rbx,24(%r13) movq %r8,%r11 shlq $34,%r8 movq %r10,%r9 shrq $30,%r9 shrq $30,%r11 orq %r9,%r8 shlq $34,%r10 orq %r11,%r10 movq %r8,32(%r13) movq 
%r10,40(%r13) movq %rax,%r11 shlq $17,%rax movq %rbx,%r9 shrq $47,%r9 shrq $47,%r11 orq %r9,%rax shlq $17,%rbx orq %r11,%rbx movq %rax,48(%r13) movq %rbx,56(%r13) movq %r8,%r11 shlq $17,%r8 movq %r10,%r9 shrq $47,%r9 shrq $47,%r11 orq %r9,%r8 shlq $17,%r10 orq %r11,%r10 movq %r8,64(%r13) movq %r10,72(%r13) movl $3,%eax jmp .Ldone .align 16 .L2nd256: movl %r9d,48(%r13) movl %r8d,52(%r13) movl %r11d,56(%r13) movl %r10d,60(%r13) xorl 32(%r13),%r9d xorl 36(%r13),%r8d xorl 40(%r13),%r11d xorl 44(%r13),%r10d xorl %r8d,%eax xorl %r9d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 40(%r14),%ebx movl 44(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r10d xorl %ecx,%r11d xorl %edx,%r11d xorl %r10d,%eax xorl %r11d,%ebx movzbl %ah,%esi movzbl %bl,%edi movl 2052(%rbp,%rsi,8),%edx movl 0(%rbp,%rdi,8),%ecx movzbl %al,%esi shrl $16,%eax movzbl %bh,%edi xorl 4(%rbp,%rsi,8),%edx shrl $16,%ebx xorl 4(%rbp,%rdi,8),%ecx movzbl %ah,%esi movzbl %bl,%edi xorl 0(%rbp,%rsi,8),%edx xorl 2052(%rbp,%rdi,8),%ecx movzbl %al,%esi movzbl %bh,%edi xorl 2048(%rbp,%rsi,8),%edx xorl 2048(%rbp,%rdi,8),%ecx movl 48(%r14),%ebx movl 52(%r14),%eax xorl %edx,%ecx rorl $8,%edx xorl %ecx,%r8d xorl %ecx,%r9d xorl %edx,%r9d movq 0(%r13),%rax movq 8(%r13),%rbx movq 32(%r13),%rcx movq 40(%r13),%rdx movq 48(%r13),%r14 movq 56(%r13),%r15 leaq 128(%r13),%r13 shlq $32,%r8 shlq $32,%r10 orq %r9,%r8 orq %r11,%r10 movq %r8,-112(%r13) movq %r10,-104(%r13) movq %rcx,%r11 shlq $15,%rcx movq %rdx,%r9 shrq $49,%r9 shrq $49,%r11 orq %r9,%rcx shlq $15,%rdx orq %r11,%rdx movq %rcx,-96(%r13) movq %rdx,-88(%r13) movq %r14,%r11 shlq $15,%r14 movq %r15,%r9 shrq $49,%r9 shrq $49,%r11 orq %r9,%r14 shlq $15,%r15 orq %r11,%r15 movq %r14,-80(%r13) movq %r15,-72(%r13) movq %rcx,%r11 shlq $15,%rcx movq %rdx,%r9 shrq $49,%r9 shrq $49,%r11 orq %r9,%rcx shlq $15,%rdx orq %r11,%rdx movq %rcx,-64(%r13) movq %rdx,-56(%r13) movq %r8,%r11 shlq $30,%r8 movq %r10,%r9 shrq $34,%r9 shrq $34,%r11 orq %r9,%r8 shlq $30,%r10 orq %r11,%r10 movq %r8,-48(%r13) movq %r10,-40(%r13) movq %rax,%r11 shlq $45,%rax movq %rbx,%r9 shrq $19,%r9 shrq $19,%r11 orq %r9,%rax shlq $45,%rbx orq %r11,%rbx movq %rax,-32(%r13) movq %rbx,-24(%r13) movq %r14,%r11 shlq $30,%r14 movq %r15,%r9 shrq $34,%r9 shrq $34,%r11 orq %r9,%r14 shlq $30,%r15 orq %r11,%r15 movq %r14,-16(%r13) movq %r15,-8(%r13) movq %rax,%r11 shlq $15,%rax movq %rbx,%r9 shrq $49,%r9 shrq $49,%r11 orq %r9,%rax shlq $15,%rbx orq %r11,%rbx movq %rax,0(%r13) movq %rbx,8(%r13) movq %rcx,%r11 shlq $30,%rcx movq %rdx,%r9 shrq $34,%r9 shrq $34,%r11 orq %r9,%rcx shlq $30,%rdx orq %r11,%rdx movq %rcx,16(%r13) movq %rdx,24(%r13) movq %r8,%r11 shlq $30,%r8 movq %r10,%r9 shrq $34,%r9 shrq $34,%r11 orq %r9,%r8 shlq $30,%r10 orq %r11,%r10 movq %r8,32(%r13) movq %r10,40(%r13) movq %rax,%r11 shlq $17,%rax movq %rbx,%r9 shrq $47,%r9 shrq $47,%r11 orq %r9,%rax shlq $17,%rbx orq %r11,%rbx movq %rax,48(%r13) movq %rbx,56(%r13) movq %r14,%r11 shlq $32,%r14 movq %r15,%r9 shrq $32,%r9 shrq $32,%r11 orq %r9,%r14 shlq $32,%r15 orq %r11,%r15 movq %r14,64(%r13) movq %r15,72(%r13) movq %rcx,%r11 shlq $34,%rcx movq %rdx,%r9 shrq $30,%r9 shrq $30,%r11 orq %r9,%rcx shlq $34,%rdx orq %r11,%rdx movq %rcx,80(%r13) movq %rdx,88(%r13) movq 
%r14,%r11 shlq $17,%r14 movq %r15,%r9 shrq $47,%r9 shrq $47,%r11 orq %r9,%r14 shlq $17,%r15 orq %r11,%r15 movq %r14,96(%r13) movq %r15,104(%r13) movq %rax,%r11 shlq $34,%rax movq %rbx,%r9 shrq $30,%r9 shrq $30,%r11 orq %r9,%rax shlq $34,%rbx orq %r11,%rbx movq %rax,112(%r13) movq %rbx,120(%r13) movq %r8,%r11 shlq $51,%r8 movq %r10,%r9 shrq $13,%r9 shrq $13,%r11 orq %r9,%r8 shlq $51,%r10 orq %r11,%r10 movq %r8,128(%r13) movq %r10,136(%r13) movl $4,%eax .Ldone: movq 0(%rsp),%r15 movq 8(%rsp),%r14 movq 16(%rsp),%r13 movq 24(%rsp),%rbp movq 32(%rsp),%rbx leaq 40(%rsp),%rsp .Lkey_epilogue: .byte 0xf3,0xc3 .size Camellia_Ekeygen,.-Camellia_Ekeygen .align 64 .LCamellia_SIGMA: .long 0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858 .long 0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5 .long 0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2 .long 0, 0, 0, 0 .LCamellia_SBOX: .long 0x70707000,0x70700070 .long 0x82828200,0x2c2c002c .long 0x2c2c2c00,0xb3b300b3 .long 0xececec00,0xc0c000c0 .long 0xb3b3b300,0xe4e400e4 .long 0x27272700,0x57570057 .long 0xc0c0c000,0xeaea00ea .long 0xe5e5e500,0xaeae00ae .long 0xe4e4e400,0x23230023 .long 0x85858500,0x6b6b006b .long 0x57575700,0x45450045 .long 0x35353500,0xa5a500a5 .long 0xeaeaea00,0xeded00ed .long 0x0c0c0c00,0x4f4f004f .long 0xaeaeae00,0x1d1d001d .long 0x41414100,0x92920092 .long 0x23232300,0x86860086 .long 0xefefef00,0xafaf00af .long 0x6b6b6b00,0x7c7c007c .long 0x93939300,0x1f1f001f .long 0x45454500,0x3e3e003e .long 0x19191900,0xdcdc00dc .long 0xa5a5a500,0x5e5e005e .long 0x21212100,0x0b0b000b .long 0xededed00,0xa6a600a6 .long 0x0e0e0e00,0x39390039 .long 0x4f4f4f00,0xd5d500d5 .long 0x4e4e4e00,0x5d5d005d .long 0x1d1d1d00,0xd9d900d9 .long 0x65656500,0x5a5a005a .long 0x92929200,0x51510051 .long 0xbdbdbd00,0x6c6c006c .long 0x86868600,0x8b8b008b .long 0xb8b8b800,0x9a9a009a .long 0xafafaf00,0xfbfb00fb .long 0x8f8f8f00,0xb0b000b0 .long 0x7c7c7c00,0x74740074 .long 0xebebeb00,0x2b2b002b .long 0x1f1f1f00,0xf0f000f0 .long 0xcecece00,0x84840084 .long 0x3e3e3e00,0xdfdf00df .long 0x30303000,0xcbcb00cb .long 0xdcdcdc00,0x34340034 .long 0x5f5f5f00,0x76760076 .long 0x5e5e5e00,0x6d6d006d .long 0xc5c5c500,0xa9a900a9 .long 0x0b0b0b00,0xd1d100d1 .long 0x1a1a1a00,0x04040004 .long 0xa6a6a600,0x14140014 .long 0xe1e1e100,0x3a3a003a .long 0x39393900,0xdede00de .long 0xcacaca00,0x11110011 .long 0xd5d5d500,0x32320032 .long 0x47474700,0x9c9c009c .long 0x5d5d5d00,0x53530053 .long 0x3d3d3d00,0xf2f200f2 .long 0xd9d9d900,0xfefe00fe .long 0x01010100,0xcfcf00cf .long 0x5a5a5a00,0xc3c300c3 .long 0xd6d6d600,0x7a7a007a .long 0x51515100,0x24240024 .long 0x56565600,0xe8e800e8 .long 0x6c6c6c00,0x60600060 .long 0x4d4d4d00,0x69690069 .long 0x8b8b8b00,0xaaaa00aa .long 0x0d0d0d00,0xa0a000a0 .long 0x9a9a9a00,0xa1a100a1 .long 0x66666600,0x62620062 .long 0xfbfbfb00,0x54540054 .long 0xcccccc00,0x1e1e001e .long 0xb0b0b000,0xe0e000e0 .long 0x2d2d2d00,0x64640064 .long 0x74747400,0x10100010 .long 0x12121200,0x00000000 .long 0x2b2b2b00,0xa3a300a3 .long 0x20202000,0x75750075 .long 0xf0f0f000,0x8a8a008a .long 0xb1b1b100,0xe6e600e6 .long 0x84848400,0x09090009 .long 0x99999900,0xdddd00dd .long 0xdfdfdf00,0x87870087 .long 0x4c4c4c00,0x83830083 .long 0xcbcbcb00,0xcdcd00cd .long 0xc2c2c200,0x90900090 .long 0x34343400,0x73730073 .long 0x7e7e7e00,0xf6f600f6 .long 0x76767600,0x9d9d009d .long 0x05050500,0xbfbf00bf .long 0x6d6d6d00,0x52520052 .long 0xb7b7b700,0xd8d800d8 .long 0xa9a9a900,0xc8c800c8 .long 0x31313100,0xc6c600c6 .long 0xd1d1d100,0x81810081 .long 0x17171700,0x6f6f006f .long 0x04040400,0x13130013 .long 
0xd7d7d700,0x63630063 .long 0x14141400,0xe9e900e9 .long 0x58585800,0xa7a700a7 .long 0x3a3a3a00,0x9f9f009f .long 0x61616100,0xbcbc00bc .long 0xdedede00,0x29290029 .long 0x1b1b1b00,0xf9f900f9 .long 0x11111100,0x2f2f002f .long 0x1c1c1c00,0xb4b400b4 .long 0x32323200,0x78780078 .long 0x0f0f0f00,0x06060006 .long 0x9c9c9c00,0xe7e700e7 .long 0x16161600,0x71710071 .long 0x53535300,0xd4d400d4 .long 0x18181800,0xabab00ab .long 0xf2f2f200,0x88880088 .long 0x22222200,0x8d8d008d .long 0xfefefe00,0x72720072 .long 0x44444400,0xb9b900b9 .long 0xcfcfcf00,0xf8f800f8 .long 0xb2b2b200,0xacac00ac .long 0xc3c3c300,0x36360036 .long 0xb5b5b500,0x2a2a002a .long 0x7a7a7a00,0x3c3c003c .long 0x91919100,0xf1f100f1 .long 0x24242400,0x40400040 .long 0x08080800,0xd3d300d3 .long 0xe8e8e800,0xbbbb00bb .long 0xa8a8a800,0x43430043 .long 0x60606000,0x15150015 .long 0xfcfcfc00,0xadad00ad .long 0x69696900,0x77770077 .long 0x50505000,0x80800080 .long 0xaaaaaa00,0x82820082 .long 0xd0d0d000,0xecec00ec .long 0xa0a0a000,0x27270027 .long 0x7d7d7d00,0xe5e500e5 .long 0xa1a1a100,0x85850085 .long 0x89898900,0x35350035 .long 0x62626200,0x0c0c000c .long 0x97979700,0x41410041 .long 0x54545400,0xefef00ef .long 0x5b5b5b00,0x93930093 .long 0x1e1e1e00,0x19190019 .long 0x95959500,0x21210021 .long 0xe0e0e000,0x0e0e000e .long 0xffffff00,0x4e4e004e .long 0x64646400,0x65650065 .long 0xd2d2d200,0xbdbd00bd .long 0x10101000,0xb8b800b8 .long 0xc4c4c400,0x8f8f008f .long 0x00000000,0xebeb00eb .long 0x48484800,0xcece00ce .long 0xa3a3a300,0x30300030 .long 0xf7f7f700,0x5f5f005f .long 0x75757500,0xc5c500c5 .long 0xdbdbdb00,0x1a1a001a .long 0x8a8a8a00,0xe1e100e1 .long 0x03030300,0xcaca00ca .long 0xe6e6e600,0x47470047 .long 0xdadada00,0x3d3d003d .long 0x09090900,0x01010001 .long 0x3f3f3f00,0xd6d600d6 .long 0xdddddd00,0x56560056 .long 0x94949400,0x4d4d004d .long 0x87878700,0x0d0d000d .long 0x5c5c5c00,0x66660066 .long 0x83838300,0xcccc00cc .long 0x02020200,0x2d2d002d .long 0xcdcdcd00,0x12120012 .long 0x4a4a4a00,0x20200020 .long 0x90909000,0xb1b100b1 .long 0x33333300,0x99990099 .long 0x73737300,0x4c4c004c .long 0x67676700,0xc2c200c2 .long 0xf6f6f600,0x7e7e007e .long 0xf3f3f300,0x05050005 .long 0x9d9d9d00,0xb7b700b7 .long 0x7f7f7f00,0x31310031 .long 0xbfbfbf00,0x17170017 .long 0xe2e2e200,0xd7d700d7 .long 0x52525200,0x58580058 .long 0x9b9b9b00,0x61610061 .long 0xd8d8d800,0x1b1b001b .long 0x26262600,0x1c1c001c .long 0xc8c8c800,0x0f0f000f .long 0x37373700,0x16160016 .long 0xc6c6c600,0x18180018 .long 0x3b3b3b00,0x22220022 .long 0x81818100,0x44440044 .long 0x96969600,0xb2b200b2 .long 0x6f6f6f00,0xb5b500b5 .long 0x4b4b4b00,0x91910091 .long 0x13131300,0x08080008 .long 0xbebebe00,0xa8a800a8 .long 0x63636300,0xfcfc00fc .long 0x2e2e2e00,0x50500050 .long 0xe9e9e900,0xd0d000d0 .long 0x79797900,0x7d7d007d .long 0xa7a7a700,0x89890089 .long 0x8c8c8c00,0x97970097 .long 0x9f9f9f00,0x5b5b005b .long 0x6e6e6e00,0x95950095 .long 0xbcbcbc00,0xffff00ff .long 0x8e8e8e00,0xd2d200d2 .long 0x29292900,0xc4c400c4 .long 0xf5f5f500,0x48480048 .long 0xf9f9f900,0xf7f700f7 .long 0xb6b6b600,0xdbdb00db .long 0x2f2f2f00,0x03030003 .long 0xfdfdfd00,0xdada00da .long 0xb4b4b400,0x3f3f003f .long 0x59595900,0x94940094 .long 0x78787800,0x5c5c005c .long 0x98989800,0x02020002 .long 0x06060600,0x4a4a004a .long 0x6a6a6a00,0x33330033 .long 0xe7e7e700,0x67670067 .long 0x46464600,0xf3f300f3 .long 0x71717100,0x7f7f007f .long 0xbababa00,0xe2e200e2 .long 0xd4d4d400,0x9b9b009b .long 0x25252500,0x26260026 .long 0xababab00,0x37370037 .long 0x42424200,0x3b3b003b .long 0x88888800,0x96960096 .long 0xa2a2a200,0x4b4b004b 
.long 0x8d8d8d00,0xbebe00be .long 0xfafafa00,0x2e2e002e .long 0x72727200,0x79790079 .long 0x07070700,0x8c8c008c .long 0xb9b9b900,0x6e6e006e .long 0x55555500,0x8e8e008e .long 0xf8f8f800,0xf5f500f5 .long 0xeeeeee00,0xb6b600b6 .long 0xacacac00,0xfdfd00fd .long 0x0a0a0a00,0x59590059 .long 0x36363600,0x98980098 .long 0x49494900,0x6a6a006a .long 0x2a2a2a00,0x46460046 .long 0x68686800,0xbaba00ba .long 0x3c3c3c00,0x25250025 .long 0x38383800,0x42420042 .long 0xf1f1f100,0xa2a200a2 .long 0xa4a4a400,0xfafa00fa .long 0x40404000,0x07070007 .long 0x28282800,0x55550055 .long 0xd3d3d300,0xeeee00ee .long 0x7b7b7b00,0x0a0a000a .long 0xbbbbbb00,0x49490049 .long 0xc9c9c900,0x68680068 .long 0x43434300,0x38380038 .long 0xc1c1c100,0xa4a400a4 .long 0x15151500,0x28280028 .long 0xe3e3e300,0x7b7b007b .long 0xadadad00,0xc9c900c9 .long 0xf4f4f400,0xc1c100c1 .long 0x77777700,0xe3e300e3 .long 0xc7c7c700,0xf4f400f4 .long 0x80808000,0xc7c700c7 .long 0x9e9e9e00,0x9e9e009e .long 0x00e0e0e0,0x38003838 .long 0x00050505,0x41004141 .long 0x00585858,0x16001616 .long 0x00d9d9d9,0x76007676 .long 0x00676767,0xd900d9d9 .long 0x004e4e4e,0x93009393 .long 0x00818181,0x60006060 .long 0x00cbcbcb,0xf200f2f2 .long 0x00c9c9c9,0x72007272 .long 0x000b0b0b,0xc200c2c2 .long 0x00aeaeae,0xab00abab .long 0x006a6a6a,0x9a009a9a .long 0x00d5d5d5,0x75007575 .long 0x00181818,0x06000606 .long 0x005d5d5d,0x57005757 .long 0x00828282,0xa000a0a0 .long 0x00464646,0x91009191 .long 0x00dfdfdf,0xf700f7f7 .long 0x00d6d6d6,0xb500b5b5 .long 0x00272727,0xc900c9c9 .long 0x008a8a8a,0xa200a2a2 .long 0x00323232,0x8c008c8c .long 0x004b4b4b,0xd200d2d2 .long 0x00424242,0x90009090 .long 0x00dbdbdb,0xf600f6f6 .long 0x001c1c1c,0x07000707 .long 0x009e9e9e,0xa700a7a7 .long 0x009c9c9c,0x27002727 .long 0x003a3a3a,0x8e008e8e .long 0x00cacaca,0xb200b2b2 .long 0x00252525,0x49004949 .long 0x007b7b7b,0xde00dede .long 0x000d0d0d,0x43004343 .long 0x00717171,0x5c005c5c .long 0x005f5f5f,0xd700d7d7 .long 0x001f1f1f,0xc700c7c7 .long 0x00f8f8f8,0x3e003e3e .long 0x00d7d7d7,0xf500f5f5 .long 0x003e3e3e,0x8f008f8f .long 0x009d9d9d,0x67006767 .long 0x007c7c7c,0x1f001f1f .long 0x00606060,0x18001818 .long 0x00b9b9b9,0x6e006e6e .long 0x00bebebe,0xaf00afaf .long 0x00bcbcbc,0x2f002f2f .long 0x008b8b8b,0xe200e2e2 .long 0x00161616,0x85008585 .long 0x00343434,0x0d000d0d .long 0x004d4d4d,0x53005353 .long 0x00c3c3c3,0xf000f0f0 .long 0x00727272,0x9c009c9c .long 0x00959595,0x65006565 .long 0x00ababab,0xea00eaea .long 0x008e8e8e,0xa300a3a3 .long 0x00bababa,0xae00aeae .long 0x007a7a7a,0x9e009e9e .long 0x00b3b3b3,0xec00ecec .long 0x00020202,0x80008080 .long 0x00b4b4b4,0x2d002d2d .long 0x00adadad,0x6b006b6b .long 0x00a2a2a2,0xa800a8a8 .long 0x00acacac,0x2b002b2b .long 0x00d8d8d8,0x36003636 .long 0x009a9a9a,0xa600a6a6 .long 0x00171717,0xc500c5c5 .long 0x001a1a1a,0x86008686 .long 0x00353535,0x4d004d4d .long 0x00cccccc,0x33003333 .long 0x00f7f7f7,0xfd00fdfd .long 0x00999999,0x66006666 .long 0x00616161,0x58005858 .long 0x005a5a5a,0x96009696 .long 0x00e8e8e8,0x3a003a3a .long 0x00242424,0x09000909 .long 0x00565656,0x95009595 .long 0x00404040,0x10001010 .long 0x00e1e1e1,0x78007878 .long 0x00636363,0xd800d8d8 .long 0x00090909,0x42004242 .long 0x00333333,0xcc00cccc .long 0x00bfbfbf,0xef00efef .long 0x00989898,0x26002626 .long 0x00979797,0xe500e5e5 .long 0x00858585,0x61006161 .long 0x00686868,0x1a001a1a .long 0x00fcfcfc,0x3f003f3f .long 0x00ececec,0x3b003b3b .long 0x000a0a0a,0x82008282 .long 0x00dadada,0xb600b6b6 .long 0x006f6f6f,0xdb00dbdb .long 0x00535353,0xd400d4d4 .long 0x00626262,0x98009898 .long 
0x00a3a3a3,0xe800e8e8 .long 0x002e2e2e,0x8b008b8b .long 0x00080808,0x02000202 .long 0x00afafaf,0xeb00ebeb .long 0x00282828,0x0a000a0a .long 0x00b0b0b0,0x2c002c2c .long 0x00747474,0x1d001d1d .long 0x00c2c2c2,0xb000b0b0 .long 0x00bdbdbd,0x6f006f6f .long 0x00363636,0x8d008d8d .long 0x00222222,0x88008888 .long 0x00383838,0x0e000e0e .long 0x00646464,0x19001919 .long 0x001e1e1e,0x87008787 .long 0x00393939,0x4e004e4e .long 0x002c2c2c,0x0b000b0b .long 0x00a6a6a6,0xa900a9a9 .long 0x00303030,0x0c000c0c .long 0x00e5e5e5,0x79007979 .long 0x00444444,0x11001111 .long 0x00fdfdfd,0x7f007f7f .long 0x00888888,0x22002222 .long 0x009f9f9f,0xe700e7e7 .long 0x00656565,0x59005959 .long 0x00878787,0xe100e1e1 .long 0x006b6b6b,0xda00dada .long 0x00f4f4f4,0x3d003d3d .long 0x00232323,0xc800c8c8 .long 0x00484848,0x12001212 .long 0x00101010,0x04000404 .long 0x00d1d1d1,0x74007474 .long 0x00515151,0x54005454 .long 0x00c0c0c0,0x30003030 .long 0x00f9f9f9,0x7e007e7e .long 0x00d2d2d2,0xb400b4b4 .long 0x00a0a0a0,0x28002828 .long 0x00555555,0x55005555 .long 0x00a1a1a1,0x68006868 .long 0x00414141,0x50005050 .long 0x00fafafa,0xbe00bebe .long 0x00434343,0xd000d0d0 .long 0x00131313,0xc400c4c4 .long 0x00c4c4c4,0x31003131 .long 0x002f2f2f,0xcb00cbcb .long 0x00a8a8a8,0x2a002a2a .long 0x00b6b6b6,0xad00adad .long 0x003c3c3c,0x0f000f0f .long 0x002b2b2b,0xca00caca .long 0x00c1c1c1,0x70007070 .long 0x00ffffff,0xff00ffff .long 0x00c8c8c8,0x32003232 .long 0x00a5a5a5,0x69006969 .long 0x00202020,0x08000808 .long 0x00898989,0x62006262 .long 0x00000000,0x00000000 .long 0x00909090,0x24002424 .long 0x00474747,0xd100d1d1 .long 0x00efefef,0xfb00fbfb .long 0x00eaeaea,0xba00baba .long 0x00b7b7b7,0xed00eded .long 0x00151515,0x45004545 .long 0x00060606,0x81008181 .long 0x00cdcdcd,0x73007373 .long 0x00b5b5b5,0x6d006d6d .long 0x00121212,0x84008484 .long 0x007e7e7e,0x9f009f9f .long 0x00bbbbbb,0xee00eeee .long 0x00292929,0x4a004a4a .long 0x000f0f0f,0xc300c3c3 .long 0x00b8b8b8,0x2e002e2e .long 0x00070707,0xc100c1c1 .long 0x00040404,0x01000101 .long 0x009b9b9b,0xe600e6e6 .long 0x00949494,0x25002525 .long 0x00212121,0x48004848 .long 0x00666666,0x99009999 .long 0x00e6e6e6,0xb900b9b9 .long 0x00cecece,0xb300b3b3 .long 0x00ededed,0x7b007b7b .long 0x00e7e7e7,0xf900f9f9 .long 0x003b3b3b,0xce00cece .long 0x00fefefe,0xbf00bfbf .long 0x007f7f7f,0xdf00dfdf .long 0x00c5c5c5,0x71007171 .long 0x00a4a4a4,0x29002929 .long 0x00373737,0xcd00cdcd .long 0x00b1b1b1,0x6c006c6c .long 0x004c4c4c,0x13001313 .long 0x00919191,0x64006464 .long 0x006e6e6e,0x9b009b9b .long 0x008d8d8d,0x63006363 .long 0x00767676,0x9d009d9d .long 0x00030303,0xc000c0c0 .long 0x002d2d2d,0x4b004b4b .long 0x00dedede,0xb700b7b7 .long 0x00969696,0xa500a5a5 .long 0x00262626,0x89008989 .long 0x007d7d7d,0x5f005f5f .long 0x00c6c6c6,0xb100b1b1 .long 0x005c5c5c,0x17001717 .long 0x00d3d3d3,0xf400f4f4 .long 0x00f2f2f2,0xbc00bcbc .long 0x004f4f4f,0xd300d3d3 .long 0x00191919,0x46004646 .long 0x003f3f3f,0xcf00cfcf .long 0x00dcdcdc,0x37003737 .long 0x00797979,0x5e005e5e .long 0x001d1d1d,0x47004747 .long 0x00525252,0x94009494 .long 0x00ebebeb,0xfa00fafa .long 0x00f3f3f3,0xfc00fcfc .long 0x006d6d6d,0x5b005b5b .long 0x005e5e5e,0x97009797 .long 0x00fbfbfb,0xfe00fefe .long 0x00696969,0x5a005a5a .long 0x00b2b2b2,0xac00acac .long 0x00f0f0f0,0x3c003c3c .long 0x00313131,0x4c004c4c .long 0x000c0c0c,0x03000303 .long 0x00d4d4d4,0x35003535 .long 0x00cfcfcf,0xf300f3f3 .long 0x008c8c8c,0x23002323 .long 0x00e2e2e2,0xb800b8b8 .long 0x00757575,0x5d005d5d .long 0x00a9a9a9,0x6a006a6a .long 0x004a4a4a,0x92009292 .long 0x00575757,0xd500d5d5 
.long 0x00848484,0x21002121 .long 0x00111111,0x44004444 .long 0x00454545,0x51005151 .long 0x001b1b1b,0xc600c6c6 .long 0x00f5f5f5,0x7d007d7d .long 0x00e4e4e4,0x39003939 .long 0x000e0e0e,0x83008383 .long 0x00737373,0xdc00dcdc .long 0x00aaaaaa,0xaa00aaaa .long 0x00f1f1f1,0x7c007c7c .long 0x00dddddd,0x77007777 .long 0x00595959,0x56005656 .long 0x00141414,0x05000505 .long 0x006c6c6c,0x1b001b1b .long 0x00929292,0xa400a4a4 .long 0x00545454,0x15001515 .long 0x00d0d0d0,0x34003434 .long 0x00787878,0x1e001e1e .long 0x00707070,0x1c001c1c .long 0x00e3e3e3,0xf800f8f8 .long 0x00494949,0x52005252 .long 0x00808080,0x20002020 .long 0x00505050,0x14001414 .long 0x00a7a7a7,0xe900e9e9 .long 0x00f6f6f6,0xbd00bdbd .long 0x00777777,0xdd00dddd .long 0x00939393,0xe400e4e4 .long 0x00868686,0xa100a1a1 .long 0x00838383,0xe000e0e0 .long 0x002a2a2a,0x8a008a8a .long 0x00c7c7c7,0xf100f1f1 .long 0x005b5b5b,0xd600d6d6 .long 0x00e9e9e9,0x7a007a7a .long 0x00eeeeee,0xbb00bbbb .long 0x008f8f8f,0xe300e3e3 .long 0x00010101,0x40004040 .long 0x003d3d3d,0x4f004f4f .globl Camellia_cbc_encrypt .type Camellia_cbc_encrypt,@function .align 16 Camellia_cbc_encrypt: cmpq $0,%rdx je .Lcbc_abort pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 .Lcbc_prologue: movq %rsp,%rbp subq $64,%rsp andq $-64,%rsp leaq -64-63(%rcx),%r10 subq %rsp,%r10 negq %r10 andq $0x3C0,%r10 subq %r10,%rsp movq %rdi,%r12 movq %rsi,%r13 movq %r8,%rbx movq %rcx,%r14 movl 272(%rcx),%r15d movq %r8,40(%rsp) movq %rbp,48(%rsp) .Lcbc_body: leaq .LCamellia_SBOX(%rip),%rbp movl $32,%ecx .align 4 .Lcbc_prefetch_sbox: movq 0(%rbp),%rax movq 32(%rbp),%rsi movq 64(%rbp),%rdi movq 96(%rbp),%r11 leaq 128(%rbp),%rbp loop .Lcbc_prefetch_sbox subq $4096,%rbp shlq $6,%r15 movq %rdx,%rcx leaq (%r14,%r15,1),%r15 cmpl $0,%r9d je .LCBC_DECRYPT andq $-16,%rdx andq $15,%rcx leaq (%r12,%rdx,1),%rdx movq %r14,0(%rsp) movq %rdx,8(%rsp) movq %rcx,16(%rsp) cmpq %r12,%rdx movl 0(%rbx),%r8d movl 4(%rbx),%r9d movl 8(%rbx),%r10d movl 12(%rbx),%r11d je .Lcbc_enc_tail jmp .Lcbc_eloop .align 16 .Lcbc_eloop: xorl 0(%r12),%r8d xorl 4(%r12),%r9d xorl 8(%r12),%r10d bswapl %r8d xorl 12(%r12),%r11d bswapl %r9d bswapl %r10d bswapl %r11d call _x86_64_Camellia_encrypt movq 0(%rsp),%r14 bswapl %r8d movq 8(%rsp),%rdx bswapl %r9d movq 16(%rsp),%rcx bswapl %r10d movl %r8d,0(%r13) bswapl %r11d movl %r9d,4(%r13) movl %r10d,8(%r13) leaq 16(%r12),%r12 movl %r11d,12(%r13) cmpq %rdx,%r12 leaq 16(%r13),%r13 jne .Lcbc_eloop cmpq $0,%rcx jne .Lcbc_enc_tail movq 40(%rsp),%r13 movl %r8d,0(%r13) movl %r9d,4(%r13) movl %r10d,8(%r13) movl %r11d,12(%r13) jmp .Lcbc_done .align 16 .Lcbc_enc_tail: xorq %rax,%rax movq %rax,0+24(%rsp) movq %rax,8+24(%rsp) movq %rax,16(%rsp) .Lcbc_enc_pushf: pushfq cld movq %r12,%rsi leaq 8+24(%rsp),%rdi .long 0x9066A4F3 popfq .Lcbc_enc_popf: leaq 24(%rsp),%r12 leaq 16+24(%rsp),%rax movq %rax,8(%rsp) jmp .Lcbc_eloop .align 16 .LCBC_DECRYPT: xchgq %r14,%r15 addq $15,%rdx andq $15,%rcx andq $-16,%rdx movq %r14,0(%rsp) leaq (%r12,%rdx,1),%rdx movq %rdx,8(%rsp) movq %rcx,16(%rsp) movq (%rbx),%rax movq 8(%rbx),%rbx jmp .Lcbc_dloop .align 16 .Lcbc_dloop: movl 0(%r12),%r8d movl 4(%r12),%r9d movl 8(%r12),%r10d bswapl %r8d movl 12(%r12),%r11d bswapl %r9d movq %rax,0+24(%rsp) bswapl %r10d movq %rbx,8+24(%rsp) bswapl %r11d call _x86_64_Camellia_decrypt movq 0(%rsp),%r14 movq 8(%rsp),%rdx movq 16(%rsp),%rcx bswapl %r8d movq (%r12),%rax bswapl %r9d movq 8(%r12),%rbx bswapl %r10d xorl 0+24(%rsp),%r8d bswapl %r11d xorl 4+24(%rsp),%r9d xorl 8+24(%rsp),%r10d leaq 16(%r12),%r12 xorl 12+24(%rsp),%r11d cmpq 
%rdx,%r12 je .Lcbc_ddone movl %r8d,0(%r13) movl %r9d,4(%r13) movl %r10d,8(%r13) movl %r11d,12(%r13) leaq 16(%r13),%r13 jmp .Lcbc_dloop .align 16 .Lcbc_ddone: movq 40(%rsp),%rdx cmpq $0,%rcx jne .Lcbc_dec_tail movl %r8d,0(%r13) movl %r9d,4(%r13) movl %r10d,8(%r13) movl %r11d,12(%r13) movq %rax,(%rdx) movq %rbx,8(%rdx) jmp .Lcbc_done .align 16 .Lcbc_dec_tail: movl %r8d,0+24(%rsp) movl %r9d,4+24(%rsp) movl %r10d,8+24(%rsp) movl %r11d,12+24(%rsp) .Lcbc_dec_pushf: pushfq cld leaq 8+24(%rsp),%rsi leaq (%r13),%rdi .long 0x9066A4F3 popfq .Lcbc_dec_popf: movq %rax,(%rdx) movq %rbx,8(%rdx) jmp .Lcbc_done .align 16 .Lcbc_done: movq 48(%rsp),%rcx movq 0(%rcx),%r15 movq 8(%rcx),%r14 movq 16(%rcx),%r13 movq 24(%rcx),%r12 movq 32(%rcx),%rbp movq 40(%rcx),%rbx leaq 48(%rcx),%rsp .Lcbc_abort: .byte 0xf3,0xc3 .size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt .byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54,95,54,52,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 Index: head/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S (revision 299481) @@ -1,2014 +1,3524 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from ecp_nistz256-x86_64.pl. .text .align 64 .Lpoly: .quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 .LRR: .quad 0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd .LOne: .long 1,1,1,1,1,1,1,1 .LTwo: .long 2,2,2,2,2,2,2,2 .LThree: .long 3,3,3,3,3,3,3,3 .LONE_mont: .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe .globl ecp_nistz256_mul_by_2 .type ecp_nistz256_mul_by_2,@function .align 64 ecp_nistz256_mul_by_2: pushq %r12 pushq %r13 movq 0(%rsi),%r8 movq 8(%rsi),%r9 addq %r8,%r8 movq 16(%rsi),%r10 adcq %r9,%r9 movq 24(%rsi),%r11 leaq .Lpoly(%rip),%rsi movq %r8,%rax adcq %r10,%r10 adcq %r11,%r11 movq %r9,%rdx sbbq %r13,%r13 subq 0(%rsi),%r8 movq %r10,%rcx sbbq 8(%rsi),%r9 sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 testq %r13,%r13 cmovzq %rax,%r8 cmovzq %rdx,%r9 movq %r8,0(%rdi) cmovzq %rcx,%r10 movq %r9,8(%rdi) cmovzq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) popq %r13 popq %r12 .byte 0xf3,0xc3 .size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2 .globl ecp_nistz256_div_by_2 .type ecp_nistz256_div_by_2,@function .align 32 ecp_nistz256_div_by_2: pushq %r12 pushq %r13 movq 0(%rsi),%r8 movq 8(%rsi),%r9 movq 16(%rsi),%r10 movq %r8,%rax movq 24(%rsi),%r11 leaq .Lpoly(%rip),%rsi movq %r9,%rdx xorq %r13,%r13 addq 0(%rsi),%r8 movq %r10,%rcx adcq 8(%rsi),%r9 adcq 16(%rsi),%r10 movq %r11,%r12 adcq 24(%rsi),%r11 adcq $0,%r13 xorq %rsi,%rsi testq $1,%rax cmovzq %rax,%r8 cmovzq %rdx,%r9 cmovzq %rcx,%r10 cmovzq %r12,%r11 cmovzq %rsi,%r13 movq %r9,%rax shrq $1,%r8 shlq $63,%rax movq %r10,%rdx shrq $1,%r9 orq %rax,%r8 shlq $63,%rdx movq %r11,%rcx shrq $1,%r10 orq %rdx,%r9 shlq $63,%rcx shrq $1,%r11 shlq $63,%r13 orq %rcx,%r10 orq %r13,%r11 movq %r8,0(%rdi) movq %r9,8(%rdi) movq %r10,16(%rdi) movq %r11,24(%rdi) popq %r13 popq %r12 .byte 0xf3,0xc3 .size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2 .globl ecp_nistz256_mul_by_3 .type ecp_nistz256_mul_by_3,@function .align 32 ecp_nistz256_mul_by_3: pushq %r12 pushq %r13 movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 addq %r8,%r8 movq 16(%rsi),%r10 adcq %r9,%r9 movq 24(%rsi),%r11 movq 
%r8,%rax adcq %r10,%r10 adcq %r11,%r11 movq %r9,%rdx adcq $0,%r13 subq $-1,%r8 movq %r10,%rcx sbbq .Lpoly+8(%rip),%r9 sbbq $0,%r10 movq %r11,%r12 sbbq .Lpoly+24(%rip),%r11 testq %r13,%r13 cmovzq %rax,%r8 cmovzq %rdx,%r9 cmovzq %rcx,%r10 cmovzq %r12,%r11 xorq %r13,%r13 addq 0(%rsi),%r8 adcq 8(%rsi),%r9 movq %r8,%rax adcq 16(%rsi),%r10 adcq 24(%rsi),%r11 movq %r9,%rdx adcq $0,%r13 subq $-1,%r8 movq %r10,%rcx sbbq .Lpoly+8(%rip),%r9 sbbq $0,%r10 movq %r11,%r12 sbbq .Lpoly+24(%rip),%r11 testq %r13,%r13 cmovzq %rax,%r8 cmovzq %rdx,%r9 movq %r8,0(%rdi) cmovzq %rcx,%r10 movq %r9,8(%rdi) cmovzq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) popq %r13 popq %r12 .byte 0xf3,0xc3 .size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3 .globl ecp_nistz256_add .type ecp_nistz256_add,@function .align 32 ecp_nistz256_add: pushq %r12 pushq %r13 movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 movq 16(%rsi),%r10 movq 24(%rsi),%r11 leaq .Lpoly(%rip),%rsi addq 0(%rdx),%r8 adcq 8(%rdx),%r9 movq %r8,%rax adcq 16(%rdx),%r10 adcq 24(%rdx),%r11 movq %r9,%rdx adcq $0,%r13 subq 0(%rsi),%r8 movq %r10,%rcx sbbq 8(%rsi),%r9 sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 testq %r13,%r13 cmovzq %rax,%r8 cmovzq %rdx,%r9 movq %r8,0(%rdi) cmovzq %rcx,%r10 movq %r9,8(%rdi) cmovzq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) popq %r13 popq %r12 .byte 0xf3,0xc3 .size ecp_nistz256_add,.-ecp_nistz256_add .globl ecp_nistz256_sub .type ecp_nistz256_sub,@function .align 32 ecp_nistz256_sub: pushq %r12 pushq %r13 movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 movq 16(%rsi),%r10 movq 24(%rsi),%r11 leaq .Lpoly(%rip),%rsi subq 0(%rdx),%r8 sbbq 8(%rdx),%r9 movq %r8,%rax sbbq 16(%rdx),%r10 sbbq 24(%rdx),%r11 movq %r9,%rdx sbbq $0,%r13 addq 0(%rsi),%r8 movq %r10,%rcx adcq 8(%rsi),%r9 adcq 16(%rsi),%r10 movq %r11,%r12 adcq 24(%rsi),%r11 testq %r13,%r13 cmovzq %rax,%r8 cmovzq %rdx,%r9 movq %r8,0(%rdi) cmovzq %rcx,%r10 movq %r9,8(%rdi) cmovzq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) popq %r13 popq %r12 .byte 0xf3,0xc3 .size ecp_nistz256_sub,.-ecp_nistz256_sub .globl ecp_nistz256_neg .type ecp_nistz256_neg,@function .align 32 ecp_nistz256_neg: pushq %r12 pushq %r13 xorq %r8,%r8 xorq %r9,%r9 xorq %r10,%r10 xorq %r11,%r11 xorq %r13,%r13 subq 0(%rsi),%r8 sbbq 8(%rsi),%r9 sbbq 16(%rsi),%r10 movq %r8,%rax sbbq 24(%rsi),%r11 leaq .Lpoly(%rip),%rsi movq %r9,%rdx sbbq $0,%r13 addq 0(%rsi),%r8 movq %r10,%rcx adcq 8(%rsi),%r9 adcq 16(%rsi),%r10 movq %r11,%r12 adcq 24(%rsi),%r11 testq %r13,%r13 cmovzq %rax,%r8 cmovzq %rdx,%r9 movq %r8,0(%rdi) cmovzq %rcx,%r10 movq %r9,8(%rdi) cmovzq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) popq %r13 popq %r12 .byte 0xf3,0xc3 .size ecp_nistz256_neg,.-ecp_nistz256_neg .globl ecp_nistz256_to_mont .type ecp_nistz256_to_mont,@function .align 32 ecp_nistz256_to_mont: + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx leaq .LRR(%rip),%rdx jmp .Lmul_mont .size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont .globl ecp_nistz256_mul_mont .type ecp_nistz256_mul_mont,@function .align 32 ecp_nistz256_mul_mont: + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx .Lmul_mont: pushq %rbp pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 + cmpl $0x80100,%ecx + je .Lmul_montx movq %rdx,%rbx movq 0(%rdx),%rax movq 0(%rsi),%r9 movq 8(%rsi),%r10 movq 16(%rsi),%r11 movq 24(%rsi),%r12 call __ecp_nistz256_mul_montq + jmp .Lmul_mont_done + +.align 32 +.Lmul_montx: + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + + 
call __ecp_nistz256_mul_montx .Lmul_mont_done: popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx popq %rbp .byte 0xf3,0xc3 .size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont .type __ecp_nistz256_mul_montq,@function .align 32 __ecp_nistz256_mul_montq: movq %rax,%rbp mulq %r9 movq .Lpoly+8(%rip),%r14 movq %rax,%r8 movq %rbp,%rax movq %rdx,%r9 mulq %r10 movq .Lpoly+24(%rip),%r15 addq %rax,%r9 movq %rbp,%rax adcq $0,%rdx movq %rdx,%r10 mulq %r11 addq %rax,%r10 movq %rbp,%rax adcq $0,%rdx movq %rdx,%r11 mulq %r12 addq %rax,%r11 movq %r8,%rax adcq $0,%rdx xorq %r13,%r13 movq %rdx,%r12 movq %r8,%rbp shlq $32,%r8 mulq %r15 shrq $32,%rbp addq %r8,%r9 adcq %rbp,%r10 adcq %rax,%r11 movq 8(%rbx),%rax adcq %rdx,%r12 adcq $0,%r13 xorq %r8,%r8 movq %rax,%rbp mulq 0(%rsi) addq %rax,%r9 movq %rbp,%rax adcq $0,%rdx movq %rdx,%rcx mulq 8(%rsi) addq %rcx,%r10 adcq $0,%rdx addq %rax,%r10 movq %rbp,%rax adcq $0,%rdx movq %rdx,%rcx mulq 16(%rsi) addq %rcx,%r11 adcq $0,%rdx addq %rax,%r11 movq %rbp,%rax adcq $0,%rdx movq %rdx,%rcx mulq 24(%rsi) addq %rcx,%r12 adcq $0,%rdx addq %rax,%r12 movq %r9,%rax adcq %rdx,%r13 adcq $0,%r8 movq %r9,%rbp shlq $32,%r9 mulq %r15 shrq $32,%rbp addq %r9,%r10 adcq %rbp,%r11 adcq %rax,%r12 movq 16(%rbx),%rax adcq %rdx,%r13 adcq $0,%r8 xorq %r9,%r9 movq %rax,%rbp mulq 0(%rsi) addq %rax,%r10 movq %rbp,%rax adcq $0,%rdx movq %rdx,%rcx mulq 8(%rsi) addq %rcx,%r11 adcq $0,%rdx addq %rax,%r11 movq %rbp,%rax adcq $0,%rdx movq %rdx,%rcx mulq 16(%rsi) addq %rcx,%r12 adcq $0,%rdx addq %rax,%r12 movq %rbp,%rax adcq $0,%rdx movq %rdx,%rcx mulq 24(%rsi) addq %rcx,%r13 adcq $0,%rdx addq %rax,%r13 movq %r10,%rax adcq %rdx,%r8 adcq $0,%r9 movq %r10,%rbp shlq $32,%r10 mulq %r15 shrq $32,%rbp addq %r10,%r11 adcq %rbp,%r12 adcq %rax,%r13 movq 24(%rbx),%rax adcq %rdx,%r8 adcq $0,%r9 xorq %r10,%r10 movq %rax,%rbp mulq 0(%rsi) addq %rax,%r11 movq %rbp,%rax adcq $0,%rdx movq %rdx,%rcx mulq 8(%rsi) addq %rcx,%r12 adcq $0,%rdx addq %rax,%r12 movq %rbp,%rax adcq $0,%rdx movq %rdx,%rcx mulq 16(%rsi) addq %rcx,%r13 adcq $0,%rdx addq %rax,%r13 movq %rbp,%rax adcq $0,%rdx movq %rdx,%rcx mulq 24(%rsi) addq %rcx,%r8 adcq $0,%rdx addq %rax,%r8 movq %r11,%rax adcq %rdx,%r9 adcq $0,%r10 movq %r11,%rbp shlq $32,%r11 mulq %r15 shrq $32,%rbp addq %r11,%r12 adcq %rbp,%r13 movq %r12,%rcx adcq %rax,%r8 adcq %rdx,%r9 movq %r13,%rbp adcq $0,%r10 subq $-1,%r12 movq %r8,%rbx sbbq %r14,%r13 sbbq $0,%r8 movq %r9,%rdx sbbq %r15,%r9 sbbq $0,%r10 cmovcq %rcx,%r12 cmovcq %rbp,%r13 movq %r12,0(%rdi) cmovcq %rbx,%r8 movq %r13,8(%rdi) cmovcq %rdx,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) .byte 0xf3,0xc3 .size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq .globl ecp_nistz256_sqr_mont .type ecp_nistz256_sqr_mont,@function .align 32 ecp_nistz256_sqr_mont: + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx pushq %rbp pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 + cmpl $0x80100,%ecx + je .Lsqr_montx movq 0(%rsi),%rax movq 8(%rsi),%r14 movq 16(%rsi),%r15 movq 24(%rsi),%r8 call __ecp_nistz256_sqr_montq + jmp .Lsqr_mont_done + +.align 32 +.Lsqr_montx: + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_sqr_montx .Lsqr_mont_done: popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx popq %rbp .byte 0xf3,0xc3 .size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont .type __ecp_nistz256_sqr_montq,@function .align 32 __ecp_nistz256_sqr_montq: movq %rax,%r13 mulq %r14 movq %rax,%r9 movq %r15,%rax movq %rdx,%r10 mulq %r13 addq %rax,%r10 movq %r8,%rax adcq 
$0,%rdx movq %rdx,%r11 mulq %r13 addq %rax,%r11 movq %r15,%rax adcq $0,%rdx movq %rdx,%r12 mulq %r14 addq %rax,%r11 movq %r8,%rax adcq $0,%rdx movq %rdx,%rbp mulq %r14 addq %rax,%r12 movq %r8,%rax adcq $0,%rdx addq %rbp,%r12 movq %rdx,%r13 adcq $0,%r13 mulq %r15 xorq %r15,%r15 addq %rax,%r13 movq 0(%rsi),%rax movq %rdx,%r14 adcq $0,%r14 addq %r9,%r9 adcq %r10,%r10 adcq %r11,%r11 adcq %r12,%r12 adcq %r13,%r13 adcq %r14,%r14 adcq $0,%r15 mulq %rax movq %rax,%r8 movq 8(%rsi),%rax movq %rdx,%rcx mulq %rax addq %rcx,%r9 adcq %rax,%r10 movq 16(%rsi),%rax adcq $0,%rdx movq %rdx,%rcx mulq %rax addq %rcx,%r11 adcq %rax,%r12 movq 24(%rsi),%rax adcq $0,%rdx movq %rdx,%rcx mulq %rax addq %rcx,%r13 adcq %rax,%r14 movq %r8,%rax adcq %rdx,%r15 movq .Lpoly+8(%rip),%rsi movq .Lpoly+24(%rip),%rbp movq %r8,%rcx shlq $32,%r8 mulq %rbp shrq $32,%rcx addq %r8,%r9 adcq %rcx,%r10 adcq %rax,%r11 movq %r9,%rax adcq $0,%rdx movq %r9,%rcx shlq $32,%r9 movq %rdx,%r8 mulq %rbp shrq $32,%rcx addq %r9,%r10 adcq %rcx,%r11 adcq %rax,%r8 movq %r10,%rax adcq $0,%rdx movq %r10,%rcx shlq $32,%r10 movq %rdx,%r9 mulq %rbp shrq $32,%rcx addq %r10,%r11 adcq %rcx,%r8 adcq %rax,%r9 movq %r11,%rax adcq $0,%rdx movq %r11,%rcx shlq $32,%r11 movq %rdx,%r10 mulq %rbp shrq $32,%rcx addq %r11,%r8 adcq %rcx,%r9 adcq %rax,%r10 adcq $0,%rdx xorq %r11,%r11 addq %r8,%r12 adcq %r9,%r13 movq %r12,%r8 adcq %r10,%r14 adcq %rdx,%r15 movq %r13,%r9 adcq $0,%r11 subq $-1,%r12 movq %r14,%r10 sbbq %rsi,%r13 sbbq $0,%r14 movq %r15,%rcx sbbq %rbp,%r15 sbbq $0,%r11 cmovcq %r8,%r12 cmovcq %r9,%r13 movq %r12,0(%rdi) cmovcq %r10,%r14 movq %r13,8(%rdi) cmovcq %rcx,%r15 movq %r14,16(%rdi) movq %r15,24(%rdi) .byte 0xf3,0xc3 .size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq +.type __ecp_nistz256_mul_montx,@function +.align 32 +__ecp_nistz256_mul_montx: + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + movq $32,%r14 + xorq %r13,%r13 + mulxq %r11,%rbp,%r11 + movq .Lpoly+24(%rip),%r15 + adcq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + adcq %rbp,%r10 + shlxq %r14,%r8,%rbp + adcq %rcx,%r11 + shrxq %r14,%r8,%rcx + adcq $0,%r12 + addq %rbp,%r9 + adcq %rcx,%r10 + mulxq %r15,%rcx,%rbp + movq 8(%rbx),%rdx + adcq %rcx,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + adcxq %rcx,%r12 + shlxq %r14,%r9,%rcx + adoxq %rbp,%r13 + shrxq %r14,%r9,%rbp + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + + addq %rcx,%r10 + adcq %rbp,%r11 + + mulxq %r15,%rcx,%rbp + movq 16(%rbx),%rdx + adcq %rcx,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + adcxq %rcx,%r13 + shlxq %r14,%r10,%rcx + adoxq %rbp,%r8 + shrxq %r14,%r10,%rbp + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + + addq %rcx,%r11 + adcq %rbp,%r12 + + mulxq %r15,%rcx,%rbp + movq 24(%rbx),%rdx + adcq %rcx,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq 
%r11,%rdx + adcxq %rcx,%r8 + shlxq %r14,%r11,%rcx + adoxq %rbp,%r9 + shrxq %r14,%r11,%rbp + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + + addq %rcx,%r12 + adcq %rbp,%r13 + + mulxq %r15,%rcx,%rbp + movq %r12,%rbx + movq .Lpoly+8(%rip),%r14 + adcq %rcx,%r8 + movq %r13,%rdx + adcq %rbp,%r9 + adcq $0,%r10 + + + + xorl %eax,%eax + movq %r8,%rcx + sbbq $-1,%r12 + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rbp + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %rbp,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx + +.type __ecp_nistz256_sqr_montx,@function +.align 32 +__ecp_nistz256_sqr_montx: + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + xorl %eax,%eax + adcq %rcx,%r10 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + + mulxq %r8,%rcx,%r14 + movq 0+128(%rsi),%rdx + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + mulxq %rdx,%r8,%rbp + movq 8+128(%rsi),%rdx + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax + movq 16+128(%rsi),%rdx + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 +.byte 0x67 + mulxq %rdx,%rcx,%rbp + movq 24+128(%rsi),%rdx + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + movq $32,%rsi + adoxq %rbp,%r13 +.byte 0x67,0x67 + mulxq %rdx,%rcx,%rax + movq %r8,%rdx + adoxq %rcx,%r14 + shlxq %rsi,%r8,%rcx + adoxq %rax,%r15 + shrxq %rsi,%r8,%rax + movq .Lpoly+24(%rip),%rbp + + + addq %rcx,%r9 + adcq %rax,%r10 + + mulxq %rbp,%rcx,%r8 + movq %r9,%rdx + adcq %rcx,%r11 + shlxq %rsi,%r9,%rcx + adcq $0,%r8 + shrxq %rsi,%r9,%rax + + + addq %rcx,%r10 + adcq %rax,%r11 + + mulxq %rbp,%rcx,%r9 + movq %r10,%rdx + adcq %rcx,%r8 + shlxq %rsi,%r10,%rcx + adcq $0,%r9 + shrxq %rsi,%r10,%rax + + + addq %rcx,%r11 + adcq %rax,%r8 + + mulxq %rbp,%rcx,%r10 + movq %r11,%rdx + adcq %rcx,%r9 + shlxq %rsi,%r11,%rcx + adcq $0,%r10 + shrxq %rsi,%r11,%rax + + + addq %rcx,%r8 + adcq %rax,%r9 + + mulxq %rbp,%rcx,%r11 + adcq %rcx,%r10 + adcq $0,%r11 + + xorq %rdx,%rdx + adcq %r8,%r12 + movq .Lpoly+8(%rip),%rsi + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %r11,%r15 + movq %r13,%r9 + adcq $0,%rdx + + xorl %eax,%eax + sbbq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%r11 + sbbq %rbp,%r15 + sbbq $0,%rdx + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %r11,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx + + + + + + .globl ecp_nistz256_from_mont .type ecp_nistz256_from_mont,@function .align 32 ecp_nistz256_from_mont: pushq %r12 pushq %r13 movq 0(%rsi),%rax movq .Lpoly+24(%rip),%r13 movq 8(%rsi),%r9 movq 16(%rsi),%r10 movq 24(%rsi),%r11 movq %rax,%r8 movq .Lpoly+8(%rip),%r12 movq %rax,%rcx shlq $32,%r8 mulq %r13 shrq $32,%rcx addq %r8,%r9 adcq %rcx,%r10 adcq %rax,%r11 movq %r9,%rax adcq $0,%rdx movq %r9,%rcx shlq $32,%r9 movq %rdx,%r8 mulq %r13 shrq $32,%rcx addq %r9,%r10 adcq %rcx,%r11 adcq %rax,%r8 movq %r10,%rax adcq $0,%rdx movq %r10,%rcx shlq $32,%r10 movq %rdx,%r9 mulq %r13 shrq $32,%rcx addq %r10,%r11 adcq %rcx,%r8 adcq %rax,%r9 movq %r11,%rax adcq $0,%rdx movq %r11,%rcx shlq $32,%r11 movq %rdx,%r10 mulq %r13 shrq 
$32,%rcx addq %r11,%r8 adcq %rcx,%r9 movq %r8,%rcx adcq %rax,%r10 movq %r9,%rsi adcq $0,%rdx subq $-1,%r8 movq %r10,%rax sbbq %r12,%r9 sbbq $0,%r10 movq %rdx,%r11 sbbq %r13,%rdx sbbq %r13,%r13 cmovnzq %rcx,%r8 cmovnzq %rsi,%r9 movq %r8,0(%rdi) cmovnzq %rax,%r10 movq %r9,8(%rdi) cmovzq %rdx,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) popq %r13 popq %r12 .byte 0xf3,0xc3 .size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont .globl ecp_nistz256_select_w5 .type ecp_nistz256_select_w5,@function .align 32 ecp_nistz256_select_w5: + movl OPENSSL_ia32cap_P+8(%rip),%eax + testl $32,%eax + jnz .Lavx2_select_w5 movdqa .LOne(%rip),%xmm0 movd %edx,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 movdqa %xmm0,%xmm8 pshufd $0,%xmm1,%xmm1 movq $16,%rax .Lselect_loop_sse_w5: movdqa %xmm8,%xmm15 paddd %xmm0,%xmm8 pcmpeqd %xmm1,%xmm15 movdqa 0(%rsi),%xmm9 movdqa 16(%rsi),%xmm10 movdqa 32(%rsi),%xmm11 movdqa 48(%rsi),%xmm12 movdqa 64(%rsi),%xmm13 movdqa 80(%rsi),%xmm14 leaq 96(%rsi),%rsi pand %xmm15,%xmm9 pand %xmm15,%xmm10 por %xmm9,%xmm2 pand %xmm15,%xmm11 por %xmm10,%xmm3 pand %xmm15,%xmm12 por %xmm11,%xmm4 pand %xmm15,%xmm13 por %xmm12,%xmm5 pand %xmm15,%xmm14 por %xmm13,%xmm6 por %xmm14,%xmm7 decq %rax jnz .Lselect_loop_sse_w5 movdqu %xmm2,0(%rdi) movdqu %xmm3,16(%rdi) movdqu %xmm4,32(%rdi) movdqu %xmm5,48(%rdi) movdqu %xmm6,64(%rdi) movdqu %xmm7,80(%rdi) .byte 0xf3,0xc3 .size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5 .globl ecp_nistz256_select_w7 .type ecp_nistz256_select_w7,@function .align 32 ecp_nistz256_select_w7: + movl OPENSSL_ia32cap_P+8(%rip),%eax + testl $32,%eax + jnz .Lavx2_select_w7 movdqa .LOne(%rip),%xmm8 movd %edx,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 movdqa %xmm8,%xmm0 pshufd $0,%xmm1,%xmm1 movq $64,%rax .Lselect_loop_sse_w7: movdqa %xmm8,%xmm15 paddd %xmm0,%xmm8 movdqa 0(%rsi),%xmm9 movdqa 16(%rsi),%xmm10 pcmpeqd %xmm1,%xmm15 movdqa 32(%rsi),%xmm11 movdqa 48(%rsi),%xmm12 leaq 64(%rsi),%rsi pand %xmm15,%xmm9 pand %xmm15,%xmm10 por %xmm9,%xmm2 pand %xmm15,%xmm11 por %xmm10,%xmm3 pand %xmm15,%xmm12 por %xmm11,%xmm4 prefetcht0 255(%rsi) por %xmm12,%xmm5 decq %rax jnz .Lselect_loop_sse_w7 movdqu %xmm2,0(%rdi) movdqu %xmm3,16(%rdi) movdqu %xmm4,32(%rdi) movdqu %xmm5,48(%rdi) .byte 0xf3,0xc3 .size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 + + +.type ecp_nistz256_avx2_select_w5,@function +.align 32 +ecp_nistz256_avx2_select_w5: +.Lavx2_select_w5: + vzeroupper + vmovdqa .LTwo(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + vpxor %ymm4,%ymm4,%ymm4 + + vmovdqa .LOne(%rip),%ymm5 + vmovdqa .LTwo(%rip),%ymm10 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + movq $8,%rax +.Lselect_loop_avx2_w5: + + vmovdqa 0(%rsi),%ymm6 + vmovdqa 32(%rsi),%ymm7 + vmovdqa 64(%rsi),%ymm8 + + vmovdqa 96(%rsi),%ymm11 + vmovdqa 128(%rsi),%ymm12 + vmovdqa 160(%rsi),%ymm13 + + vpcmpeqd %ymm1,%ymm5,%ymm9 + vpcmpeqd %ymm1,%ymm10,%ymm14 + + vpaddd %ymm0,%ymm5,%ymm5 + vpaddd %ymm0,%ymm10,%ymm10 + leaq 192(%rsi),%rsi + + vpand %ymm9,%ymm6,%ymm6 + vpand %ymm9,%ymm7,%ymm7 + vpand %ymm9,%ymm8,%ymm8 + vpand %ymm14,%ymm11,%ymm11 + vpand %ymm14,%ymm12,%ymm12 + vpand %ymm14,%ymm13,%ymm13 + + vpxor %ymm6,%ymm2,%ymm2 + vpxor %ymm7,%ymm3,%ymm3 + vpxor %ymm8,%ymm4,%ymm4 + vpxor %ymm11,%ymm2,%ymm2 + vpxor %ymm12,%ymm3,%ymm3 + vpxor %ymm13,%ymm4,%ymm4 + + decq %rax + jnz .Lselect_loop_avx2_w5 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vmovdqu %ymm4,64(%rdi) + vzeroupper + .byte 0xf3,0xc3 +.size 
ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5 + + + .globl ecp_nistz256_avx2_select_w7 .type ecp_nistz256_avx2_select_w7,@function .align 32 ecp_nistz256_avx2_select_w7: -.byte 0x0f,0x0b +.Lavx2_select_w7: + vzeroupper + vmovdqa .LThree(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + + vmovdqa .LOne(%rip),%ymm4 + vmovdqa .LTwo(%rip),%ymm8 + vmovdqa .LThree(%rip),%ymm12 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + + movq $21,%rax +.Lselect_loop_avx2_w7: + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vmovdqa 64(%rsi),%ymm9 + vmovdqa 96(%rsi),%ymm10 + + vmovdqa 128(%rsi),%ymm13 + vmovdqa 160(%rsi),%ymm14 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + vpcmpeqd %ymm1,%ymm8,%ymm11 + vpcmpeqd %ymm1,%ymm12,%ymm15 + + vpaddd %ymm0,%ymm4,%ymm4 + vpaddd %ymm0,%ymm8,%ymm8 + vpaddd %ymm0,%ymm12,%ymm12 + leaq 192(%rsi),%rsi + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + vpand %ymm11,%ymm9,%ymm9 + vpand %ymm11,%ymm10,%ymm10 + vpand %ymm15,%ymm13,%ymm13 + vpand %ymm15,%ymm14,%ymm14 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + vpxor %ymm9,%ymm2,%ymm2 + vpxor %ymm10,%ymm3,%ymm3 + vpxor %ymm13,%ymm2,%ymm2 + vpxor %ymm14,%ymm3,%ymm3 + + decq %rax + jnz .Lselect_loop_avx2_w7 + + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vzeroupper .byte 0xf3,0xc3 .size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 .type __ecp_nistz256_add_toq,@function .align 32 __ecp_nistz256_add_toq: addq 0(%rbx),%r12 adcq 8(%rbx),%r13 movq %r12,%rax adcq 16(%rbx),%r8 adcq 24(%rbx),%r9 movq %r13,%rbp sbbq %r11,%r11 subq $-1,%r12 movq %r8,%rcx sbbq %r14,%r13 sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 testq %r11,%r11 cmovzq %rax,%r12 cmovzq %rbp,%r13 movq %r12,0(%rdi) cmovzq %rcx,%r8 movq %r13,8(%rdi) cmovzq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) .byte 0xf3,0xc3 .size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq .type __ecp_nistz256_sub_fromq,@function .align 32 __ecp_nistz256_sub_fromq: subq 0(%rbx),%r12 sbbq 8(%rbx),%r13 movq %r12,%rax sbbq 16(%rbx),%r8 sbbq 24(%rbx),%r9 movq %r13,%rbp sbbq %r11,%r11 addq $-1,%r12 movq %r8,%rcx adcq %r14,%r13 adcq $0,%r8 movq %r9,%r10 adcq %r15,%r9 testq %r11,%r11 cmovzq %rax,%r12 cmovzq %rbp,%r13 movq %r12,0(%rdi) cmovzq %rcx,%r8 movq %r13,8(%rdi) cmovzq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) .byte 0xf3,0xc3 .size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq .type __ecp_nistz256_subq,@function .align 32 __ecp_nistz256_subq: subq %r12,%rax sbbq %r13,%rbp movq %rax,%r12 sbbq %r8,%rcx sbbq %r9,%r10 movq %rbp,%r13 sbbq %r11,%r11 addq $-1,%rax movq %rcx,%r8 adcq %r14,%rbp adcq $0,%rcx movq %r10,%r9 adcq %r15,%r10 testq %r11,%r11 cmovnzq %rax,%r12 cmovnzq %rbp,%r13 cmovnzq %rcx,%r8 cmovnzq %r10,%r9 .byte 0xf3,0xc3 .size __ecp_nistz256_subq,.-__ecp_nistz256_subq .type __ecp_nistz256_mul_by_2q,@function .align 32 __ecp_nistz256_mul_by_2q: addq %r12,%r12 adcq %r13,%r13 movq %r12,%rax adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp sbbq %r11,%r11 subq $-1,%r12 movq %r8,%rcx sbbq %r14,%r13 sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 testq %r11,%r11 cmovzq %rax,%r12 cmovzq %rbp,%r13 movq %r12,0(%rdi) cmovzq %rcx,%r8 movq %r13,8(%rdi) cmovzq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) .byte 0xf3,0xc3 .size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q .globl ecp_nistz256_point_double .type ecp_nistz256_point_double,@function .align 32 
ecp_nistz256_point_double: + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je .Lpoint_doublex pushq %rbp pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $160+8,%rsp .Lpoint_double_shortcutq: movdqu 0(%rsi),%xmm0 movq %rsi,%rbx movdqu 16(%rsi),%xmm1 movq 32+0(%rsi),%r12 movq 32+8(%rsi),%r13 movq 32+16(%rsi),%r8 movq 32+24(%rsi),%r9 movq .Lpoly+8(%rip),%r14 movq .Lpoly+24(%rip),%r15 movdqa %xmm0,96(%rsp) movdqa %xmm1,96+16(%rsp) leaq 32(%rdi),%r10 leaq 64(%rdi),%r11 .byte 102,72,15,110,199 .byte 102,73,15,110,202 .byte 102,73,15,110,211 leaq 0(%rsp),%rdi call __ecp_nistz256_mul_by_2q movq 64+0(%rsi),%rax movq 64+8(%rsi),%r14 movq 64+16(%rsi),%r15 movq 64+24(%rsi),%r8 leaq 64-0(%rsi),%rsi leaq 64(%rsp),%rdi call __ecp_nistz256_sqr_montq movq 0+0(%rsp),%rax movq 8+0(%rsp),%r14 leaq 0+0(%rsp),%rsi movq 16+0(%rsp),%r15 movq 24+0(%rsp),%r8 leaq 0(%rsp),%rdi call __ecp_nistz256_sqr_montq movq 32(%rbx),%rax movq 64+0(%rbx),%r9 movq 64+8(%rbx),%r10 movq 64+16(%rbx),%r11 movq 64+24(%rbx),%r12 leaq 64-0(%rbx),%rsi leaq 32(%rbx),%rbx .byte 102,72,15,126,215 call __ecp_nistz256_mul_montq call __ecp_nistz256_mul_by_2q movq 96+0(%rsp),%r12 movq 96+8(%rsp),%r13 leaq 64(%rsp),%rbx movq 96+16(%rsp),%r8 movq 96+24(%rsp),%r9 leaq 32(%rsp),%rdi call __ecp_nistz256_add_toq movq 96+0(%rsp),%r12 movq 96+8(%rsp),%r13 leaq 64(%rsp),%rbx movq 96+16(%rsp),%r8 movq 96+24(%rsp),%r9 leaq 64(%rsp),%rdi call __ecp_nistz256_sub_fromq movq 0+0(%rsp),%rax movq 8+0(%rsp),%r14 leaq 0+0(%rsp),%rsi movq 16+0(%rsp),%r15 movq 24+0(%rsp),%r8 .byte 102,72,15,126,207 call __ecp_nistz256_sqr_montq xorq %r9,%r9 movq %r12,%rax addq $-1,%r12 movq %r13,%r10 adcq %rsi,%r13 movq %r14,%rcx adcq $0,%r14 movq %r15,%r8 adcq %rbp,%r15 adcq $0,%r9 xorq %rsi,%rsi testq $1,%rax cmovzq %rax,%r12 cmovzq %r10,%r13 cmovzq %rcx,%r14 cmovzq %r8,%r15 cmovzq %rsi,%r9 movq %r13,%rax shrq $1,%r12 shlq $63,%rax movq %r14,%r10 shrq $1,%r13 orq %rax,%r12 shlq $63,%r10 movq %r15,%rcx shrq $1,%r14 orq %r10,%r13 shlq $63,%rcx movq %r12,0(%rdi) shrq $1,%r15 movq %r13,8(%rdi) shlq $63,%r9 orq %rcx,%r14 orq %r9,%r15 movq %r14,16(%rdi) movq %r15,24(%rdi) movq 64(%rsp),%rax leaq 64(%rsp),%rbx movq 0+32(%rsp),%r9 movq 8+32(%rsp),%r10 leaq 0+32(%rsp),%rsi movq 16+32(%rsp),%r11 movq 24+32(%rsp),%r12 leaq 32(%rsp),%rdi call __ecp_nistz256_mul_montq leaq 128(%rsp),%rdi call __ecp_nistz256_mul_by_2q leaq 32(%rsp),%rbx leaq 32(%rsp),%rdi call __ecp_nistz256_add_toq movq 96(%rsp),%rax leaq 96(%rsp),%rbx movq 0+0(%rsp),%r9 movq 8+0(%rsp),%r10 leaq 0+0(%rsp),%rsi movq 16+0(%rsp),%r11 movq 24+0(%rsp),%r12 leaq 0(%rsp),%rdi call __ecp_nistz256_mul_montq leaq 128(%rsp),%rdi call __ecp_nistz256_mul_by_2q movq 0+32(%rsp),%rax movq 8+32(%rsp),%r14 leaq 0+32(%rsp),%rsi movq 16+32(%rsp),%r15 movq 24+32(%rsp),%r8 .byte 102,72,15,126,199 call __ecp_nistz256_sqr_montq leaq 128(%rsp),%rbx movq %r14,%r8 movq %r15,%r9 movq %rsi,%r14 movq %rbp,%r15 call __ecp_nistz256_sub_fromq movq 0+0(%rsp),%rax movq 0+8(%rsp),%rbp movq 0+16(%rsp),%rcx movq 0+24(%rsp),%r10 leaq 0(%rsp),%rdi call __ecp_nistz256_subq movq 32(%rsp),%rax leaq 32(%rsp),%rbx movq %r12,%r14 xorl %ecx,%ecx movq %r12,0+0(%rsp) movq %r13,%r10 movq %r13,0+8(%rsp) cmovzq %r8,%r11 movq %r8,0+16(%rsp) leaq 0-0(%rsp),%rsi cmovzq %r9,%r12 movq %r9,0+24(%rsp) movq %r14,%r9 leaq 0(%rsp),%rdi call __ecp_nistz256_mul_montq .byte 102,72,15,126,203 .byte 102,72,15,126,207 call __ecp_nistz256_sub_fromq addq $160+8,%rsp popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx popq %rbp .byte 0xf3,0xc3 .size 
ecp_nistz256_point_double,.-ecp_nistz256_point_double .globl ecp_nistz256_point_add .type ecp_nistz256_point_add,@function .align 32 ecp_nistz256_point_add: + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je .Lpoint_addx pushq %rbp pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $576+8,%rsp movdqu 0(%rsi),%xmm0 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 movdqu 48(%rsi),%xmm3 movdqu 64(%rsi),%xmm4 movdqu 80(%rsi),%xmm5 movq %rsi,%rbx movq %rdx,%rsi movdqa %xmm0,384(%rsp) movdqa %xmm1,384+16(%rsp) por %xmm0,%xmm1 movdqa %xmm2,416(%rsp) movdqa %xmm3,416+16(%rsp) por %xmm2,%xmm3 movdqa %xmm4,448(%rsp) movdqa %xmm5,448+16(%rsp) por %xmm1,%xmm3 movdqu 0(%rsi),%xmm0 pshufd $0xb1,%xmm3,%xmm5 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 por %xmm3,%xmm5 movdqu 48(%rsi),%xmm3 movq 64+0(%rsi),%rax movq 64+8(%rsi),%r14 movq 64+16(%rsi),%r15 movq 64+24(%rsi),%r8 movdqa %xmm0,480(%rsp) pshufd $0x1e,%xmm5,%xmm4 movdqa %xmm1,480+16(%rsp) por %xmm0,%xmm1 .byte 102,72,15,110,199 movdqa %xmm2,512(%rsp) movdqa %xmm3,512+16(%rsp) por %xmm2,%xmm3 por %xmm4,%xmm5 pxor %xmm4,%xmm4 por %xmm1,%xmm3 leaq 64-0(%rsi),%rsi movq %rax,544+0(%rsp) movq %r14,544+8(%rsp) movq %r15,544+16(%rsp) movq %r8,544+24(%rsp) leaq 96(%rsp),%rdi call __ecp_nistz256_sqr_montq pcmpeqd %xmm4,%xmm5 pshufd $0xb1,%xmm3,%xmm4 por %xmm3,%xmm4 pshufd $0,%xmm5,%xmm5 pshufd $0x1e,%xmm4,%xmm3 por %xmm3,%xmm4 pxor %xmm3,%xmm3 pcmpeqd %xmm3,%xmm4 pshufd $0,%xmm4,%xmm4 movq 64+0(%rbx),%rax movq 64+8(%rbx),%r14 movq 64+16(%rbx),%r15 movq 64+24(%rbx),%r8 .byte 102,72,15,110,203 leaq 64-0(%rbx),%rsi leaq 32(%rsp),%rdi call __ecp_nistz256_sqr_montq movq 544(%rsp),%rax leaq 544(%rsp),%rbx movq 0+96(%rsp),%r9 movq 8+96(%rsp),%r10 leaq 0+96(%rsp),%rsi movq 16+96(%rsp),%r11 movq 24+96(%rsp),%r12 leaq 224(%rsp),%rdi call __ecp_nistz256_mul_montq movq 448(%rsp),%rax leaq 448(%rsp),%rbx movq 0+32(%rsp),%r9 movq 8+32(%rsp),%r10 leaq 0+32(%rsp),%rsi movq 16+32(%rsp),%r11 movq 24+32(%rsp),%r12 leaq 256(%rsp),%rdi call __ecp_nistz256_mul_montq movq 416(%rsp),%rax leaq 416(%rsp),%rbx movq 0+224(%rsp),%r9 movq 8+224(%rsp),%r10 leaq 0+224(%rsp),%rsi movq 16+224(%rsp),%r11 movq 24+224(%rsp),%r12 leaq 224(%rsp),%rdi call __ecp_nistz256_mul_montq movq 512(%rsp),%rax leaq 512(%rsp),%rbx movq 0+256(%rsp),%r9 movq 8+256(%rsp),%r10 leaq 0+256(%rsp),%rsi movq 16+256(%rsp),%r11 movq 24+256(%rsp),%r12 leaq 256(%rsp),%rdi call __ecp_nistz256_mul_montq leaq 224(%rsp),%rbx leaq 64(%rsp),%rdi call __ecp_nistz256_sub_fromq orq %r13,%r12 movdqa %xmm4,%xmm2 orq %r8,%r12 orq %r9,%r12 por %xmm5,%xmm2 .byte 102,73,15,110,220 movq 384(%rsp),%rax leaq 384(%rsp),%rbx movq 0+96(%rsp),%r9 movq 8+96(%rsp),%r10 leaq 0+96(%rsp),%rsi movq 16+96(%rsp),%r11 movq 24+96(%rsp),%r12 leaq 160(%rsp),%rdi call __ecp_nistz256_mul_montq movq 480(%rsp),%rax leaq 480(%rsp),%rbx movq 0+32(%rsp),%r9 movq 8+32(%rsp),%r10 leaq 0+32(%rsp),%rsi movq 16+32(%rsp),%r11 movq 24+32(%rsp),%r12 leaq 192(%rsp),%rdi call __ecp_nistz256_mul_montq leaq 160(%rsp),%rbx leaq 0(%rsp),%rdi call __ecp_nistz256_sub_fromq orq %r13,%r12 orq %r8,%r12 orq %r9,%r12 .byte 0x3e jnz .Ladd_proceedq .byte 102,73,15,126,208 .byte 102,73,15,126,217 testq %r8,%r8 jnz .Ladd_proceedq testq %r9,%r9 jz .Ladd_doubleq .byte 102,72,15,126,199 pxor %xmm0,%xmm0 movdqu %xmm0,0(%rdi) movdqu %xmm0,16(%rdi) movdqu %xmm0,32(%rdi) movdqu %xmm0,48(%rdi) movdqu %xmm0,64(%rdi) movdqu %xmm0,80(%rdi) jmp .Ladd_doneq .align 32 .Ladd_doubleq: .byte 102,72,15,126,206 .byte 102,72,15,126,199 addq $416,%rsp jmp 
.Lpoint_double_shortcutq .align 32 .Ladd_proceedq: movq 0+64(%rsp),%rax movq 8+64(%rsp),%r14 leaq 0+64(%rsp),%rsi movq 16+64(%rsp),%r15 movq 24+64(%rsp),%r8 leaq 96(%rsp),%rdi call __ecp_nistz256_sqr_montq movq 448(%rsp),%rax leaq 448(%rsp),%rbx movq 0+0(%rsp),%r9 movq 8+0(%rsp),%r10 leaq 0+0(%rsp),%rsi movq 16+0(%rsp),%r11 movq 24+0(%rsp),%r12 leaq 352(%rsp),%rdi call __ecp_nistz256_mul_montq movq 0+0(%rsp),%rax movq 8+0(%rsp),%r14 leaq 0+0(%rsp),%rsi movq 16+0(%rsp),%r15 movq 24+0(%rsp),%r8 leaq 32(%rsp),%rdi call __ecp_nistz256_sqr_montq movq 544(%rsp),%rax leaq 544(%rsp),%rbx movq 0+352(%rsp),%r9 movq 8+352(%rsp),%r10 leaq 0+352(%rsp),%rsi movq 16+352(%rsp),%r11 movq 24+352(%rsp),%r12 leaq 352(%rsp),%rdi call __ecp_nistz256_mul_montq movq 0(%rsp),%rax leaq 0(%rsp),%rbx movq 0+32(%rsp),%r9 movq 8+32(%rsp),%r10 leaq 0+32(%rsp),%rsi movq 16+32(%rsp),%r11 movq 24+32(%rsp),%r12 leaq 128(%rsp),%rdi call __ecp_nistz256_mul_montq movq 160(%rsp),%rax leaq 160(%rsp),%rbx movq 0+32(%rsp),%r9 movq 8+32(%rsp),%r10 leaq 0+32(%rsp),%rsi movq 16+32(%rsp),%r11 movq 24+32(%rsp),%r12 leaq 192(%rsp),%rdi call __ecp_nistz256_mul_montq addq %r12,%r12 leaq 96(%rsp),%rsi adcq %r13,%r13 movq %r12,%rax adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp sbbq %r11,%r11 subq $-1,%r12 movq %r8,%rcx sbbq %r14,%r13 sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 testq %r11,%r11 cmovzq %rax,%r12 movq 0(%rsi),%rax cmovzq %rbp,%r13 movq 8(%rsi),%rbp cmovzq %rcx,%r8 movq 16(%rsi),%rcx cmovzq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq leaq 128(%rsp),%rbx leaq 288(%rsp),%rdi call __ecp_nistz256_sub_fromq movq 192+0(%rsp),%rax movq 192+8(%rsp),%rbp movq 192+16(%rsp),%rcx movq 192+24(%rsp),%r10 leaq 320(%rsp),%rdi call __ecp_nistz256_subq movq %r12,0(%rdi) movq %r13,8(%rdi) movq %r8,16(%rdi) movq %r9,24(%rdi) movq 128(%rsp),%rax leaq 128(%rsp),%rbx movq 0+224(%rsp),%r9 movq 8+224(%rsp),%r10 leaq 0+224(%rsp),%rsi movq 16+224(%rsp),%r11 movq 24+224(%rsp),%r12 leaq 256(%rsp),%rdi call __ecp_nistz256_mul_montq movq 320(%rsp),%rax leaq 320(%rsp),%rbx movq 0+64(%rsp),%r9 movq 8+64(%rsp),%r10 leaq 0+64(%rsp),%rsi movq 16+64(%rsp),%r11 movq 24+64(%rsp),%r12 leaq 320(%rsp),%rdi call __ecp_nistz256_mul_montq leaq 256(%rsp),%rbx leaq 320(%rsp),%rdi call __ecp_nistz256_sub_fromq .byte 102,72,15,126,199 movdqa %xmm5,%xmm0 movdqa %xmm5,%xmm1 pandn 352(%rsp),%xmm0 movdqa %xmm5,%xmm2 pandn 352+16(%rsp),%xmm1 movdqa %xmm5,%xmm3 pand 544(%rsp),%xmm2 pand 544+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqa %xmm4,%xmm0 movdqa %xmm4,%xmm1 pandn %xmm2,%xmm0 movdqa %xmm4,%xmm2 pandn %xmm3,%xmm1 movdqa %xmm4,%xmm3 pand 448(%rsp),%xmm2 pand 448+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqu %xmm2,64(%rdi) movdqu %xmm3,80(%rdi) movdqa %xmm5,%xmm0 movdqa %xmm5,%xmm1 pandn 288(%rsp),%xmm0 movdqa %xmm5,%xmm2 pandn 288+16(%rsp),%xmm1 movdqa %xmm5,%xmm3 pand 480(%rsp),%xmm2 pand 480+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqa %xmm4,%xmm0 movdqa %xmm4,%xmm1 pandn %xmm2,%xmm0 movdqa %xmm4,%xmm2 pandn %xmm3,%xmm1 movdqa %xmm4,%xmm3 pand 384(%rsp),%xmm2 pand 384+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqu %xmm2,0(%rdi) movdqu %xmm3,16(%rdi) movdqa %xmm5,%xmm0 movdqa %xmm5,%xmm1 pandn 320(%rsp),%xmm0 movdqa %xmm5,%xmm2 pandn 320+16(%rsp),%xmm1 movdqa %xmm5,%xmm3 pand 512(%rsp),%xmm2 pand 512+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqa %xmm4,%xmm0 movdqa %xmm4,%xmm1 pandn %xmm2,%xmm0 movdqa %xmm4,%xmm2 pandn %xmm3,%xmm1 movdqa %xmm4,%xmm3 pand 416(%rsp),%xmm2 pand 416+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqu 
%xmm2,32(%rdi) movdqu %xmm3,48(%rdi) .Ladd_doneq: addq $576+8,%rsp popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx popq %rbp .byte 0xf3,0xc3 .size ecp_nistz256_point_add,.-ecp_nistz256_point_add .globl ecp_nistz256_point_add_affine .type ecp_nistz256_point_add_affine,@function .align 32 ecp_nistz256_point_add_affine: + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je .Lpoint_add_affinex pushq %rbp pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $480+8,%rsp movdqu 0(%rsi),%xmm0 movq %rdx,%rbx movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 movdqu 48(%rsi),%xmm3 movdqu 64(%rsi),%xmm4 movdqu 80(%rsi),%xmm5 movq 64+0(%rsi),%rax movq 64+8(%rsi),%r14 movq 64+16(%rsi),%r15 movq 64+24(%rsi),%r8 movdqa %xmm0,320(%rsp) movdqa %xmm1,320+16(%rsp) por %xmm0,%xmm1 movdqa %xmm2,352(%rsp) movdqa %xmm3,352+16(%rsp) por %xmm2,%xmm3 movdqa %xmm4,384(%rsp) movdqa %xmm5,384+16(%rsp) por %xmm1,%xmm3 movdqu 0(%rbx),%xmm0 pshufd $0xb1,%xmm3,%xmm5 movdqu 16(%rbx),%xmm1 movdqu 32(%rbx),%xmm2 por %xmm3,%xmm5 movdqu 48(%rbx),%xmm3 movdqa %xmm0,416(%rsp) pshufd $0x1e,%xmm5,%xmm4 movdqa %xmm1,416+16(%rsp) por %xmm0,%xmm1 .byte 102,72,15,110,199 movdqa %xmm2,448(%rsp) movdqa %xmm3,448+16(%rsp) por %xmm2,%xmm3 por %xmm4,%xmm5 pxor %xmm4,%xmm4 por %xmm1,%xmm3 leaq 64-0(%rsi),%rsi leaq 32(%rsp),%rdi call __ecp_nistz256_sqr_montq pcmpeqd %xmm4,%xmm5 pshufd $0xb1,%xmm3,%xmm4 movq 0(%rbx),%rax movq %r12,%r9 por %xmm3,%xmm4 pshufd $0,%xmm5,%xmm5 pshufd $0x1e,%xmm4,%xmm3 movq %r13,%r10 por %xmm3,%xmm4 pxor %xmm3,%xmm3 movq %r14,%r11 pcmpeqd %xmm3,%xmm4 pshufd $0,%xmm4,%xmm4 leaq 32-0(%rsp),%rsi movq %r15,%r12 leaq 0(%rsp),%rdi call __ecp_nistz256_mul_montq leaq 320(%rsp),%rbx leaq 64(%rsp),%rdi call __ecp_nistz256_sub_fromq movq 384(%rsp),%rax leaq 384(%rsp),%rbx movq 0+32(%rsp),%r9 movq 8+32(%rsp),%r10 leaq 0+32(%rsp),%rsi movq 16+32(%rsp),%r11 movq 24+32(%rsp),%r12 leaq 32(%rsp),%rdi call __ecp_nistz256_mul_montq movq 384(%rsp),%rax leaq 384(%rsp),%rbx movq 0+64(%rsp),%r9 movq 8+64(%rsp),%r10 leaq 0+64(%rsp),%rsi movq 16+64(%rsp),%r11 movq 24+64(%rsp),%r12 leaq 288(%rsp),%rdi call __ecp_nistz256_mul_montq movq 448(%rsp),%rax leaq 448(%rsp),%rbx movq 0+32(%rsp),%r9 movq 8+32(%rsp),%r10 leaq 0+32(%rsp),%rsi movq 16+32(%rsp),%r11 movq 24+32(%rsp),%r12 leaq 32(%rsp),%rdi call __ecp_nistz256_mul_montq leaq 352(%rsp),%rbx leaq 96(%rsp),%rdi call __ecp_nistz256_sub_fromq movq 0+64(%rsp),%rax movq 8+64(%rsp),%r14 leaq 0+64(%rsp),%rsi movq 16+64(%rsp),%r15 movq 24+64(%rsp),%r8 leaq 128(%rsp),%rdi call __ecp_nistz256_sqr_montq movq 0+96(%rsp),%rax movq 8+96(%rsp),%r14 leaq 0+96(%rsp),%rsi movq 16+96(%rsp),%r15 movq 24+96(%rsp),%r8 leaq 192(%rsp),%rdi call __ecp_nistz256_sqr_montq movq 128(%rsp),%rax leaq 128(%rsp),%rbx movq 0+64(%rsp),%r9 movq 8+64(%rsp),%r10 leaq 0+64(%rsp),%rsi movq 16+64(%rsp),%r11 movq 24+64(%rsp),%r12 leaq 160(%rsp),%rdi call __ecp_nistz256_mul_montq movq 320(%rsp),%rax leaq 320(%rsp),%rbx movq 0+128(%rsp),%r9 movq 8+128(%rsp),%r10 leaq 0+128(%rsp),%rsi movq 16+128(%rsp),%r11 movq 24+128(%rsp),%r12 leaq 0(%rsp),%rdi call __ecp_nistz256_mul_montq addq %r12,%r12 leaq 192(%rsp),%rsi adcq %r13,%r13 movq %r12,%rax adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp sbbq %r11,%r11 subq $-1,%r12 movq %r8,%rcx sbbq %r14,%r13 sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 testq %r11,%r11 cmovzq %rax,%r12 movq 0(%rsi),%rax cmovzq %rbp,%r13 movq 8(%rsi),%rbp cmovzq %rcx,%r8 movq 16(%rsi),%rcx cmovzq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq leaq 160(%rsp),%rbx leaq 224(%rsp),%rdi call 
__ecp_nistz256_sub_fromq movq 0+0(%rsp),%rax movq 0+8(%rsp),%rbp movq 0+16(%rsp),%rcx movq 0+24(%rsp),%r10 leaq 64(%rsp),%rdi call __ecp_nistz256_subq movq %r12,0(%rdi) movq %r13,8(%rdi) movq %r8,16(%rdi) movq %r9,24(%rdi) movq 352(%rsp),%rax leaq 352(%rsp),%rbx movq 0+160(%rsp),%r9 movq 8+160(%rsp),%r10 leaq 0+160(%rsp),%rsi movq 16+160(%rsp),%r11 movq 24+160(%rsp),%r12 leaq 32(%rsp),%rdi call __ecp_nistz256_mul_montq movq 96(%rsp),%rax leaq 96(%rsp),%rbx movq 0+64(%rsp),%r9 movq 8+64(%rsp),%r10 leaq 0+64(%rsp),%rsi movq 16+64(%rsp),%r11 movq 24+64(%rsp),%r12 leaq 64(%rsp),%rdi call __ecp_nistz256_mul_montq leaq 32(%rsp),%rbx leaq 256(%rsp),%rdi call __ecp_nistz256_sub_fromq .byte 102,72,15,126,199 movdqa %xmm5,%xmm0 movdqa %xmm5,%xmm1 pandn 288(%rsp),%xmm0 movdqa %xmm5,%xmm2 pandn 288+16(%rsp),%xmm1 movdqa %xmm5,%xmm3 pand .LONE_mont(%rip),%xmm2 pand .LONE_mont+16(%rip),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqa %xmm4,%xmm0 movdqa %xmm4,%xmm1 pandn %xmm2,%xmm0 movdqa %xmm4,%xmm2 pandn %xmm3,%xmm1 movdqa %xmm4,%xmm3 pand 384(%rsp),%xmm2 pand 384+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqu %xmm2,64(%rdi) movdqu %xmm3,80(%rdi) movdqa %xmm5,%xmm0 movdqa %xmm5,%xmm1 pandn 224(%rsp),%xmm0 movdqa %xmm5,%xmm2 pandn 224+16(%rsp),%xmm1 movdqa %xmm5,%xmm3 pand 416(%rsp),%xmm2 pand 416+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqa %xmm4,%xmm0 movdqa %xmm4,%xmm1 pandn %xmm2,%xmm0 movdqa %xmm4,%xmm2 pandn %xmm3,%xmm1 movdqa %xmm4,%xmm3 pand 320(%rsp),%xmm2 pand 320+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqu %xmm2,0(%rdi) movdqu %xmm3,16(%rdi) movdqa %xmm5,%xmm0 movdqa %xmm5,%xmm1 pandn 256(%rsp),%xmm0 movdqa %xmm5,%xmm2 pandn 256+16(%rsp),%xmm1 movdqa %xmm5,%xmm3 pand 448(%rsp),%xmm2 pand 448+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqa %xmm4,%xmm0 movdqa %xmm4,%xmm1 pandn %xmm2,%xmm0 movdqa %xmm4,%xmm2 pandn %xmm3,%xmm1 movdqa %xmm4,%xmm3 pand 352(%rsp),%xmm2 pand 352+16(%rsp),%xmm3 por %xmm0,%xmm2 por %xmm1,%xmm3 movdqu %xmm2,32(%rdi) movdqu %xmm3,48(%rdi) addq $480+8,%rsp popq %r15 popq %r14 popq %r13 popq %r12 popq %rbx popq %rbp .byte 0xf3,0xc3 .size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine +.type __ecp_nistz256_add_tox,@function +.align 32 +__ecp_nistz256_add_tox: + xorq %r11,%r11 + adcq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox + +.type __ecp_nistz256_sub_fromx,@function +.align 32 +__ecp_nistz256_sub_fromx: + xorq %r11,%r11 + sbbq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq $0,%r11 + + xorq %r10,%r10 + adcq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx + +.type __ecp_nistz256_subx,@function +.align 32 +__ecp_nistz256_subx: + xorq %r11,%r11 + sbbq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + 
sbbq $0,%r11 + + xorq %r9,%r9 + adcq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + + btq $0,%r11 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + cmovcq %rcx,%r8 + cmovcq %r10,%r9 + + .byte 0xf3,0xc3 +.size __ecp_nistz256_subx,.-__ecp_nistz256_subx + +.type __ecp_nistz256_mul_by_2x,@function +.align 32 +__ecp_nistz256_mul_by_2x: + xorq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x +.type ecp_nistz256_point_doublex,@function +.align 32 +ecp_nistz256_point_doublex: +.Lpoint_doublex: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp + +.Lpoint_double_shortcutx: + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq .Lpoly+8(%rip),%r14 + movq .Lpoly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-128(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 32(%rbx),%rdx + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-128(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montx + call __ecp_nistz256_mul_by_2x + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montx + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rdx + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + 
call __ecp_nistz256_add_tox + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 0+32(%rsp),%rdx + movq 8+32(%rsp),%r14 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montx + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subx + + movq 32(%rsp),%rdx + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-128(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromx + + addq $160+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex +.type ecp_nistz256_point_addx,@function +.align 32 +ecp_nistz256_point_addx: +.Lpoint_addx: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rsi),%xmm0 + pshufd $0xb1,%xmm3,%xmm5 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-128(%rsi),%rsi + movq %rdx,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm3,%xmm4 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rdx + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 +.byte 102,72,15,110,203 + + leaq 64-128(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 416(%rsp),%rdx + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 
+ leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 512(%rsp),%rdx + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq -128+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 480(%rsp),%rdx + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 0x3e + jnz .Ladd_proceedx +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + testq %r8,%r8 + jnz .Ladd_proceedx + testq %r9,%r9 + jz .Ladd_doublex + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp .Ladd_donex + +.align 32 +.Ladd_doublex: +.byte 102,72,15,126,206 +.byte 102,72,15,126,199 + addq $416,%rsp + jmp .Lpoint_double_shortcutx + +.align 32 +.Ladd_proceedx: + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq -128+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0(%rsp),%rdx + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 160(%rsp),%rdx + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rdx + leaq 
128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +.Ladd_donex: + addq $576+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx +.type ecp_nistz256_point_add_affinex,@function +.align 32 +ecp_nistz256_point_add_affinex: +.Lpoint_add_affinex: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rbx),%xmm0 + pshufd $0xb1,%xmm3,%xmm5 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-128(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm3,%xmm4 + movq 0(%rbx),%rdx + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd 
$0,%xmm4,%xmm4 + + leaq 32-128(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+96(%rsp),%rdx + movq 8+96(%rsp),%r14 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq -128+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rdx + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq -128+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand .LONE_mont(%rip),%xmm2 + pand .LONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 
224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + addq $480+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex Index: head/secure/lib/libcrypto/amd64/ghash-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/ghash-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/ghash-x86_64.S (revision 299481) @@ -1,1319 +1,1791 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from ghash-x86_64.pl. .text .globl gcm_gmult_4bit .type gcm_gmult_4bit,@function .align 16 gcm_gmult_4bit: pushq %rbx pushq %rbp pushq %r12 .Lgmult_prologue: movzbq 15(%rdi),%r8 leaq .Lrem_4bit(%rip),%r11 xorq %rax,%rax xorq %rbx,%rbx movb %r8b,%al movb %r8b,%bl shlb $4,%al movq $14,%rcx movq 8(%rsi,%rax,1),%r8 movq (%rsi,%rax,1),%r9 andb $0xf0,%bl movq %r8,%rdx jmp .Loop1 .align 16 .Loop1: shrq $4,%r8 andq $0xf,%rdx movq %r9,%r10 movb (%rdi,%rcx,1),%al shrq $4,%r9 xorq 8(%rsi,%rbx,1),%r8 shlq $60,%r10 xorq (%rsi,%rbx,1),%r9 movb %al,%bl xorq (%r11,%rdx,8),%r9 movq %r8,%rdx shlb $4,%al xorq %r10,%r8 decq %rcx js .Lbreak1 shrq $4,%r8 andq $0xf,%rdx movq %r9,%r10 shrq $4,%r9 xorq 8(%rsi,%rax,1),%r8 shlq $60,%r10 xorq (%rsi,%rax,1),%r9 andb $0xf0,%bl xorq (%r11,%rdx,8),%r9 movq %r8,%rdx xorq %r10,%r8 jmp .Loop1 .align 16 .Lbreak1: shrq $4,%r8 andq $0xf,%rdx movq %r9,%r10 shrq $4,%r9 xorq 8(%rsi,%rax,1),%r8 shlq $60,%r10 xorq (%rsi,%rax,1),%r9 andb $0xf0,%bl xorq (%r11,%rdx,8),%r9 movq %r8,%rdx xorq %r10,%r8 shrq $4,%r8 andq $0xf,%rdx movq %r9,%r10 shrq $4,%r9 xorq 8(%rsi,%rbx,1),%r8 shlq $60,%r10 xorq (%rsi,%rbx,1),%r9 xorq %r10,%r8 xorq (%r11,%rdx,8),%r9 bswapq %r8 bswapq %r9 movq %r8,8(%rdi) movq %r9,(%rdi) movq 16(%rsp),%rbx leaq 24(%rsp),%rsp .Lgmult_epilogue: .byte 0xf3,0xc3 .size gcm_gmult_4bit,.-gcm_gmult_4bit .globl gcm_ghash_4bit .type gcm_ghash_4bit,@function .align 16 gcm_ghash_4bit: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $280,%rsp .Lghash_prologue: movq %rdx,%r14 movq %rcx,%r15 subq $-128,%rsi leaq 16+128(%rsp),%rbp xorl %edx,%edx movq 0+0-128(%rsi),%r8 movq 0+8-128(%rsi),%rax movb %al,%dl shrq $4,%rax movq %r8,%r10 shrq $4,%r8 movq 16+0-128(%rsi),%r9 shlb $4,%dl movq 16+8-128(%rsi),%rbx shlq $60,%r10 movb %dl,0(%rsp) orq %r10,%rax movb %bl,%dl shrq $4,%rbx movq %r9,%r10 shrq $4,%r9 movq %r8,0(%rbp) movq 32+0-128(%rsi),%r8 shlb $4,%dl movq %rax,0-128(%rbp) movq 32+8-128(%rsi),%rax shlq $60,%r10 movb %dl,1(%rsp) orq %r10,%rbx movb %al,%dl shrq $4,%rax movq %r8,%r10 shrq $4,%r8 movq %r9,8(%rbp) movq 48+0-128(%rsi),%r9 shlb $4,%dl 
movq %rbx,8-128(%rbp) movq 48+8-128(%rsi),%rbx shlq $60,%r10 movb %dl,2(%rsp) orq %r10,%rax movb %bl,%dl shrq $4,%rbx movq %r9,%r10 shrq $4,%r9 movq %r8,16(%rbp) movq 64+0-128(%rsi),%r8 shlb $4,%dl movq %rax,16-128(%rbp) movq 64+8-128(%rsi),%rax shlq $60,%r10 movb %dl,3(%rsp) orq %r10,%rbx movb %al,%dl shrq $4,%rax movq %r8,%r10 shrq $4,%r8 movq %r9,24(%rbp) movq 80+0-128(%rsi),%r9 shlb $4,%dl movq %rbx,24-128(%rbp) movq 80+8-128(%rsi),%rbx shlq $60,%r10 movb %dl,4(%rsp) orq %r10,%rax movb %bl,%dl shrq $4,%rbx movq %r9,%r10 shrq $4,%r9 movq %r8,32(%rbp) movq 96+0-128(%rsi),%r8 shlb $4,%dl movq %rax,32-128(%rbp) movq 96+8-128(%rsi),%rax shlq $60,%r10 movb %dl,5(%rsp) orq %r10,%rbx movb %al,%dl shrq $4,%rax movq %r8,%r10 shrq $4,%r8 movq %r9,40(%rbp) movq 112+0-128(%rsi),%r9 shlb $4,%dl movq %rbx,40-128(%rbp) movq 112+8-128(%rsi),%rbx shlq $60,%r10 movb %dl,6(%rsp) orq %r10,%rax movb %bl,%dl shrq $4,%rbx movq %r9,%r10 shrq $4,%r9 movq %r8,48(%rbp) movq 128+0-128(%rsi),%r8 shlb $4,%dl movq %rax,48-128(%rbp) movq 128+8-128(%rsi),%rax shlq $60,%r10 movb %dl,7(%rsp) orq %r10,%rbx movb %al,%dl shrq $4,%rax movq %r8,%r10 shrq $4,%r8 movq %r9,56(%rbp) movq 144+0-128(%rsi),%r9 shlb $4,%dl movq %rbx,56-128(%rbp) movq 144+8-128(%rsi),%rbx shlq $60,%r10 movb %dl,8(%rsp) orq %r10,%rax movb %bl,%dl shrq $4,%rbx movq %r9,%r10 shrq $4,%r9 movq %r8,64(%rbp) movq 160+0-128(%rsi),%r8 shlb $4,%dl movq %rax,64-128(%rbp) movq 160+8-128(%rsi),%rax shlq $60,%r10 movb %dl,9(%rsp) orq %r10,%rbx movb %al,%dl shrq $4,%rax movq %r8,%r10 shrq $4,%r8 movq %r9,72(%rbp) movq 176+0-128(%rsi),%r9 shlb $4,%dl movq %rbx,72-128(%rbp) movq 176+8-128(%rsi),%rbx shlq $60,%r10 movb %dl,10(%rsp) orq %r10,%rax movb %bl,%dl shrq $4,%rbx movq %r9,%r10 shrq $4,%r9 movq %r8,80(%rbp) movq 192+0-128(%rsi),%r8 shlb $4,%dl movq %rax,80-128(%rbp) movq 192+8-128(%rsi),%rax shlq $60,%r10 movb %dl,11(%rsp) orq %r10,%rbx movb %al,%dl shrq $4,%rax movq %r8,%r10 shrq $4,%r8 movq %r9,88(%rbp) movq 208+0-128(%rsi),%r9 shlb $4,%dl movq %rbx,88-128(%rbp) movq 208+8-128(%rsi),%rbx shlq $60,%r10 movb %dl,12(%rsp) orq %r10,%rax movb %bl,%dl shrq $4,%rbx movq %r9,%r10 shrq $4,%r9 movq %r8,96(%rbp) movq 224+0-128(%rsi),%r8 shlb $4,%dl movq %rax,96-128(%rbp) movq 224+8-128(%rsi),%rax shlq $60,%r10 movb %dl,13(%rsp) orq %r10,%rbx movb %al,%dl shrq $4,%rax movq %r8,%r10 shrq $4,%r8 movq %r9,104(%rbp) movq 240+0-128(%rsi),%r9 shlb $4,%dl movq %rbx,104-128(%rbp) movq 240+8-128(%rsi),%rbx shlq $60,%r10 movb %dl,14(%rsp) orq %r10,%rax movb %bl,%dl shrq $4,%rbx movq %r9,%r10 shrq $4,%r9 movq %r8,112(%rbp) shlb $4,%dl movq %rax,112-128(%rbp) shlq $60,%r10 movb %dl,15(%rsp) orq %r10,%rbx movq %r9,120(%rbp) movq %rbx,120-128(%rbp) addq $-128,%rsi movq 8(%rdi),%r8 movq 0(%rdi),%r9 addq %r14,%r15 leaq .Lrem_8bit(%rip),%r11 jmp .Louter_loop .align 16 .Louter_loop: xorq (%r14),%r9 movq 8(%r14),%rdx leaq 16(%r14),%r14 xorq %r8,%rdx movq %r9,(%rdi) movq %rdx,8(%rdi) shrq $32,%rdx xorq %rax,%rax roll $8,%edx movb %dl,%al movzbl %dl,%ebx shlb $4,%al shrl $4,%ebx roll $8,%edx movq 8(%rsi,%rax,1),%r8 movq (%rsi,%rax,1),%r9 movb %dl,%al movzbl %dl,%ecx shlb $4,%al movzbq (%rsp,%rbx,1),%r12 shrl $4,%ecx xorq %r8,%r12 movq %r9,%r10 shrq $8,%r8 movzbq %r12b,%r12 shrq $8,%r9 xorq -128(%rbp,%rbx,8),%r8 shlq $56,%r10 xorq (%rbp,%rbx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r12,2),%r12 movzbl %dl,%ebx shlb $4,%al movzbq (%rsp,%rcx,1),%r13 shrl $4,%ebx shlq $48,%r12 xorq %r8,%r13 movq %r9,%r10 xorq %r12,%r9 shrq $8,%r8 
movzbq %r13b,%r13 shrq $8,%r9 xorq -128(%rbp,%rcx,8),%r8 shlq $56,%r10 xorq (%rbp,%rcx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r13,2),%r13 movzbl %dl,%ecx shlb $4,%al movzbq (%rsp,%rbx,1),%r12 shrl $4,%ecx shlq $48,%r13 xorq %r8,%r12 movq %r9,%r10 xorq %r13,%r9 shrq $8,%r8 movzbq %r12b,%r12 movl 8(%rdi),%edx shrq $8,%r9 xorq -128(%rbp,%rbx,8),%r8 shlq $56,%r10 xorq (%rbp,%rbx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r12,2),%r12 movzbl %dl,%ebx shlb $4,%al movzbq (%rsp,%rcx,1),%r13 shrl $4,%ebx shlq $48,%r12 xorq %r8,%r13 movq %r9,%r10 xorq %r12,%r9 shrq $8,%r8 movzbq %r13b,%r13 shrq $8,%r9 xorq -128(%rbp,%rcx,8),%r8 shlq $56,%r10 xorq (%rbp,%rcx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r13,2),%r13 movzbl %dl,%ecx shlb $4,%al movzbq (%rsp,%rbx,1),%r12 shrl $4,%ecx shlq $48,%r13 xorq %r8,%r12 movq %r9,%r10 xorq %r13,%r9 shrq $8,%r8 movzbq %r12b,%r12 shrq $8,%r9 xorq -128(%rbp,%rbx,8),%r8 shlq $56,%r10 xorq (%rbp,%rbx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r12,2),%r12 movzbl %dl,%ebx shlb $4,%al movzbq (%rsp,%rcx,1),%r13 shrl $4,%ebx shlq $48,%r12 xorq %r8,%r13 movq %r9,%r10 xorq %r12,%r9 shrq $8,%r8 movzbq %r13b,%r13 shrq $8,%r9 xorq -128(%rbp,%rcx,8),%r8 shlq $56,%r10 xorq (%rbp,%rcx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r13,2),%r13 movzbl %dl,%ecx shlb $4,%al movzbq (%rsp,%rbx,1),%r12 shrl $4,%ecx shlq $48,%r13 xorq %r8,%r12 movq %r9,%r10 xorq %r13,%r9 shrq $8,%r8 movzbq %r12b,%r12 movl 4(%rdi),%edx shrq $8,%r9 xorq -128(%rbp,%rbx,8),%r8 shlq $56,%r10 xorq (%rbp,%rbx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r12,2),%r12 movzbl %dl,%ebx shlb $4,%al movzbq (%rsp,%rcx,1),%r13 shrl $4,%ebx shlq $48,%r12 xorq %r8,%r13 movq %r9,%r10 xorq %r12,%r9 shrq $8,%r8 movzbq %r13b,%r13 shrq $8,%r9 xorq -128(%rbp,%rcx,8),%r8 shlq $56,%r10 xorq (%rbp,%rcx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r13,2),%r13 movzbl %dl,%ecx shlb $4,%al movzbq (%rsp,%rbx,1),%r12 shrl $4,%ecx shlq $48,%r13 xorq %r8,%r12 movq %r9,%r10 xorq %r13,%r9 shrq $8,%r8 movzbq %r12b,%r12 shrq $8,%r9 xorq -128(%rbp,%rbx,8),%r8 shlq $56,%r10 xorq (%rbp,%rbx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r12,2),%r12 movzbl %dl,%ebx shlb $4,%al movzbq (%rsp,%rcx,1),%r13 shrl $4,%ebx shlq $48,%r12 xorq %r8,%r13 movq %r9,%r10 xorq %r12,%r9 shrq $8,%r8 movzbq %r13b,%r13 shrq $8,%r9 xorq -128(%rbp,%rcx,8),%r8 shlq $56,%r10 xorq (%rbp,%rcx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r13,2),%r13 movzbl %dl,%ecx shlb $4,%al movzbq (%rsp,%rbx,1),%r12 shrl $4,%ecx shlq $48,%r13 xorq %r8,%r12 movq %r9,%r10 xorq %r13,%r9 shrq $8,%r8 movzbq %r12b,%r12 movl 0(%rdi),%edx shrq $8,%r9 xorq -128(%rbp,%rbx,8),%r8 shlq $56,%r10 xorq (%rbp,%rbx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r12,2),%r12 movzbl %dl,%ebx shlb $4,%al movzbq (%rsp,%rcx,1),%r13 shrl $4,%ebx shlq $48,%r12 xorq %r8,%r13 movq %r9,%r10 xorq %r12,%r9 shrq $8,%r8 movzbq %r13b,%r13 shrq $8,%r9 xorq -128(%rbp,%rcx,8),%r8 shlq $56,%r10 xorq 
(%rbp,%rcx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r13,2),%r13 movzbl %dl,%ecx shlb $4,%al movzbq (%rsp,%rbx,1),%r12 shrl $4,%ecx shlq $48,%r13 xorq %r8,%r12 movq %r9,%r10 xorq %r13,%r9 shrq $8,%r8 movzbq %r12b,%r12 shrq $8,%r9 xorq -128(%rbp,%rbx,8),%r8 shlq $56,%r10 xorq (%rbp,%rbx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r12,2),%r12 movzbl %dl,%ebx shlb $4,%al movzbq (%rsp,%rcx,1),%r13 shrl $4,%ebx shlq $48,%r12 xorq %r8,%r13 movq %r9,%r10 xorq %r12,%r9 shrq $8,%r8 movzbq %r13b,%r13 shrq $8,%r9 xorq -128(%rbp,%rcx,8),%r8 shlq $56,%r10 xorq (%rbp,%rcx,8),%r9 roll $8,%edx xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 movb %dl,%al xorq %r10,%r8 movzwq (%r11,%r13,2),%r13 movzbl %dl,%ecx shlb $4,%al movzbq (%rsp,%rbx,1),%r12 andl $240,%ecx shlq $48,%r13 xorq %r8,%r12 movq %r9,%r10 xorq %r13,%r9 shrq $8,%r8 movzbq %r12b,%r12 movl -4(%rdi),%edx shrq $8,%r9 xorq -128(%rbp,%rbx,8),%r8 shlq $56,%r10 xorq (%rbp,%rbx,8),%r9 movzwq (%r11,%r12,2),%r12 xorq 8(%rsi,%rax,1),%r8 xorq (%rsi,%rax,1),%r9 shlq $48,%r12 xorq %r10,%r8 xorq %r12,%r9 movzbq %r8b,%r13 shrq $4,%r8 movq %r9,%r10 shlb $4,%r13b shrq $4,%r9 xorq 8(%rsi,%rcx,1),%r8 movzwq (%r11,%r13,2),%r13 shlq $60,%r10 xorq (%rsi,%rcx,1),%r9 xorq %r10,%r8 shlq $48,%r13 bswapq %r8 xorq %r13,%r9 bswapq %r9 cmpq %r15,%r14 jb .Louter_loop movq %r8,8(%rdi) movq %r9,(%rdi) leaq 280(%rsp),%rsi movq 0(%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Lghash_epilogue: .byte 0xf3,0xc3 .size gcm_ghash_4bit,.-gcm_ghash_4bit .globl gcm_init_clmul .type gcm_init_clmul,@function .align 16 gcm_init_clmul: .L_init_clmul: movdqu (%rsi),%xmm2 pshufd $78,%xmm2,%xmm2 pshufd $255,%xmm2,%xmm4 movdqa %xmm2,%xmm3 psllq $1,%xmm2 pxor %xmm5,%xmm5 psrlq $63,%xmm3 pcmpgtd %xmm4,%xmm5 pslldq $8,%xmm3 por %xmm3,%xmm2 pand .L0x1c2_polynomial(%rip),%xmm5 pxor %xmm5,%xmm2 pshufd $78,%xmm2,%xmm6 movdqa %xmm2,%xmm0 pxor %xmm2,%xmm6 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,222,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 pshufd $78,%xmm2,%xmm3 pshufd $78,%xmm0,%xmm4 pxor %xmm2,%xmm3 movdqu %xmm2,0(%rdi) pxor %xmm0,%xmm4 movdqu %xmm0,16(%rdi) .byte 102,15,58,15,227,8 movdqu %xmm4,32(%rdi) movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,222,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 movdqa %xmm0,%xmm5 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 
102,15,58,68,202,17 .byte 102,15,58,68,222,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 pshufd $78,%xmm5,%xmm3 pshufd $78,%xmm0,%xmm4 pxor %xmm5,%xmm3 movdqu %xmm5,48(%rdi) pxor %xmm0,%xmm4 movdqu %xmm0,64(%rdi) .byte 102,15,58,15,227,8 movdqu %xmm4,80(%rdi) .byte 0xf3,0xc3 .size gcm_init_clmul,.-gcm_init_clmul .globl gcm_gmult_clmul .type gcm_gmult_clmul,@function .align 16 gcm_gmult_clmul: .L_gmult_clmul: movdqu (%rdi),%xmm0 movdqa .Lbswap_mask(%rip),%xmm5 movdqu (%rsi),%xmm2 movdqu 32(%rsi),%xmm4 .byte 102,15,56,0,197 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 .size gcm_gmult_clmul,.-gcm_gmult_clmul .globl gcm_ghash_clmul .type gcm_ghash_clmul,@function .align 32 gcm_ghash_clmul: .L_ghash_clmul: movdqa .Lbswap_mask(%rip),%xmm10 movdqu (%rdi),%xmm0 movdqu (%rsi),%xmm2 movdqu 32(%rsi),%xmm7 .byte 102,65,15,56,0,194 subq $0x10,%rcx jz .Lodd_tail movdqu 16(%rsi),%xmm6 movl OPENSSL_ia32cap_P+4(%rip),%eax cmpq $0x30,%rcx jb .Lskip4x andl $71303168,%eax cmpl $4194304,%eax je .Lskip4x subq $0x30,%rcx movq $0xA040608020C0E000,%rax movdqu 48(%rsi),%xmm14 movdqu 64(%rsi),%xmm15 movdqu 48(%rdx),%xmm3 movdqu 32(%rdx),%xmm11 .byte 102,65,15,56,0,218 .byte 102,69,15,56,0,218 movdqa %xmm3,%xmm5 pshufd $78,%xmm3,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,68,218,0 .byte 102,15,58,68,234,17 .byte 102,15,58,68,231,0 movdqa %xmm11,%xmm13 pshufd $78,%xmm11,%xmm12 pxor %xmm11,%xmm12 .byte 102,68,15,58,68,222,0 .byte 102,68,15,58,68,238,17 .byte 102,68,15,58,68,231,16 xorps %xmm11,%xmm3 xorps %xmm13,%xmm5 movups 80(%rsi),%xmm7 xorps %xmm12,%xmm4 movdqu 16(%rdx),%xmm11 movdqu 0(%rdx),%xmm8 .byte 102,69,15,56,0,218 .byte 102,69,15,56,0,194 movdqa %xmm11,%xmm13 pshufd $78,%xmm11,%xmm12 pxor %xmm8,%xmm0 pxor %xmm11,%xmm12 .byte 102,69,15,58,68,222,0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm8 pxor %xmm0,%xmm8 .byte 102,69,15,58,68,238,17 .byte 102,68,15,58,68,231,0 xorps %xmm11,%xmm3 xorps %xmm13,%xmm5 leaq 64(%rdx),%rdx subq $0x40,%rcx jc .Ltail4x jmp .Lmod4_loop .align 32 .Lmod4_loop: .byte 102,65,15,58,68,199,0 xorps %xmm12,%xmm4 movdqu 48(%rdx),%xmm11 .byte 102,69,15,56,0,218 .byte 102,65,15,58,68,207,17 xorps %xmm3,%xmm0 movdqu 32(%rdx),%xmm3 movdqa %xmm11,%xmm13 .byte 102,68,15,58,68,199,16 pshufd $78,%xmm11,%xmm12 xorps %xmm5,%xmm1 pxor %xmm11,%xmm12 .byte 102,65,15,56,0,218 movups 32(%rsi),%xmm7 xorps %xmm4,%xmm8 .byte 102,68,15,58,68,218,0 pshufd $78,%xmm3,%xmm4 pxor %xmm0,%xmm8 movdqa %xmm3,%xmm5 pxor %xmm1,%xmm8 pxor %xmm3,%xmm4 movdqa %xmm8,%xmm9 .byte 102,68,15,58,68,234,17 pslldq $8,%xmm8 psrldq $8,%xmm9 pxor 
%xmm8,%xmm0 movdqa .L7_mask(%rip),%xmm8 pxor %xmm9,%xmm1 .byte 102,76,15,110,200 pand %xmm0,%xmm8 .byte 102,69,15,56,0,200 pxor %xmm0,%xmm9 .byte 102,68,15,58,68,231,0 psllq $57,%xmm9 movdqa %xmm9,%xmm8 pslldq $8,%xmm9 .byte 102,15,58,68,222,0 psrldq $8,%xmm8 pxor %xmm9,%xmm0 pxor %xmm8,%xmm1 movdqu 0(%rdx),%xmm8 movdqa %xmm0,%xmm9 psrlq $1,%xmm0 .byte 102,15,58,68,238,17 xorps %xmm11,%xmm3 movdqu 16(%rdx),%xmm11 .byte 102,69,15,56,0,218 .byte 102,15,58,68,231,16 xorps %xmm13,%xmm5 movups 80(%rsi),%xmm7 .byte 102,69,15,56,0,194 pxor %xmm9,%xmm1 pxor %xmm0,%xmm9 psrlq $5,%xmm0 movdqa %xmm11,%xmm13 pxor %xmm12,%xmm4 pshufd $78,%xmm11,%xmm12 pxor %xmm9,%xmm0 pxor %xmm8,%xmm1 pxor %xmm11,%xmm12 .byte 102,69,15,58,68,222,0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 movdqa %xmm0,%xmm1 .byte 102,69,15,58,68,238,17 xorps %xmm11,%xmm3 pshufd $78,%xmm0,%xmm8 pxor %xmm0,%xmm8 .byte 102,68,15,58,68,231,0 xorps %xmm13,%xmm5 leaq 64(%rdx),%rdx subq $0x40,%rcx jnc .Lmod4_loop .Ltail4x: .byte 102,65,15,58,68,199,0 .byte 102,65,15,58,68,207,17 .byte 102,68,15,58,68,199,16 xorps %xmm12,%xmm4 xorps %xmm3,%xmm0 xorps %xmm5,%xmm1 pxor %xmm0,%xmm1 pxor %xmm4,%xmm8 pxor %xmm1,%xmm8 pxor %xmm0,%xmm1 movdqa %xmm8,%xmm9 psrldq $8,%xmm8 pslldq $8,%xmm9 pxor %xmm8,%xmm1 pxor %xmm9,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 addq $0x40,%rcx jz .Ldone movdqu 32(%rsi),%xmm7 subq $0x10,%rcx jz .Lodd_tail .Lskip4x: movdqu (%rdx),%xmm8 movdqu 16(%rdx),%xmm3 .byte 102,69,15,56,0,194 .byte 102,65,15,56,0,218 pxor %xmm8,%xmm0 movdqa %xmm3,%xmm5 pshufd $78,%xmm3,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,68,218,0 .byte 102,15,58,68,234,17 .byte 102,15,58,68,231,0 leaq 32(%rdx),%rdx nop subq $0x20,%rcx jbe .Leven_tail nop jmp .Lmod_loop .align 32 .Lmod_loop: movdqa %xmm0,%xmm1 movdqa %xmm4,%xmm8 pshufd $78,%xmm0,%xmm4 pxor %xmm0,%xmm4 .byte 102,15,58,68,198,0 .byte 102,15,58,68,206,17 .byte 102,15,58,68,231,16 pxor %xmm3,%xmm0 pxor %xmm5,%xmm1 movdqu (%rdx),%xmm9 pxor %xmm0,%xmm8 .byte 102,69,15,56,0,202 movdqu 16(%rdx),%xmm3 pxor %xmm1,%xmm8 pxor %xmm9,%xmm1 pxor %xmm8,%xmm4 .byte 102,65,15,56,0,218 movdqa %xmm4,%xmm8 psrldq $8,%xmm8 pslldq $8,%xmm4 pxor %xmm8,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm3,%xmm5 movdqa %xmm0,%xmm9 movdqa %xmm0,%xmm8 psllq $5,%xmm0 pxor %xmm0,%xmm8 .byte 102,15,58,68,218,0 psllq $1,%xmm0 pxor %xmm8,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm8 pslldq $8,%xmm0 psrldq $8,%xmm8 pxor %xmm9,%xmm0 pshufd $78,%xmm5,%xmm4 pxor %xmm8,%xmm1 pxor %xmm5,%xmm4 movdqa %xmm0,%xmm9 psrlq $1,%xmm0 .byte 102,15,58,68,234,17 pxor %xmm9,%xmm1 pxor %xmm0,%xmm9 psrlq $5,%xmm0 pxor %xmm9,%xmm0 leaq 32(%rdx),%rdx psrlq $1,%xmm0 .byte 102,15,58,68,231,0 pxor %xmm1,%xmm0 subq $0x20,%rcx ja .Lmod_loop .Leven_tail: movdqa %xmm0,%xmm1 movdqa %xmm4,%xmm8 pshufd $78,%xmm0,%xmm4 pxor %xmm0,%xmm4 .byte 102,15,58,68,198,0 .byte 102,15,58,68,206,17 .byte 102,15,58,68,231,16 pxor %xmm3,%xmm0 pxor %xmm5,%xmm1 pxor %xmm0,%xmm8 pxor %xmm1,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm8 psrldq $8,%xmm8 pslldq $8,%xmm4 pxor %xmm8,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq 
$1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 testq %rcx,%rcx jnz .Ldone .Lodd_tail: movdqu (%rdx),%xmm8 .byte 102,69,15,56,0,194 pxor %xmm8,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,223,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 .Ldone: .byte 102,65,15,56,0,194 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 .size gcm_ghash_clmul,.-gcm_ghash_clmul .globl gcm_init_avx .type gcm_init_avx,@function .align 32 gcm_init_avx: - jmp .L_init_clmul + vzeroupper + + vmovdqu (%rsi),%xmm2 + vpshufd $78,%xmm2,%xmm2 + + + vpshufd $255,%xmm2,%xmm4 + vpsrlq $63,%xmm2,%xmm3 + vpsllq $1,%xmm2,%xmm2 + vpxor %xmm5,%xmm5,%xmm5 + vpcmpgtd %xmm4,%xmm5,%xmm5 + vpslldq $8,%xmm3,%xmm3 + vpor %xmm3,%xmm2,%xmm2 + + + vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5 + vpxor %xmm5,%xmm2,%xmm2 + + vpunpckhqdq %xmm2,%xmm2,%xmm6 + vmovdqa %xmm2,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + movq $4,%r10 + jmp .Linit_start_avx +.align 32 +.Linit_loop_avx: + vpalignr $8,%xmm3,%xmm4,%xmm5 + vmovdqu %xmm5,-16(%rdi) + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 +.Linit_start_avx: + vmovdqa %xmm0,%xmm5 + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + vpshufd $78,%xmm5,%xmm3 + vpshufd $78,%xmm0,%xmm4 + vpxor %xmm5,%xmm3,%xmm3 + vmovdqu %xmm5,0(%rdi) + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu %xmm0,16(%rdi) + leaq 48(%rdi),%rdi + subq $1,%r10 + jnz .Linit_loop_avx + + vpalignr $8,%xmm4,%xmm3,%xmm5 + vmovdqu %xmm5,-16(%rdi) + + vzeroupper + .byte 0xf3,0xc3 .size gcm_init_avx,.-gcm_init_avx .globl gcm_gmult_avx .type gcm_gmult_avx,@function .align 32 gcm_gmult_avx: jmp .L_gmult_clmul .size gcm_gmult_avx,.-gcm_gmult_avx .globl gcm_ghash_avx .type gcm_ghash_avx,@function 
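For orientation: gcm_ghash_4bit above is the table-driven (rem_4bit/rem_8bit) fallback, while gcm_init_clmul/gcm_ghash_clmul and the gcm_init_avx/gcm_ghash_avx bodies added in this revision compute the same GHASH with carry-less multiplies, folding the 256-bit product back modulo x^128 + x^7 + x^2 + x + 1 (the .L0x1c2_polynomial constant). A minimal bit-at-a-time C reference of that field multiplication, using illustrative names (be128, gf128_mul) that do not appear in this file and deliberately ignoring the vectorized reduction used here:

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } be128;   /* big-endian halves of a 128-bit block */

    /* Z = X * Y in GF(2^128) with GCM's bit ordering; one bit per iteration,
       reducing by R = 0xE1 << 120 whenever a bit shifts out.  Illustrative only:
       the assembly uses PCLMULQDQ/AVX plus a folded reduction instead. */
    static be128 gf128_mul(be128 X, be128 Y)
    {
        be128 Z = { 0, 0 }, V = Y;
        for (int i = 0; i < 128; i++) {
            uint64_t bit = i < 64 ? (X.hi >> (63 - i)) & 1 : (X.lo >> (127 - i)) & 1;
            if (bit) { Z.hi ^= V.hi; Z.lo ^= V.lo; }
            uint64_t carry = V.lo & 1;             /* multiply V by x */
            V.lo = (V.lo >> 1) | (V.hi << 63);
            V.hi >>= 1;
            if (carry) V.hi ^= 0xE100000000000000ULL;
        }
        return Z;
    }

Each 16-byte input block is XORed into the accumulator and the result multiplied by the hash key H; the AVX path below consumes eight blocks per .Loop8x_avx iteration against precomputed powers of H laid down by gcm_init_avx.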
.align 32 gcm_ghash_avx: - jmp .L_ghash_clmul + vzeroupper + + vmovdqu (%rdi),%xmm10 + leaq .L0x1c2_polynomial(%rip),%r10 + leaq 64(%rsi),%rsi + vmovdqu .Lbswap_mask(%rip),%xmm13 + vpshufb %xmm13,%xmm10,%xmm10 + cmpq $0x80,%rcx + jb .Lshort_avx + subq $0x80,%rcx + + vmovdqu 112(%rdx),%xmm14 + vmovdqu 0-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vmovdqu 32-64(%rsi),%xmm7 + + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm14,%xmm9,%xmm9 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 80(%rdx),%xmm14 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 48-64(%rsi),%xmm6 + vpxor %xmm14,%xmm9,%xmm9 + vmovdqu 64(%rdx),%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 48(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 32(%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 16(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu (%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + + leaq 128(%rdx),%rdx + cmpq $0x80,%rcx + jb .Ltail_avx + + vpxor %xmm10,%xmm15,%xmm15 + subq $0x80,%rcx + jmp .Loop8x_avx + +.align 32 +.Loop8x_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 112(%rdx),%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm15,%xmm8,%xmm8 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11 + vmovdqu 0-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12 + vmovdqu 32-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm3,%xmm10,%xmm10 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vxorps %xmm4,%xmm11,%xmm11 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 
+ vpxor %xmm5,%xmm12,%xmm12 + vxorps %xmm15,%xmm8,%xmm8 + + vmovdqu 80(%rdx),%xmm14 + vpxor %xmm10,%xmm12,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm11,%xmm12,%xmm12 + vpslldq $8,%xmm12,%xmm9 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vpsrldq $8,%xmm12,%xmm12 + vpxor %xmm9,%xmm10,%xmm10 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vxorps %xmm12,%xmm11,%xmm11 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 64(%rdx),%xmm15 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vxorps %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + + vmovdqu 48(%rdx),%xmm14 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 32(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + vxorps %xmm12,%xmm10,%xmm10 + + vmovdqu 16(%rdx),%xmm14 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vxorps %xmm11,%xmm12,%xmm12 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu (%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm12,%xmm15,%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + vpxor %xmm10,%xmm15,%xmm15 + + leaq 128(%rdx),%rdx + subq $0x80,%rcx + jnc .Loop8x_avx + + addq $0x80,%rcx + jmp .Ltail_no_xor_avx + +.align 32 +.Lshort_avx: + vmovdqu -16(%rdx,%rcx,1),%xmm14 + leaq (%rdx,%rcx,1),%rdx + vmovdqu 0-64(%rsi),%xmm6 + vmovdqu 32-64(%rsi),%xmm7 + vpshufb %xmm13,%xmm14,%xmm15 + + vmovdqa %xmm0,%xmm3 + vmovdqa %xmm1,%xmm4 + vmovdqa %xmm2,%xmm5 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -32(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -48(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor 
%xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 80-64(%rsi),%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -64(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -80(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 96-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 128-64(%rsi),%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -96(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -112(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 144-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovq 184-64(%rsi),%xmm7 + subq $0x10,%rcx + jmp .Ltail_avx + +.align 32 +.Ltail_avx: + vpxor %xmm10,%xmm15,%xmm15 +.Ltail_no_xor_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + + vmovdqu (%r10),%xmm12 + + vpxor %xmm0,%xmm3,%xmm10 + vpxor %xmm1,%xmm4,%xmm11 + vpxor %xmm2,%xmm5,%xmm5 + + vpxor %xmm10,%xmm5,%xmm5 + vpxor %xmm11,%xmm5,%xmm5 + vpslldq $8,%xmm5,%xmm9 + vpsrldq $8,%xmm5,%xmm5 + vpxor %xmm9,%xmm10,%xmm10 + vpxor %xmm5,%xmm11,%xmm11 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm11,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + cmpq $0,%rcx + jne .Lshort_avx + + vpshufb %xmm13,%xmm10,%xmm10 + vmovdqu %xmm10,(%rdi) + vzeroupper + .byte 0xf3,0xc3 .size gcm_ghash_avx,.-gcm_ghash_avx .align 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .L0x1c2_polynomial: .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 .L7_mask: .long 7,0,7,0 .L7_mask_poly: .long 7,0,450,0 .align 64 .type .Lrem_4bit,@object .Lrem_4bit: .long 0,0,0,471859200,0,943718400,0,610271232 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 .type .Lrem_8bit,@object .Lrem_8bit: .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E .value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E .value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E .value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E .value 
0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E .value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E .value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E .value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE .value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE .value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE .value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE .value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E .value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E .value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE .value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE .value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E .value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E .value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E .value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E .value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E .value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E .value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E .value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E .value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE .value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 Index: head/secure/lib/libcrypto/amd64/md5-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/md5-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/md5-x86_64.S (revision 299481) @@ -1,669 +1,670 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from md5-x86_64.pl. 
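The md5_block_asm_data_order body that follows is the fully unrolled MD5 compression function: four groups of sixteen steps using the F, G, H, I boolean functions of RFC 1321, with the per-step additive constants visible in the leal instructions. A compact C sketch of one step, with illustrative names (rol32, md5_step) that are not symbols from this file:

    #include <stdint.h>

    static uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

    /* Round functions; the assembly computes F as ((c ^ d) & b) ^ d, and so on. */
    static uint32_t F(uint32_t b, uint32_t c, uint32_t d) { return d ^ (b & (c ^ d)); }
    static uint32_t G(uint32_t b, uint32_t c, uint32_t d) { return c ^ (d & (b ^ c)); }
    static uint32_t H(uint32_t b, uint32_t c, uint32_t d) { return b ^ c ^ d; }
    static uint32_t I(uint32_t b, uint32_t c, uint32_t d) { return c ^ (b | ~d); }

    /* One MD5 step: a = b + rol(a + f(b,c,d) + m[k] + t, s).  The unrolled code
       below performs 64 of these per 64-byte block, then adds the saved state. */
    static uint32_t md5_step(uint32_t f, uint32_t a, uint32_t b,
                             uint32_t m, uint32_t t, int s)
    {
        return b + rol32(a + f + m + t, s);
    }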
.text .align 16 .globl md5_block_asm_data_order .type md5_block_asm_data_order,@function md5_block_asm_data_order: pushq %rbp pushq %rbx pushq %r12 pushq %r14 pushq %r15 .Lprologue: movq %rdi,%rbp shlq $6,%rdx leaq (%rsi,%rdx,1),%rdi movl 0(%rbp),%eax movl 4(%rbp),%ebx movl 8(%rbp),%ecx movl 12(%rbp),%edx cmpq %rdi,%rsi je .Lend .Lloop: movl %eax,%r8d movl %ebx,%r9d movl %ecx,%r14d movl %edx,%r15d movl 0(%rsi),%r10d movl %edx,%r11d xorl %ecx,%r11d leal -680876936(%rax,%r10,1),%eax andl %ebx,%r11d xorl %edx,%r11d movl 4(%rsi),%r10d addl %r11d,%eax roll $7,%eax movl %ecx,%r11d addl %ebx,%eax xorl %ebx,%r11d leal -389564586(%rdx,%r10,1),%edx andl %eax,%r11d xorl %ecx,%r11d movl 8(%rsi),%r10d addl %r11d,%edx roll $12,%edx movl %ebx,%r11d addl %eax,%edx xorl %eax,%r11d leal 606105819(%rcx,%r10,1),%ecx andl %edx,%r11d xorl %ebx,%r11d movl 12(%rsi),%r10d addl %r11d,%ecx roll $17,%ecx movl %eax,%r11d addl %edx,%ecx xorl %edx,%r11d leal -1044525330(%rbx,%r10,1),%ebx andl %ecx,%r11d xorl %eax,%r11d movl 16(%rsi),%r10d addl %r11d,%ebx roll $22,%ebx movl %edx,%r11d addl %ecx,%ebx xorl %ecx,%r11d leal -176418897(%rax,%r10,1),%eax andl %ebx,%r11d xorl %edx,%r11d movl 20(%rsi),%r10d addl %r11d,%eax roll $7,%eax movl %ecx,%r11d addl %ebx,%eax xorl %ebx,%r11d leal 1200080426(%rdx,%r10,1),%edx andl %eax,%r11d xorl %ecx,%r11d movl 24(%rsi),%r10d addl %r11d,%edx roll $12,%edx movl %ebx,%r11d addl %eax,%edx xorl %eax,%r11d leal -1473231341(%rcx,%r10,1),%ecx andl %edx,%r11d xorl %ebx,%r11d movl 28(%rsi),%r10d addl %r11d,%ecx roll $17,%ecx movl %eax,%r11d addl %edx,%ecx xorl %edx,%r11d leal -45705983(%rbx,%r10,1),%ebx andl %ecx,%r11d xorl %eax,%r11d movl 32(%rsi),%r10d addl %r11d,%ebx roll $22,%ebx movl %edx,%r11d addl %ecx,%ebx xorl %ecx,%r11d leal 1770035416(%rax,%r10,1),%eax andl %ebx,%r11d xorl %edx,%r11d movl 36(%rsi),%r10d addl %r11d,%eax roll $7,%eax movl %ecx,%r11d addl %ebx,%eax xorl %ebx,%r11d leal -1958414417(%rdx,%r10,1),%edx andl %eax,%r11d xorl %ecx,%r11d movl 40(%rsi),%r10d addl %r11d,%edx roll $12,%edx movl %ebx,%r11d addl %eax,%edx xorl %eax,%r11d leal -42063(%rcx,%r10,1),%ecx andl %edx,%r11d xorl %ebx,%r11d movl 44(%rsi),%r10d addl %r11d,%ecx roll $17,%ecx movl %eax,%r11d addl %edx,%ecx xorl %edx,%r11d leal -1990404162(%rbx,%r10,1),%ebx andl %ecx,%r11d xorl %eax,%r11d movl 48(%rsi),%r10d addl %r11d,%ebx roll $22,%ebx movl %edx,%r11d addl %ecx,%ebx xorl %ecx,%r11d leal 1804603682(%rax,%r10,1),%eax andl %ebx,%r11d xorl %edx,%r11d movl 52(%rsi),%r10d addl %r11d,%eax roll $7,%eax movl %ecx,%r11d addl %ebx,%eax xorl %ebx,%r11d leal -40341101(%rdx,%r10,1),%edx andl %eax,%r11d xorl %ecx,%r11d movl 56(%rsi),%r10d addl %r11d,%edx roll $12,%edx movl %ebx,%r11d addl %eax,%edx xorl %eax,%r11d leal -1502002290(%rcx,%r10,1),%ecx andl %edx,%r11d xorl %ebx,%r11d movl 60(%rsi),%r10d addl %r11d,%ecx roll $17,%ecx movl %eax,%r11d addl %edx,%ecx xorl %edx,%r11d leal 1236535329(%rbx,%r10,1),%ebx andl %ecx,%r11d xorl %eax,%r11d movl 0(%rsi),%r10d addl %r11d,%ebx roll $22,%ebx movl %edx,%r11d addl %ecx,%ebx movl 4(%rsi),%r10d movl %edx,%r11d movl %edx,%r12d notl %r11d leal -165796510(%rax,%r10,1),%eax andl %ebx,%r12d andl %ecx,%r11d movl 24(%rsi),%r10d orl %r11d,%r12d movl %ecx,%r11d addl %r12d,%eax movl %ecx,%r12d roll $5,%eax addl %ebx,%eax notl %r11d leal -1069501632(%rdx,%r10,1),%edx andl %eax,%r12d andl %ebx,%r11d movl 44(%rsi),%r10d orl %r11d,%r12d movl %ebx,%r11d addl %r12d,%edx movl %ebx,%r12d roll $9,%edx addl %eax,%edx notl %r11d leal 643717713(%rcx,%r10,1),%ecx andl %edx,%r12d andl %eax,%r11d movl 
0(%rsi),%r10d orl %r11d,%r12d movl %eax,%r11d addl %r12d,%ecx movl %eax,%r12d roll $14,%ecx addl %edx,%ecx notl %r11d leal -373897302(%rbx,%r10,1),%ebx andl %ecx,%r12d andl %edx,%r11d movl 20(%rsi),%r10d orl %r11d,%r12d movl %edx,%r11d addl %r12d,%ebx movl %edx,%r12d roll $20,%ebx addl %ecx,%ebx notl %r11d leal -701558691(%rax,%r10,1),%eax andl %ebx,%r12d andl %ecx,%r11d movl 40(%rsi),%r10d orl %r11d,%r12d movl %ecx,%r11d addl %r12d,%eax movl %ecx,%r12d roll $5,%eax addl %ebx,%eax notl %r11d leal 38016083(%rdx,%r10,1),%edx andl %eax,%r12d andl %ebx,%r11d movl 60(%rsi),%r10d orl %r11d,%r12d movl %ebx,%r11d addl %r12d,%edx movl %ebx,%r12d roll $9,%edx addl %eax,%edx notl %r11d leal -660478335(%rcx,%r10,1),%ecx andl %edx,%r12d andl %eax,%r11d movl 16(%rsi),%r10d orl %r11d,%r12d movl %eax,%r11d addl %r12d,%ecx movl %eax,%r12d roll $14,%ecx addl %edx,%ecx notl %r11d leal -405537848(%rbx,%r10,1),%ebx andl %ecx,%r12d andl %edx,%r11d movl 36(%rsi),%r10d orl %r11d,%r12d movl %edx,%r11d addl %r12d,%ebx movl %edx,%r12d roll $20,%ebx addl %ecx,%ebx notl %r11d leal 568446438(%rax,%r10,1),%eax andl %ebx,%r12d andl %ecx,%r11d movl 56(%rsi),%r10d orl %r11d,%r12d movl %ecx,%r11d addl %r12d,%eax movl %ecx,%r12d roll $5,%eax addl %ebx,%eax notl %r11d leal -1019803690(%rdx,%r10,1),%edx andl %eax,%r12d andl %ebx,%r11d movl 12(%rsi),%r10d orl %r11d,%r12d movl %ebx,%r11d addl %r12d,%edx movl %ebx,%r12d roll $9,%edx addl %eax,%edx notl %r11d leal -187363961(%rcx,%r10,1),%ecx andl %edx,%r12d andl %eax,%r11d movl 32(%rsi),%r10d orl %r11d,%r12d movl %eax,%r11d addl %r12d,%ecx movl %eax,%r12d roll $14,%ecx addl %edx,%ecx notl %r11d leal 1163531501(%rbx,%r10,1),%ebx andl %ecx,%r12d andl %edx,%r11d movl 52(%rsi),%r10d orl %r11d,%r12d movl %edx,%r11d addl %r12d,%ebx movl %edx,%r12d roll $20,%ebx addl %ecx,%ebx notl %r11d leal -1444681467(%rax,%r10,1),%eax andl %ebx,%r12d andl %ecx,%r11d movl 8(%rsi),%r10d orl %r11d,%r12d movl %ecx,%r11d addl %r12d,%eax movl %ecx,%r12d roll $5,%eax addl %ebx,%eax notl %r11d leal -51403784(%rdx,%r10,1),%edx andl %eax,%r12d andl %ebx,%r11d movl 28(%rsi),%r10d orl %r11d,%r12d movl %ebx,%r11d addl %r12d,%edx movl %ebx,%r12d roll $9,%edx addl %eax,%edx notl %r11d leal 1735328473(%rcx,%r10,1),%ecx andl %edx,%r12d andl %eax,%r11d movl 48(%rsi),%r10d orl %r11d,%r12d movl %eax,%r11d addl %r12d,%ecx movl %eax,%r12d roll $14,%ecx addl %edx,%ecx notl %r11d leal -1926607734(%rbx,%r10,1),%ebx andl %ecx,%r12d andl %edx,%r11d movl 0(%rsi),%r10d orl %r11d,%r12d movl %edx,%r11d addl %r12d,%ebx movl %edx,%r12d roll $20,%ebx addl %ecx,%ebx movl 20(%rsi),%r10d movl %ecx,%r11d leal -378558(%rax,%r10,1),%eax movl 32(%rsi),%r10d xorl %edx,%r11d xorl %ebx,%r11d addl %r11d,%eax roll $4,%eax movl %ebx,%r11d addl %ebx,%eax leal -2022574463(%rdx,%r10,1),%edx movl 44(%rsi),%r10d xorl %ecx,%r11d xorl %eax,%r11d addl %r11d,%edx roll $11,%edx movl %eax,%r11d addl %eax,%edx leal 1839030562(%rcx,%r10,1),%ecx movl 56(%rsi),%r10d xorl %ebx,%r11d xorl %edx,%r11d addl %r11d,%ecx roll $16,%ecx movl %edx,%r11d addl %edx,%ecx leal -35309556(%rbx,%r10,1),%ebx movl 4(%rsi),%r10d xorl %eax,%r11d xorl %ecx,%r11d addl %r11d,%ebx roll $23,%ebx movl %ecx,%r11d addl %ecx,%ebx leal -1530992060(%rax,%r10,1),%eax movl 16(%rsi),%r10d xorl %edx,%r11d xorl %ebx,%r11d addl %r11d,%eax roll $4,%eax movl %ebx,%r11d addl %ebx,%eax leal 1272893353(%rdx,%r10,1),%edx movl 28(%rsi),%r10d xorl %ecx,%r11d xorl %eax,%r11d addl %r11d,%edx roll $11,%edx movl %eax,%r11d addl %eax,%edx leal -155497632(%rcx,%r10,1),%ecx movl 40(%rsi),%r10d xorl %ebx,%r11d 
xorl %edx,%r11d addl %r11d,%ecx roll $16,%ecx movl %edx,%r11d addl %edx,%ecx leal -1094730640(%rbx,%r10,1),%ebx movl 52(%rsi),%r10d xorl %eax,%r11d xorl %ecx,%r11d addl %r11d,%ebx roll $23,%ebx movl %ecx,%r11d addl %ecx,%ebx leal 681279174(%rax,%r10,1),%eax movl 0(%rsi),%r10d xorl %edx,%r11d xorl %ebx,%r11d addl %r11d,%eax roll $4,%eax movl %ebx,%r11d addl %ebx,%eax leal -358537222(%rdx,%r10,1),%edx movl 12(%rsi),%r10d xorl %ecx,%r11d xorl %eax,%r11d addl %r11d,%edx roll $11,%edx movl %eax,%r11d addl %eax,%edx leal -722521979(%rcx,%r10,1),%ecx movl 24(%rsi),%r10d xorl %ebx,%r11d xorl %edx,%r11d addl %r11d,%ecx roll $16,%ecx movl %edx,%r11d addl %edx,%ecx leal 76029189(%rbx,%r10,1),%ebx movl 36(%rsi),%r10d xorl %eax,%r11d xorl %ecx,%r11d addl %r11d,%ebx roll $23,%ebx movl %ecx,%r11d addl %ecx,%ebx leal -640364487(%rax,%r10,1),%eax movl 48(%rsi),%r10d xorl %edx,%r11d xorl %ebx,%r11d addl %r11d,%eax roll $4,%eax movl %ebx,%r11d addl %ebx,%eax leal -421815835(%rdx,%r10,1),%edx movl 60(%rsi),%r10d xorl %ecx,%r11d xorl %eax,%r11d addl %r11d,%edx roll $11,%edx movl %eax,%r11d addl %eax,%edx leal 530742520(%rcx,%r10,1),%ecx movl 8(%rsi),%r10d xorl %ebx,%r11d xorl %edx,%r11d addl %r11d,%ecx roll $16,%ecx movl %edx,%r11d addl %edx,%ecx leal -995338651(%rbx,%r10,1),%ebx movl 0(%rsi),%r10d xorl %eax,%r11d xorl %ecx,%r11d addl %r11d,%ebx roll $23,%ebx movl %ecx,%r11d addl %ecx,%ebx movl 0(%rsi),%r10d movl $0xffffffff,%r11d xorl %edx,%r11d leal -198630844(%rax,%r10,1),%eax orl %ebx,%r11d xorl %ecx,%r11d addl %r11d,%eax movl 28(%rsi),%r10d movl $0xffffffff,%r11d roll $6,%eax xorl %ecx,%r11d addl %ebx,%eax leal 1126891415(%rdx,%r10,1),%edx orl %eax,%r11d xorl %ebx,%r11d addl %r11d,%edx movl 56(%rsi),%r10d movl $0xffffffff,%r11d roll $10,%edx xorl %ebx,%r11d addl %eax,%edx leal -1416354905(%rcx,%r10,1),%ecx orl %edx,%r11d xorl %eax,%r11d addl %r11d,%ecx movl 20(%rsi),%r10d movl $0xffffffff,%r11d roll $15,%ecx xorl %eax,%r11d addl %edx,%ecx leal -57434055(%rbx,%r10,1),%ebx orl %ecx,%r11d xorl %edx,%r11d addl %r11d,%ebx movl 48(%rsi),%r10d movl $0xffffffff,%r11d roll $21,%ebx xorl %edx,%r11d addl %ecx,%ebx leal 1700485571(%rax,%r10,1),%eax orl %ebx,%r11d xorl %ecx,%r11d addl %r11d,%eax movl 12(%rsi),%r10d movl $0xffffffff,%r11d roll $6,%eax xorl %ecx,%r11d addl %ebx,%eax leal -1894986606(%rdx,%r10,1),%edx orl %eax,%r11d xorl %ebx,%r11d addl %r11d,%edx movl 40(%rsi),%r10d movl $0xffffffff,%r11d roll $10,%edx xorl %ebx,%r11d addl %eax,%edx leal -1051523(%rcx,%r10,1),%ecx orl %edx,%r11d xorl %eax,%r11d addl %r11d,%ecx movl 4(%rsi),%r10d movl $0xffffffff,%r11d roll $15,%ecx xorl %eax,%r11d addl %edx,%ecx leal -2054922799(%rbx,%r10,1),%ebx orl %ecx,%r11d xorl %edx,%r11d addl %r11d,%ebx movl 32(%rsi),%r10d movl $0xffffffff,%r11d roll $21,%ebx xorl %edx,%r11d addl %ecx,%ebx leal 1873313359(%rax,%r10,1),%eax orl %ebx,%r11d xorl %ecx,%r11d addl %r11d,%eax movl 60(%rsi),%r10d movl $0xffffffff,%r11d roll $6,%eax xorl %ecx,%r11d addl %ebx,%eax leal -30611744(%rdx,%r10,1),%edx orl %eax,%r11d xorl %ebx,%r11d addl %r11d,%edx movl 24(%rsi),%r10d movl $0xffffffff,%r11d roll $10,%edx xorl %ebx,%r11d addl %eax,%edx leal -1560198380(%rcx,%r10,1),%ecx orl %edx,%r11d xorl %eax,%r11d addl %r11d,%ecx movl 52(%rsi),%r10d movl $0xffffffff,%r11d roll $15,%ecx xorl %eax,%r11d addl %edx,%ecx leal 1309151649(%rbx,%r10,1),%ebx orl %ecx,%r11d xorl %edx,%r11d addl %r11d,%ebx movl 16(%rsi),%r10d movl $0xffffffff,%r11d roll $21,%ebx xorl %edx,%r11d addl %ecx,%ebx leal -145523070(%rax,%r10,1),%eax orl %ebx,%r11d xorl %ecx,%r11d addl %r11d,%eax 
movl 44(%rsi),%r10d movl $0xffffffff,%r11d roll $6,%eax xorl %ecx,%r11d addl %ebx,%eax leal -1120210379(%rdx,%r10,1),%edx orl %eax,%r11d xorl %ebx,%r11d addl %r11d,%edx movl 8(%rsi),%r10d movl $0xffffffff,%r11d roll $10,%edx xorl %ebx,%r11d addl %eax,%edx leal 718787259(%rcx,%r10,1),%ecx orl %edx,%r11d xorl %eax,%r11d addl %r11d,%ecx movl 36(%rsi),%r10d movl $0xffffffff,%r11d roll $15,%ecx xorl %eax,%r11d addl %edx,%ecx leal -343485551(%rbx,%r10,1),%ebx orl %ecx,%r11d xorl %edx,%r11d addl %r11d,%ebx movl 0(%rsi),%r10d movl $0xffffffff,%r11d roll $21,%ebx xorl %edx,%r11d addl %ecx,%ebx addl %r8d,%eax addl %r9d,%ebx addl %r14d,%ecx addl %r15d,%edx addq $64,%rsi cmpq %rdi,%rsi jb .Lloop .Lend: movl %eax,0(%rbp) movl %ebx,4(%rbp) movl %ecx,8(%rbp) movl %edx,12(%rbp) movq (%rsp),%r15 movq 8(%rsp),%r14 movq 16(%rsp),%r12 movq 24(%rsp),%rbx movq 32(%rsp),%rbp addq $40,%rsp .Lepilogue: .byte 0xf3,0xc3 .size md5_block_asm_data_order,.-md5_block_asm_data_order Index: head/secure/lib/libcrypto/amd64/rc4-md5-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/rc4-md5-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/rc4-md5-x86_64.S (revision 299481) @@ -1,1260 +1,1261 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from rc4-md5-x86_64.pl. .text .align 16 .globl rc4_md5_enc .type rc4_md5_enc,@function rc4_md5_enc: cmpq $0,%r9 je .Labort pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $40,%rsp .Lbody: movq %rcx,%r11 movq %r9,%r12 movq %rsi,%r13 movq %rdx,%r14 movq %r8,%r15 xorq %rbp,%rbp xorq %rcx,%rcx leaq 8(%rdi),%rdi movb -8(%rdi),%bpl movb -4(%rdi),%cl incb %bpl subq %r13,%r14 movl (%rdi,%rbp,4),%eax addb %al,%cl leaq (%rdi,%rbp,4),%rsi shlq $6,%r12 addq %r15,%r12 movq %r12,16(%rsp) movq %r11,24(%rsp) movl 0(%r11),%r8d movl 4(%r11),%r9d movl 8(%r11),%r10d movl 12(%r11),%r11d jmp .Loop .align 16 .Loop: movl %r8d,0(%rsp) movl %r9d,4(%rsp) movl %r10d,8(%rsp) movl %r11d,%r12d movl %r11d,12(%rsp) pxor %xmm0,%xmm0 movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %eax,(%rdi,%rcx,4) andl %r9d,%r12d addl 0(%r15),%r8d addb %dl,%al movl 4(%rsi),%ebx addl $3614090360,%r8d xorl %r11d,%r12d movzbl %al,%eax movl %edx,0(%rsi) addl %r12d,%r8d addb %bl,%cl roll $7,%r8d movl %r10d,%r12d movd (%rdi,%rax,4),%xmm0 addl %r9d,%r8d pxor %xmm1,%xmm1 movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r8d,%r12d addl 4(%r15),%r11d addb %dl,%bl movl 8(%rsi),%eax addl $3905402710,%r11d xorl %r10d,%r12d movzbl %bl,%ebx movl %edx,4(%rsi) addl %r12d,%r11d addb %al,%cl roll $12,%r11d movl %r9d,%r12d movd (%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %eax,(%rdi,%rcx,4) andl %r11d,%r12d addl 8(%r15),%r10d addb %dl,%al movl 12(%rsi),%ebx addl $606105819,%r10d xorl %r9d,%r12d movzbl %al,%eax movl %edx,8(%rsi) addl %r12d,%r10d addb %bl,%cl roll $17,%r10d movl %r8d,%r12d pinsrw $1,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r10d,%r12d addl 12(%r15),%r9d addb %dl,%bl movl 16(%rsi),%eax addl $3250441966,%r9d xorl %r8d,%r12d movzbl %bl,%ebx movl %edx,12(%rsi) addl %r12d,%r9d addb %al,%cl roll $22,%r9d movl %r11d,%r12d pinsrw $1,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %eax,(%rdi,%rcx,4) andl %r9d,%r12d addl 16(%r15),%r8d addb %dl,%al movl 20(%rsi),%ebx addl $4118548399,%r8d xorl %r11d,%r12d movzbl %al,%eax movl %edx,16(%rsi) addl %r12d,%r8d addb %bl,%cl roll $7,%r8d 
movl %r10d,%r12d pinsrw $2,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r8d,%r12d addl 20(%r15),%r11d addb %dl,%bl movl 24(%rsi),%eax addl $1200080426,%r11d xorl %r10d,%r12d movzbl %bl,%ebx movl %edx,20(%rsi) addl %r12d,%r11d addb %al,%cl roll $12,%r11d movl %r9d,%r12d pinsrw $2,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %eax,(%rdi,%rcx,4) andl %r11d,%r12d addl 24(%r15),%r10d addb %dl,%al movl 28(%rsi),%ebx addl $2821735955,%r10d xorl %r9d,%r12d movzbl %al,%eax movl %edx,24(%rsi) addl %r12d,%r10d addb %bl,%cl roll $17,%r10d movl %r8d,%r12d pinsrw $3,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r10d,%r12d addl 28(%r15),%r9d addb %dl,%bl movl 32(%rsi),%eax addl $4249261313,%r9d xorl %r8d,%r12d movzbl %bl,%ebx movl %edx,28(%rsi) addl %r12d,%r9d addb %al,%cl roll $22,%r9d movl %r11d,%r12d pinsrw $3,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %eax,(%rdi,%rcx,4) andl %r9d,%r12d addl 32(%r15),%r8d addb %dl,%al movl 36(%rsi),%ebx addl $1770035416,%r8d xorl %r11d,%r12d movzbl %al,%eax movl %edx,32(%rsi) addl %r12d,%r8d addb %bl,%cl roll $7,%r8d movl %r10d,%r12d pinsrw $4,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r8d,%r12d addl 36(%r15),%r11d addb %dl,%bl movl 40(%rsi),%eax addl $2336552879,%r11d xorl %r10d,%r12d movzbl %bl,%ebx movl %edx,36(%rsi) addl %r12d,%r11d addb %al,%cl roll $12,%r11d movl %r9d,%r12d pinsrw $4,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %eax,(%rdi,%rcx,4) andl %r11d,%r12d addl 40(%r15),%r10d addb %dl,%al movl 44(%rsi),%ebx addl $4294925233,%r10d xorl %r9d,%r12d movzbl %al,%eax movl %edx,40(%rsi) addl %r12d,%r10d addb %bl,%cl roll $17,%r10d movl %r8d,%r12d pinsrw $5,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r10d,%r12d addl 44(%r15),%r9d addb %dl,%bl movl 48(%rsi),%eax addl $2304563134,%r9d xorl %r8d,%r12d movzbl %bl,%ebx movl %edx,44(%rsi) addl %r12d,%r9d addb %al,%cl roll $22,%r9d movl %r11d,%r12d pinsrw $5,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %eax,(%rdi,%rcx,4) andl %r9d,%r12d addl 48(%r15),%r8d addb %dl,%al movl 52(%rsi),%ebx addl $1804603682,%r8d xorl %r11d,%r12d movzbl %al,%eax movl %edx,48(%rsi) addl %r12d,%r8d addb %bl,%cl roll $7,%r8d movl %r10d,%r12d pinsrw $6,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r8d,%r12d addl 52(%r15),%r11d addb %dl,%bl movl 56(%rsi),%eax addl $4254626195,%r11d xorl %r10d,%r12d movzbl %bl,%ebx movl %edx,52(%rsi) addl %r12d,%r11d addb %al,%cl roll $12,%r11d movl %r9d,%r12d pinsrw $6,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %eax,(%rdi,%rcx,4) andl %r11d,%r12d addl 56(%r15),%r10d addb %dl,%al movl 60(%rsi),%ebx addl $2792965006,%r10d xorl %r9d,%r12d movzbl %al,%eax movl %edx,56(%rsi) addl %r12d,%r10d addb %bl,%cl roll $17,%r10d movl %r8d,%r12d pinsrw $7,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movdqu (%r13),%xmm2 movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r10d,%r12d addl 60(%r15),%r9d addb %dl,%bl movl 64(%rsi),%eax addl $1236535329,%r9d xorl %r8d,%r12d movzbl %bl,%ebx movl %edx,60(%rsi) addl %r12d,%r9d addb %al,%cl roll $22,%r9d movl %r10d,%r12d pinsrw $7,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d psllq $8,%xmm1 pxor 
%xmm0,%xmm2 pxor %xmm1,%xmm2 pxor %xmm0,%xmm0 movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %eax,(%rdi,%rcx,4) andl %r11d,%r12d addl 4(%r15),%r8d addb %dl,%al movl 68(%rsi),%ebx addl $4129170786,%r8d xorl %r10d,%r12d movzbl %al,%eax movl %edx,64(%rsi) addl %r12d,%r8d addb %bl,%cl roll $5,%r8d movl %r9d,%r12d movd (%rdi,%rax,4),%xmm0 addl %r9d,%r8d pxor %xmm1,%xmm1 movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r10d,%r12d addl 24(%r15),%r11d addb %dl,%bl movl 72(%rsi),%eax addl $3225465664,%r11d xorl %r9d,%r12d movzbl %bl,%ebx movl %edx,68(%rsi) addl %r12d,%r11d addb %al,%cl roll $9,%r11d movl %r8d,%r12d movd (%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %eax,(%rdi,%rcx,4) andl %r9d,%r12d addl 44(%r15),%r10d addb %dl,%al movl 76(%rsi),%ebx addl $643717713,%r10d xorl %r8d,%r12d movzbl %al,%eax movl %edx,72(%rsi) addl %r12d,%r10d addb %bl,%cl roll $14,%r10d movl %r11d,%r12d pinsrw $1,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r8d,%r12d addl 0(%r15),%r9d addb %dl,%bl movl 80(%rsi),%eax addl $3921069994,%r9d xorl %r11d,%r12d movzbl %bl,%ebx movl %edx,76(%rsi) addl %r12d,%r9d addb %al,%cl roll $20,%r9d movl %r10d,%r12d pinsrw $1,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %eax,(%rdi,%rcx,4) andl %r11d,%r12d addl 20(%r15),%r8d addb %dl,%al movl 84(%rsi),%ebx addl $3593408605,%r8d xorl %r10d,%r12d movzbl %al,%eax movl %edx,80(%rsi) addl %r12d,%r8d addb %bl,%cl roll $5,%r8d movl %r9d,%r12d pinsrw $2,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r10d,%r12d addl 40(%r15),%r11d addb %dl,%bl movl 88(%rsi),%eax addl $38016083,%r11d xorl %r9d,%r12d movzbl %bl,%ebx movl %edx,84(%rsi) addl %r12d,%r11d addb %al,%cl roll $9,%r11d movl %r8d,%r12d pinsrw $2,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %eax,(%rdi,%rcx,4) andl %r9d,%r12d addl 60(%r15),%r10d addb %dl,%al movl 92(%rsi),%ebx addl $3634488961,%r10d xorl %r8d,%r12d movzbl %al,%eax movl %edx,88(%rsi) addl %r12d,%r10d addb %bl,%cl roll $14,%r10d movl %r11d,%r12d pinsrw $3,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r8d,%r12d addl 16(%r15),%r9d addb %dl,%bl movl 96(%rsi),%eax addl $3889429448,%r9d xorl %r11d,%r12d movzbl %bl,%ebx movl %edx,92(%rsi) addl %r12d,%r9d addb %al,%cl roll $20,%r9d movl %r10d,%r12d pinsrw $3,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %eax,(%rdi,%rcx,4) andl %r11d,%r12d addl 36(%r15),%r8d addb %dl,%al movl 100(%rsi),%ebx addl $568446438,%r8d xorl %r10d,%r12d movzbl %al,%eax movl %edx,96(%rsi) addl %r12d,%r8d addb %bl,%cl roll $5,%r8d movl %r9d,%r12d pinsrw $4,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r10d,%r12d addl 56(%r15),%r11d addb %dl,%bl movl 104(%rsi),%eax addl $3275163606,%r11d xorl %r9d,%r12d movzbl %bl,%ebx movl %edx,100(%rsi) addl %r12d,%r11d addb %al,%cl roll $9,%r11d movl %r8d,%r12d pinsrw $4,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %eax,(%rdi,%rcx,4) andl %r9d,%r12d addl 12(%r15),%r10d addb %dl,%al movl 108(%rsi),%ebx addl $4107603335,%r10d xorl %r8d,%r12d movzbl %al,%eax movl %edx,104(%rsi) addl %r12d,%r10d addb %bl,%cl roll $14,%r10d movl %r11d,%r12d pinsrw $5,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %ebx,(%rdi,%rcx,4) 
andl %r8d,%r12d addl 32(%r15),%r9d addb %dl,%bl movl 112(%rsi),%eax addl $1163531501,%r9d xorl %r11d,%r12d movzbl %bl,%ebx movl %edx,108(%rsi) addl %r12d,%r9d addb %al,%cl roll $20,%r9d movl %r10d,%r12d pinsrw $5,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %eax,(%rdi,%rcx,4) andl %r11d,%r12d addl 52(%r15),%r8d addb %dl,%al movl 116(%rsi),%ebx addl $2850285829,%r8d xorl %r10d,%r12d movzbl %al,%eax movl %edx,112(%rsi) addl %r12d,%r8d addb %bl,%cl roll $5,%r8d movl %r9d,%r12d pinsrw $6,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r10d,%r12d addl 8(%r15),%r11d addb %dl,%bl movl 120(%rsi),%eax addl $4243563512,%r11d xorl %r9d,%r12d movzbl %bl,%ebx movl %edx,116(%rsi) addl %r12d,%r11d addb %al,%cl roll $9,%r11d movl %r8d,%r12d pinsrw $6,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %eax,(%rdi,%rcx,4) andl %r9d,%r12d addl 28(%r15),%r10d addb %dl,%al movl 124(%rsi),%ebx addl $1735328473,%r10d xorl %r8d,%r12d movzbl %al,%eax movl %edx,120(%rsi) addl %r12d,%r10d addb %bl,%cl roll $14,%r10d movl %r11d,%r12d pinsrw $7,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movdqu 16(%r13),%xmm3 addb $32,%bpl movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %ebx,(%rdi,%rcx,4) andl %r8d,%r12d addl 48(%r15),%r9d addb %dl,%bl movl 0(%rdi,%rbp,4),%eax addl $2368359562,%r9d xorl %r11d,%r12d movzbl %bl,%ebx movl %edx,124(%rsi) addl %r12d,%r9d addb %al,%cl roll $20,%r9d movl %r11d,%r12d pinsrw $7,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movq %rcx,%rsi xorq %rcx,%rcx movb %sil,%cl leaq (%rdi,%rbp,4),%rsi psllq $8,%xmm1 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 pxor %xmm0,%xmm0 movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %eax,(%rdi,%rcx,4) xorl %r9d,%r12d addl 20(%r15),%r8d addb %dl,%al movl 4(%rsi),%ebx addl $4294588738,%r8d movzbl %al,%eax addl %r12d,%r8d movl %edx,0(%rsi) addb %bl,%cl roll $4,%r8d movl %r10d,%r12d movd (%rdi,%rax,4),%xmm0 addl %r9d,%r8d pxor %xmm1,%xmm1 movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %ebx,(%rdi,%rcx,4) xorl %r8d,%r12d addl 32(%r15),%r11d addb %dl,%bl movl 8(%rsi),%eax addl $2272392833,%r11d movzbl %bl,%ebx addl %r12d,%r11d movl %edx,4(%rsi) addb %al,%cl roll $11,%r11d movl %r9d,%r12d movd (%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %eax,(%rdi,%rcx,4) xorl %r11d,%r12d addl 44(%r15),%r10d addb %dl,%al movl 12(%rsi),%ebx addl $1839030562,%r10d movzbl %al,%eax addl %r12d,%r10d movl %edx,8(%rsi) addb %bl,%cl roll $16,%r10d movl %r8d,%r12d pinsrw $1,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %ebx,(%rdi,%rcx,4) xorl %r10d,%r12d addl 56(%r15),%r9d addb %dl,%bl movl 16(%rsi),%eax addl $4259657740,%r9d movzbl %bl,%ebx addl %r12d,%r9d movl %edx,12(%rsi) addb %al,%cl roll $23,%r9d movl %r11d,%r12d pinsrw $1,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %eax,(%rdi,%rcx,4) xorl %r9d,%r12d addl 4(%r15),%r8d addb %dl,%al movl 20(%rsi),%ebx addl $2763975236,%r8d movzbl %al,%eax addl %r12d,%r8d movl %edx,16(%rsi) addb %bl,%cl roll $4,%r8d movl %r10d,%r12d pinsrw $2,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %ebx,(%rdi,%rcx,4) xorl %r8d,%r12d addl 16(%r15),%r11d addb %dl,%bl movl 24(%rsi),%eax addl $1272893353,%r11d movzbl %bl,%ebx addl %r12d,%r11d movl %edx,20(%rsi) addb %al,%cl roll $11,%r11d movl %r9d,%r12d pinsrw $2,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %eax,(%rdi,%rcx,4) xorl %r11d,%r12d addl 
28(%r15),%r10d addb %dl,%al movl 28(%rsi),%ebx addl $4139469664,%r10d movzbl %al,%eax addl %r12d,%r10d movl %edx,24(%rsi) addb %bl,%cl roll $16,%r10d movl %r8d,%r12d pinsrw $3,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %ebx,(%rdi,%rcx,4) xorl %r10d,%r12d addl 40(%r15),%r9d addb %dl,%bl movl 32(%rsi),%eax addl $3200236656,%r9d movzbl %bl,%ebx addl %r12d,%r9d movl %edx,28(%rsi) addb %al,%cl roll $23,%r9d movl %r11d,%r12d pinsrw $3,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %eax,(%rdi,%rcx,4) xorl %r9d,%r12d addl 52(%r15),%r8d addb %dl,%al movl 36(%rsi),%ebx addl $681279174,%r8d movzbl %al,%eax addl %r12d,%r8d movl %edx,32(%rsi) addb %bl,%cl roll $4,%r8d movl %r10d,%r12d pinsrw $4,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %ebx,(%rdi,%rcx,4) xorl %r8d,%r12d addl 0(%r15),%r11d addb %dl,%bl movl 40(%rsi),%eax addl $3936430074,%r11d movzbl %bl,%ebx addl %r12d,%r11d movl %edx,36(%rsi) addb %al,%cl roll $11,%r11d movl %r9d,%r12d pinsrw $4,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %eax,(%rdi,%rcx,4) xorl %r11d,%r12d addl 12(%r15),%r10d addb %dl,%al movl 44(%rsi),%ebx addl $3572445317,%r10d movzbl %al,%eax addl %r12d,%r10d movl %edx,40(%rsi) addb %bl,%cl roll $16,%r10d movl %r8d,%r12d pinsrw $5,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %ebx,(%rdi,%rcx,4) xorl %r10d,%r12d addl 24(%r15),%r9d addb %dl,%bl movl 48(%rsi),%eax addl $76029189,%r9d movzbl %bl,%ebx addl %r12d,%r9d movl %edx,44(%rsi) addb %al,%cl roll $23,%r9d movl %r11d,%r12d pinsrw $5,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %eax,(%rdi,%rcx,4) xorl %r9d,%r12d addl 36(%r15),%r8d addb %dl,%al movl 52(%rsi),%ebx addl $3654602809,%r8d movzbl %al,%eax addl %r12d,%r8d movl %edx,48(%rsi) addb %bl,%cl roll $4,%r8d movl %r10d,%r12d pinsrw $6,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %ebx,(%rdi,%rcx,4) xorl %r8d,%r12d addl 48(%r15),%r11d addb %dl,%bl movl 56(%rsi),%eax addl $3873151461,%r11d movzbl %bl,%ebx addl %r12d,%r11d movl %edx,52(%rsi) addb %al,%cl roll $11,%r11d movl %r9d,%r12d pinsrw $6,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %eax,(%rdi,%rcx,4) xorl %r11d,%r12d addl 60(%r15),%r10d addb %dl,%al movl 60(%rsi),%ebx addl $530742520,%r10d movzbl %al,%eax addl %r12d,%r10d movl %edx,56(%rsi) addb %bl,%cl roll $16,%r10d movl %r8d,%r12d pinsrw $7,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movdqu 32(%r13),%xmm4 movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %ebx,(%rdi,%rcx,4) xorl %r10d,%r12d addl 8(%r15),%r9d addb %dl,%bl movl 64(%rsi),%eax addl $3299628645,%r9d movzbl %bl,%ebx addl %r12d,%r9d movl %edx,60(%rsi) addb %al,%cl roll $23,%r9d movl $-1,%r12d pinsrw $7,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d psllq $8,%xmm1 pxor %xmm0,%xmm4 pxor %xmm1,%xmm4 pxor %xmm0,%xmm0 movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %eax,(%rdi,%rcx,4) orl %r9d,%r12d addl 0(%r15),%r8d addb %dl,%al movl 68(%rsi),%ebx addl $4096336452,%r8d movzbl %al,%eax xorl %r10d,%r12d movl %edx,64(%rsi) addl %r12d,%r8d addb %bl,%cl roll $6,%r8d movl $-1,%r12d movd (%rdi,%rax,4),%xmm0 addl %r9d,%r8d pxor %xmm1,%xmm1 movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %ebx,(%rdi,%rcx,4) orl %r8d,%r12d addl 28(%r15),%r11d addb %dl,%bl movl 72(%rsi),%eax addl $1126891415,%r11d movzbl %bl,%ebx xorl %r9d,%r12d movl %edx,68(%rsi) addl %r12d,%r11d addb %al,%cl roll $10,%r11d movl $-1,%r12d movd 
(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %eax,(%rdi,%rcx,4) orl %r11d,%r12d addl 56(%r15),%r10d addb %dl,%al movl 76(%rsi),%ebx addl $2878612391,%r10d movzbl %al,%eax xorl %r8d,%r12d movl %edx,72(%rsi) addl %r12d,%r10d addb %bl,%cl roll $15,%r10d movl $-1,%r12d pinsrw $1,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %ebx,(%rdi,%rcx,4) orl %r10d,%r12d addl 20(%r15),%r9d addb %dl,%bl movl 80(%rsi),%eax addl $4237533241,%r9d movzbl %bl,%ebx xorl %r11d,%r12d movl %edx,76(%rsi) addl %r12d,%r9d addb %al,%cl roll $21,%r9d movl $-1,%r12d pinsrw $1,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %eax,(%rdi,%rcx,4) orl %r9d,%r12d addl 48(%r15),%r8d addb %dl,%al movl 84(%rsi),%ebx addl $1700485571,%r8d movzbl %al,%eax xorl %r10d,%r12d movl %edx,80(%rsi) addl %r12d,%r8d addb %bl,%cl roll $6,%r8d movl $-1,%r12d pinsrw $2,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %ebx,(%rdi,%rcx,4) orl %r8d,%r12d addl 12(%r15),%r11d addb %dl,%bl movl 88(%rsi),%eax addl $2399980690,%r11d movzbl %bl,%ebx xorl %r9d,%r12d movl %edx,84(%rsi) addl %r12d,%r11d addb %al,%cl roll $10,%r11d movl $-1,%r12d pinsrw $2,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %eax,(%rdi,%rcx,4) orl %r11d,%r12d addl 40(%r15),%r10d addb %dl,%al movl 92(%rsi),%ebx addl $4293915773,%r10d movzbl %al,%eax xorl %r8d,%r12d movl %edx,88(%rsi) addl %r12d,%r10d addb %bl,%cl roll $15,%r10d movl $-1,%r12d pinsrw $3,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %ebx,(%rdi,%rcx,4) orl %r10d,%r12d addl 4(%r15),%r9d addb %dl,%bl movl 96(%rsi),%eax addl $2240044497,%r9d movzbl %bl,%ebx xorl %r11d,%r12d movl %edx,92(%rsi) addl %r12d,%r9d addb %al,%cl roll $21,%r9d movl $-1,%r12d pinsrw $3,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %eax,(%rdi,%rcx,4) orl %r9d,%r12d addl 32(%r15),%r8d addb %dl,%al movl 100(%rsi),%ebx addl $1873313359,%r8d movzbl %al,%eax xorl %r10d,%r12d movl %edx,96(%rsi) addl %r12d,%r8d addb %bl,%cl roll $6,%r8d movl $-1,%r12d pinsrw $4,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %ebx,(%rdi,%rcx,4) orl %r8d,%r12d addl 60(%r15),%r11d addb %dl,%bl movl 104(%rsi),%eax addl $4264355552,%r11d movzbl %bl,%ebx xorl %r9d,%r12d movl %edx,100(%rsi) addl %r12d,%r11d addb %al,%cl roll $10,%r11d movl $-1,%r12d pinsrw $4,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %eax,(%rdi,%rcx,4) orl %r11d,%r12d addl 24(%r15),%r10d addb %dl,%al movl 108(%rsi),%ebx addl $2734768916,%r10d movzbl %al,%eax xorl %r8d,%r12d movl %edx,104(%rsi) addl %r12d,%r10d addb %bl,%cl roll $15,%r10d movl $-1,%r12d pinsrw $5,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %ebx,(%rdi,%rcx,4) orl %r10d,%r12d addl 52(%r15),%r9d addb %dl,%bl movl 112(%rsi),%eax addl $1309151649,%r9d movzbl %bl,%ebx xorl %r11d,%r12d movl %edx,108(%rsi) addl %r12d,%r9d addb %al,%cl roll $21,%r9d movl $-1,%r12d pinsrw $5,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movl (%rdi,%rcx,4),%edx xorl %r11d,%r12d movl %eax,(%rdi,%rcx,4) orl %r9d,%r12d addl 16(%r15),%r8d addb %dl,%al movl 116(%rsi),%ebx addl $4149444226,%r8d movzbl %al,%eax xorl %r10d,%r12d movl %edx,112(%rsi) addl %r12d,%r8d addb %bl,%cl roll $6,%r8d movl $-1,%r12d pinsrw $6,(%rdi,%rax,4),%xmm0 addl %r9d,%r8d movl (%rdi,%rcx,4),%edx xorl %r10d,%r12d movl %ebx,(%rdi,%rcx,4) orl %r8d,%r12d addl 44(%r15),%r11d 
addb %dl,%bl movl 120(%rsi),%eax addl $3174756917,%r11d movzbl %bl,%ebx xorl %r9d,%r12d movl %edx,116(%rsi) addl %r12d,%r11d addb %al,%cl roll $10,%r11d movl $-1,%r12d pinsrw $6,(%rdi,%rbx,4),%xmm1 addl %r8d,%r11d movl (%rdi,%rcx,4),%edx xorl %r9d,%r12d movl %eax,(%rdi,%rcx,4) orl %r11d,%r12d addl 8(%r15),%r10d addb %dl,%al movl 124(%rsi),%ebx addl $718787259,%r10d movzbl %al,%eax xorl %r8d,%r12d movl %edx,120(%rsi) addl %r12d,%r10d addb %bl,%cl roll $15,%r10d movl $-1,%r12d pinsrw $7,(%rdi,%rax,4),%xmm0 addl %r11d,%r10d movdqu 48(%r13),%xmm5 addb $32,%bpl movl (%rdi,%rcx,4),%edx xorl %r8d,%r12d movl %ebx,(%rdi,%rcx,4) orl %r10d,%r12d addl 36(%r15),%r9d addb %dl,%bl movl 0(%rdi,%rbp,4),%eax addl $3951481745,%r9d movzbl %bl,%ebx xorl %r11d,%r12d movl %edx,124(%rsi) addl %r12d,%r9d addb %al,%cl roll $21,%r9d movl $-1,%r12d pinsrw $7,(%rdi,%rbx,4),%xmm1 addl %r10d,%r9d movq %rbp,%rsi xorq %rbp,%rbp movb %sil,%bpl movq %rcx,%rsi xorq %rcx,%rcx movb %sil,%cl leaq (%rdi,%rbp,4),%rsi psllq $8,%xmm1 pxor %xmm0,%xmm5 pxor %xmm1,%xmm5 addl 0(%rsp),%r8d addl 4(%rsp),%r9d addl 8(%rsp),%r10d addl 12(%rsp),%r11d movdqu %xmm2,(%r14,%r13,1) movdqu %xmm3,16(%r14,%r13,1) movdqu %xmm4,32(%r14,%r13,1) movdqu %xmm5,48(%r14,%r13,1) leaq 64(%r15),%r15 leaq 64(%r13),%r13 cmpq 16(%rsp),%r15 jb .Loop movq 24(%rsp),%r12 subb %al,%cl movl %r8d,0(%r12) movl %r9d,4(%r12) movl %r10d,8(%r12) movl %r11d,12(%r12) subb $1,%bpl movl %ebp,-8(%rdi) movl %ecx,-4(%rdi) movq 40(%rsp),%r15 movq 48(%rsp),%r14 movq 56(%rsp),%r13 movq 64(%rsp),%r12 movq 72(%rsp),%rbp movq 80(%rsp),%rbx leaq 88(%rsp),%rsp .Lepilogue: .Labort: .byte 0xf3,0xc3 .size rc4_md5_enc,.-rc4_md5_enc Index: head/secure/lib/libcrypto/amd64/rc4-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/rc4-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/rc4-x86_64.S (revision 299481) @@ -1,616 +1,617 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from rc4-x86_64.pl. 
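The rc4_md5_enc routine above interleaves one MD5 block with 64 bytes of RC4 so the two share the pipeline; the RC4 routine that follows is the stand-alone cipher, unrolled 8x (16x on the Intel path) over a table that keeps each S-box entry in a 32-bit slot. The underlying per-byte keystream step, as a plain C sketch with an illustrative state struct (not OpenSSL's RC4_KEY layout):

    #include <stddef.h>
    #include <stdint.h>

    typedef struct { uint8_t S[256]; uint8_t i, j; } rc4_state;   /* illustrative */

    /* XOR len bytes of RC4 keystream into out; the assembly below keeps S[] as
       32-bit entries and batches the output bytes through XMM registers. */
    static void rc4_xor(rc4_state *st, const uint8_t *in, uint8_t *out, size_t len)
    {
        uint8_t i = st->i, j = st->j;
        for (size_t n = 0; n < len; n++) {
            i++;
            j = (uint8_t)(j + st->S[i]);
            uint8_t t = st->S[i]; st->S[i] = st->S[j]; st->S[j] = t;   /* swap */
            out[n] = in[n] ^ st->S[(uint8_t)(st->S[i] + st->S[j])];
        }
        st->i = i; st->j = j;
    }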
.text .globl RC4 .type RC4,@function .align 16 RC4: orq %rsi,%rsi jne .Lentry .byte 0xf3,0xc3 .Lentry: pushq %rbx pushq %r12 pushq %r13 .Lprologue: movq %rsi,%r11 movq %rdx,%r12 movq %rcx,%r13 xorq %r10,%r10 xorq %rcx,%rcx leaq 8(%rdi),%rdi movb -8(%rdi),%r10b movb -4(%rdi),%cl cmpl $-1,256(%rdi) je .LRC4_CHAR movl OPENSSL_ia32cap_P(%rip),%r8d xorq %rbx,%rbx incb %r10b subq %r10,%rbx subq %r12,%r13 movl (%rdi,%r10,4),%eax testq $-16,%r11 jz .Lloop1 btl $30,%r8d jc .Lintel andq $7,%rbx leaq 1(%r10),%rsi jz .Loop8 subq %rbx,%r11 .Loop8_warmup: addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) movl %edx,(%rdi,%r10,4) addb %dl,%al incb %r10b movl (%rdi,%rax,4),%edx movl (%rdi,%r10,4),%eax xorb (%r12),%dl movb %dl,(%r12,%r13,1) leaq 1(%r12),%r12 decq %rbx jnz .Loop8_warmup leaq 1(%r10),%rsi jmp .Loop8 .align 16 .Loop8: addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) movl 0(%rdi,%rsi,4),%ebx rorq $8,%r8 movl %edx,0(%rdi,%r10,4) addb %al,%dl movb (%rdi,%rdx,4),%r8b addb %bl,%cl movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) movl 4(%rdi,%rsi,4),%eax rorq $8,%r8 movl %edx,4(%rdi,%r10,4) addb %bl,%dl movb (%rdi,%rdx,4),%r8b addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) movl 8(%rdi,%rsi,4),%ebx rorq $8,%r8 movl %edx,8(%rdi,%r10,4) addb %al,%dl movb (%rdi,%rdx,4),%r8b addb %bl,%cl movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) movl 12(%rdi,%rsi,4),%eax rorq $8,%r8 movl %edx,12(%rdi,%r10,4) addb %bl,%dl movb (%rdi,%rdx,4),%r8b addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) movl 16(%rdi,%rsi,4),%ebx rorq $8,%r8 movl %edx,16(%rdi,%r10,4) addb %al,%dl movb (%rdi,%rdx,4),%r8b addb %bl,%cl movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) movl 20(%rdi,%rsi,4),%eax rorq $8,%r8 movl %edx,20(%rdi,%r10,4) addb %bl,%dl movb (%rdi,%rdx,4),%r8b addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) movl 24(%rdi,%rsi,4),%ebx rorq $8,%r8 movl %edx,24(%rdi,%r10,4) addb %al,%dl movb (%rdi,%rdx,4),%r8b addb $8,%sil addb %bl,%cl movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) movl -4(%rdi,%rsi,4),%eax rorq $8,%r8 movl %edx,28(%rdi,%r10,4) addb %bl,%dl movb (%rdi,%rdx,4),%r8b addb $8,%r10b rorq $8,%r8 subq $8,%r11 xorq (%r12),%r8 movq %r8,(%r12,%r13,1) leaq 8(%r12),%r12 testq $-8,%r11 jnz .Loop8 cmpq $0,%r11 jne .Lloop1 jmp .Lexit .align 16 .Lintel: testq $-32,%r11 jz .Lloop1 andq $15,%rbx jz .Loop16_is_hot subq %rbx,%r11 .Loop16_warmup: addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) movl %edx,(%rdi,%r10,4) addb %dl,%al incb %r10b movl (%rdi,%rax,4),%edx movl (%rdi,%r10,4),%eax xorb (%r12),%dl movb %dl,(%r12,%r13,1) leaq 1(%r12),%r12 decq %rbx jnz .Loop16_warmup movq %rcx,%rbx xorq %rcx,%rcx movb %bl,%cl .Loop16_is_hot: leaq (%rdi,%r10,4),%rsi addb %al,%cl movl (%rdi,%rcx,4),%edx pxor %xmm0,%xmm0 movl %eax,(%rdi,%rcx,4) addb %dl,%al movl 4(%rsi),%ebx movzbl %al,%eax movl %edx,0(%rsi) addb %bl,%cl pinsrw $0,(%rdi,%rax,4),%xmm0 jmp .Loop16_enter .align 16 .Loop16: addb %al,%cl movl (%rdi,%rcx,4),%edx pxor %xmm0,%xmm2 psllq $8,%xmm1 pxor %xmm0,%xmm0 movl %eax,(%rdi,%rcx,4) addb %dl,%al movl 4(%rsi),%ebx movzbl %al,%eax movl %edx,0(%rsi) pxor %xmm1,%xmm2 addb %bl,%cl pinsrw $0,(%rdi,%rax,4),%xmm0 movdqu %xmm2,(%r12,%r13,1) leaq 16(%r12),%r12 .Loop16_enter: movl (%rdi,%rcx,4),%edx pxor %xmm1,%xmm1 movl %ebx,(%rdi,%rcx,4) addb %dl,%bl movl 8(%rsi),%eax movzbl %bl,%ebx movl %edx,4(%rsi) addb %al,%cl pinsrw $0,(%rdi,%rbx,4),%xmm1 movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) addb %dl,%al movl 12(%rsi),%ebx movzbl %al,%eax movl %edx,8(%rsi) addb %bl,%cl 
pinsrw $1,(%rdi,%rax,4),%xmm0 movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) addb %dl,%bl movl 16(%rsi),%eax movzbl %bl,%ebx movl %edx,12(%rsi) addb %al,%cl pinsrw $1,(%rdi,%rbx,4),%xmm1 movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) addb %dl,%al movl 20(%rsi),%ebx movzbl %al,%eax movl %edx,16(%rsi) addb %bl,%cl pinsrw $2,(%rdi,%rax,4),%xmm0 movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) addb %dl,%bl movl 24(%rsi),%eax movzbl %bl,%ebx movl %edx,20(%rsi) addb %al,%cl pinsrw $2,(%rdi,%rbx,4),%xmm1 movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) addb %dl,%al movl 28(%rsi),%ebx movzbl %al,%eax movl %edx,24(%rsi) addb %bl,%cl pinsrw $3,(%rdi,%rax,4),%xmm0 movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) addb %dl,%bl movl 32(%rsi),%eax movzbl %bl,%ebx movl %edx,28(%rsi) addb %al,%cl pinsrw $3,(%rdi,%rbx,4),%xmm1 movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) addb %dl,%al movl 36(%rsi),%ebx movzbl %al,%eax movl %edx,32(%rsi) addb %bl,%cl pinsrw $4,(%rdi,%rax,4),%xmm0 movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) addb %dl,%bl movl 40(%rsi),%eax movzbl %bl,%ebx movl %edx,36(%rsi) addb %al,%cl pinsrw $4,(%rdi,%rbx,4),%xmm1 movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) addb %dl,%al movl 44(%rsi),%ebx movzbl %al,%eax movl %edx,40(%rsi) addb %bl,%cl pinsrw $5,(%rdi,%rax,4),%xmm0 movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) addb %dl,%bl movl 48(%rsi),%eax movzbl %bl,%ebx movl %edx,44(%rsi) addb %al,%cl pinsrw $5,(%rdi,%rbx,4),%xmm1 movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) addb %dl,%al movl 52(%rsi),%ebx movzbl %al,%eax movl %edx,48(%rsi) addb %bl,%cl pinsrw $6,(%rdi,%rax,4),%xmm0 movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) addb %dl,%bl movl 56(%rsi),%eax movzbl %bl,%ebx movl %edx,52(%rsi) addb %al,%cl pinsrw $6,(%rdi,%rbx,4),%xmm1 movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) addb %dl,%al movl 60(%rsi),%ebx movzbl %al,%eax movl %edx,56(%rsi) addb %bl,%cl pinsrw $7,(%rdi,%rax,4),%xmm0 addb $16,%r10b movdqu (%r12),%xmm2 movl (%rdi,%rcx,4),%edx movl %ebx,(%rdi,%rcx,4) addb %dl,%bl movzbl %bl,%ebx movl %edx,60(%rsi) leaq (%rdi,%r10,4),%rsi pinsrw $7,(%rdi,%rbx,4),%xmm1 movl (%rsi),%eax movq %rcx,%rbx xorq %rcx,%rcx subq $16,%r11 movb %bl,%cl testq $-16,%r11 jnz .Loop16 psllq $8,%xmm1 pxor %xmm0,%xmm2 pxor %xmm1,%xmm2 movdqu %xmm2,(%r12,%r13,1) leaq 16(%r12),%r12 cmpq $0,%r11 jne .Lloop1 jmp .Lexit .align 16 .Lloop1: addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) movl %edx,(%rdi,%r10,4) addb %dl,%al incb %r10b movl (%rdi,%rax,4),%edx movl (%rdi,%r10,4),%eax xorb (%r12),%dl movb %dl,(%r12,%r13,1) leaq 1(%r12),%r12 decq %r11 jnz .Lloop1 jmp .Lexit .align 16 .LRC4_CHAR: addb $1,%r10b movzbl (%rdi,%r10,1),%eax testq $-8,%r11 jz .Lcloop1 jmp .Lcloop8 .align 16 .Lcloop8: movl (%r12),%r8d movl 4(%r12),%r9d addb %al,%cl leaq 1(%r10),%rsi movzbl (%rdi,%rcx,1),%edx movzbl %sil,%esi movzbl (%rdi,%rsi,1),%ebx movb %al,(%rdi,%rcx,1) cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) jne .Lcmov0 movq %rax,%rbx .Lcmov0: addb %al,%dl xorb (%rdi,%rdx,1),%r8b rorl $8,%r8d addb %bl,%cl leaq 1(%rsi),%r10 movzbl (%rdi,%rcx,1),%edx movzbl %r10b,%r10d movzbl (%rdi,%r10,1),%eax movb %bl,(%rdi,%rcx,1) cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) jne .Lcmov1 movq %rbx,%rax .Lcmov1: addb %bl,%dl xorb (%rdi,%rdx,1),%r8b rorl $8,%r8d addb %al,%cl leaq 1(%r10),%rsi movzbl (%rdi,%rcx,1),%edx movzbl %sil,%esi movzbl (%rdi,%rsi,1),%ebx movb %al,(%rdi,%rcx,1) cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) jne .Lcmov2 movq %rax,%rbx .Lcmov2: addb %al,%dl xorb (%rdi,%rdx,1),%r8b rorl $8,%r8d addb %bl,%cl leaq 1(%rsi),%r10 movzbl 
(%rdi,%rcx,1),%edx movzbl %r10b,%r10d movzbl (%rdi,%r10,1),%eax movb %bl,(%rdi,%rcx,1) cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) jne .Lcmov3 movq %rbx,%rax .Lcmov3: addb %bl,%dl xorb (%rdi,%rdx,1),%r8b rorl $8,%r8d addb %al,%cl leaq 1(%r10),%rsi movzbl (%rdi,%rcx,1),%edx movzbl %sil,%esi movzbl (%rdi,%rsi,1),%ebx movb %al,(%rdi,%rcx,1) cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) jne .Lcmov4 movq %rax,%rbx .Lcmov4: addb %al,%dl xorb (%rdi,%rdx,1),%r9b rorl $8,%r9d addb %bl,%cl leaq 1(%rsi),%r10 movzbl (%rdi,%rcx,1),%edx movzbl %r10b,%r10d movzbl (%rdi,%r10,1),%eax movb %bl,(%rdi,%rcx,1) cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) jne .Lcmov5 movq %rbx,%rax .Lcmov5: addb %bl,%dl xorb (%rdi,%rdx,1),%r9b rorl $8,%r9d addb %al,%cl leaq 1(%r10),%rsi movzbl (%rdi,%rcx,1),%edx movzbl %sil,%esi movzbl (%rdi,%rsi,1),%ebx movb %al,(%rdi,%rcx,1) cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) jne .Lcmov6 movq %rax,%rbx .Lcmov6: addb %al,%dl xorb (%rdi,%rdx,1),%r9b rorl $8,%r9d addb %bl,%cl leaq 1(%rsi),%r10 movzbl (%rdi,%rcx,1),%edx movzbl %r10b,%r10d movzbl (%rdi,%r10,1),%eax movb %bl,(%rdi,%rcx,1) cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) jne .Lcmov7 movq %rbx,%rax .Lcmov7: addb %bl,%dl xorb (%rdi,%rdx,1),%r9b rorl $8,%r9d leaq -8(%r11),%r11 movl %r8d,(%r13) leaq 8(%r12),%r12 movl %r9d,4(%r13) leaq 8(%r13),%r13 testq $-8,%r11 jnz .Lcloop8 cmpq $0,%r11 jne .Lcloop1 jmp .Lexit .align 16 .Lcloop1: addb %al,%cl movzbl %cl,%ecx movzbl (%rdi,%rcx,1),%edx movb %al,(%rdi,%rcx,1) movb %dl,(%rdi,%r10,1) addb %al,%dl addb $1,%r10b movzbl %dl,%edx movzbl %r10b,%r10d movzbl (%rdi,%rdx,1),%edx movzbl (%rdi,%r10,1),%eax xorb (%r12),%dl leaq 1(%r12),%r12 movb %dl,(%r13) leaq 1(%r13),%r13 subq $1,%r11 jnz .Lcloop1 jmp .Lexit .align 16 .Lexit: subb $1,%r10b movl %r10d,-8(%rdi) movl %ecx,-4(%rdi) movq (%rsp),%r13 movq 8(%rsp),%r12 movq 16(%rsp),%rbx addq $24,%rsp .Lepilogue: .byte 0xf3,0xc3 .size RC4,.-RC4 .globl private_RC4_set_key .type private_RC4_set_key,@function .align 16 private_RC4_set_key: leaq 8(%rdi),%rdi leaq (%rdx,%rsi,1),%rdx negq %rsi movq %rsi,%rcx xorl %eax,%eax xorq %r9,%r9 xorq %r10,%r10 xorq %r11,%r11 movl OPENSSL_ia32cap_P(%rip),%r8d btl $20,%r8d jc .Lc1stloop jmp .Lw1stloop .align 16 .Lw1stloop: movl %eax,(%rdi,%rax,4) addb $1,%al jnc .Lw1stloop xorq %r9,%r9 xorq %r8,%r8 .align 16 .Lw2ndloop: movl (%rdi,%r9,4),%r10d addb (%rdx,%rsi,1),%r8b addb %r10b,%r8b addq $1,%rsi movl (%rdi,%r8,4),%r11d cmovzq %rcx,%rsi movl %r10d,(%rdi,%r8,4) movl %r11d,(%rdi,%r9,4) addb $1,%r9b jnc .Lw2ndloop jmp .Lexit_key .align 16 .Lc1stloop: movb %al,(%rdi,%rax,1) addb $1,%al jnc .Lc1stloop xorq %r9,%r9 xorq %r8,%r8 .align 16 .Lc2ndloop: movb (%rdi,%r9,1),%r10b addb (%rdx,%rsi,1),%r8b addb %r10b,%r8b addq $1,%rsi movb (%rdi,%r8,1),%r11b jnz .Lcnowrap movq %rcx,%rsi .Lcnowrap: movb %r10b,(%rdi,%r8,1) movb %r11b,(%rdi,%r9,1) addb $1,%r9b jnc .Lc2ndloop movl $-1,256(%rdi) .align 16 .Lexit_key: xorl %eax,%eax movl %eax,-8(%rdi) movl %eax,-4(%rdi) .byte 0xf3,0xc3 .size private_RC4_set_key,.-private_RC4_set_key .globl RC4_options .type RC4_options,@function .align 16 RC4_options: leaq .Lopts(%rip),%rax movl OPENSSL_ia32cap_P(%rip),%edx btl $20,%edx jc .L8xchar btl $30,%edx jnc .Ldone addq $25,%rax .byte 0xf3,0xc3 .L8xchar: addq $12,%rax .Ldone: .byte 0xf3,0xc3 .align 64 .Lopts: .byte 114,99,52,40,56,120,44,105,110,116,41,0 .byte 114,99,52,40,56,120,44,99,104,97,114,41,0 .byte 114,99,52,40,49,54,120,44,105,110,116,41,0 .byte 
82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 .size RC4_options,.-RC4_options Index: head/secure/lib/libcrypto/amd64/rsaz-avx2.S =================================================================== --- head/secure/lib/libcrypto/amd64/rsaz-avx2.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/rsaz-avx2.S (revision 299481) @@ -1,26 +1,1698 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from rsaz-avx2.pl. .text -.globl rsaz_avx2_eligible -.type rsaz_avx2_eligible,@function -rsaz_avx2_eligible: - xorl %eax,%eax - .byte 0xf3,0xc3 -.size rsaz_avx2_eligible,.-rsaz_avx2_eligible - .globl rsaz_1024_sqr_avx2 -.globl rsaz_1024_mul_avx2 -.globl rsaz_1024_norm2red_avx2 -.globl rsaz_1024_red2norm_avx2 -.globl rsaz_1024_scatter5_avx2 -.globl rsaz_1024_gather5_avx2 .type rsaz_1024_sqr_avx2,@function +.align 64 rsaz_1024_sqr_avx2: + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + vzeroupper + movq %rax,%rbp + movq %rdx,%r13 + subq $832,%rsp + movq %r13,%r15 + subq $-128,%rdi + subq $-128,%rsi + subq $-128,%r13 + + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + vpxor %ymm9,%ymm9,%ymm9 + jz .Lsqr_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%r13),%ymm0 + andq $-2048,%rsp + vmovdqu 32-128(%r13),%ymm1 + vmovdqu 64-128(%r13),%ymm2 + vmovdqu 96-128(%r13),%ymm3 + vmovdqu 128-128(%r13),%ymm4 + vmovdqu 160-128(%r13),%ymm5 + vmovdqu 192-128(%r13),%ymm6 + vmovdqu 224-128(%r13),%ymm7 + vmovdqu 256-128(%r13),%ymm8 + leaq 832+128(%rsp),%r13 + vmovdqu %ymm0,0-128(%r13) + vmovdqu %ymm1,32-128(%r13) + vmovdqu %ymm2,64-128(%r13) + vmovdqu %ymm3,96-128(%r13) + vmovdqu %ymm4,128-128(%r13) + vmovdqu %ymm5,160-128(%r13) + vmovdqu %ymm6,192-128(%r13) + vmovdqu %ymm7,224-128(%r13) + vmovdqu %ymm8,256-128(%r13) + vmovdqu %ymm9,288-128(%r13) + +.Lsqr_1024_no_n_copy: + andq $-1024,%rsp + + vmovdqu 32-128(%rsi),%ymm1 + vmovdqu 64-128(%rsi),%ymm2 + vmovdqu 96-128(%rsi),%ymm3 + vmovdqu 128-128(%rsi),%ymm4 + vmovdqu 160-128(%rsi),%ymm5 + vmovdqu 192-128(%rsi),%ymm6 + vmovdqu 224-128(%rsi),%ymm7 + vmovdqu 256-128(%rsi),%ymm8 + + leaq 192(%rsp),%rbx + vpbroadcastq .Land_mask(%rip),%ymm15 + jmp .LOOP_GRANDE_SQR_1024 + +.align 32 +.LOOP_GRANDE_SQR_1024: + leaq 576+128(%rsp),%r9 + leaq 448(%rsp),%r12 + + + + + vpaddq %ymm1,%ymm1,%ymm1 + vpbroadcastq 0-128(%rsi),%ymm10 + vpaddq %ymm2,%ymm2,%ymm2 + vmovdqa %ymm1,0-128(%r9) + vpaddq %ymm3,%ymm3,%ymm3 + vmovdqa %ymm2,32-128(%r9) + vpaddq %ymm4,%ymm4,%ymm4 + vmovdqa %ymm3,64-128(%r9) + vpaddq %ymm5,%ymm5,%ymm5 + vmovdqa %ymm4,96-128(%r9) + vpaddq %ymm6,%ymm6,%ymm6 + vmovdqa %ymm5,128-128(%r9) + vpaddq %ymm7,%ymm7,%ymm7 + vmovdqa %ymm6,160-128(%r9) + vpaddq %ymm8,%ymm8,%ymm8 + vmovdqa %ymm7,192-128(%r9) + vpxor %ymm9,%ymm9,%ymm9 + vmovdqa %ymm8,224-128(%r9) + + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpbroadcastq 32-128(%rsi),%ymm11 + vmovdqu %ymm9,288-192(%rbx) + vpmuludq %ymm10,%ymm1,%ymm1 + vmovdqu %ymm9,320-448(%r12) + vpmuludq %ymm10,%ymm2,%ymm2 + vmovdqu %ymm9,352-448(%r12) + vpmuludq %ymm10,%ymm3,%ymm3 + vmovdqu %ymm9,384-448(%r12) + vpmuludq %ymm10,%ymm4,%ymm4 + vmovdqu %ymm9,416-448(%r12) + vpmuludq %ymm10,%ymm5,%ymm5 + vmovdqu %ymm9,448-448(%r12) + vpmuludq %ymm10,%ymm6,%ymm6 + vmovdqu %ymm9,480-448(%r12) + vpmuludq %ymm10,%ymm7,%ymm7 + vmovdqu %ymm9,512-448(%r12) + vpmuludq %ymm10,%ymm8,%ymm8 + vpbroadcastq 64-128(%rsi),%ymm10 + vmovdqu %ymm9,544-448(%r12) + + 
movq %rsi,%r15 + movl $4,%r14d + jmp .Lsqr_entry_1024 +.align 32 +.LOOP_SQR_1024: + vpbroadcastq 32-128(%r15),%ymm11 + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpaddq 0-192(%rbx),%ymm0,%ymm0 + vpmuludq 0-128(%r9),%ymm10,%ymm1 + vpaddq 32-192(%rbx),%ymm1,%ymm1 + vpmuludq 32-128(%r9),%ymm10,%ymm2 + vpaddq 64-192(%rbx),%ymm2,%ymm2 + vpmuludq 64-128(%r9),%ymm10,%ymm3 + vpaddq 96-192(%rbx),%ymm3,%ymm3 + vpmuludq 96-128(%r9),%ymm10,%ymm4 + vpaddq 128-192(%rbx),%ymm4,%ymm4 + vpmuludq 128-128(%r9),%ymm10,%ymm5 + vpaddq 160-192(%rbx),%ymm5,%ymm5 + vpmuludq 160-128(%r9),%ymm10,%ymm6 + vpaddq 192-192(%rbx),%ymm6,%ymm6 + vpmuludq 192-128(%r9),%ymm10,%ymm7 + vpaddq 224-192(%rbx),%ymm7,%ymm7 + vpmuludq 224-128(%r9),%ymm10,%ymm8 + vpbroadcastq 64-128(%r15),%ymm10 + vpaddq 256-192(%rbx),%ymm8,%ymm8 +.Lsqr_entry_1024: + vmovdqu %ymm0,0-192(%rbx) + vmovdqu %ymm1,32-192(%rbx) + + vpmuludq 32-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 32-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 64-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 96-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 128-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 160-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 192-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 224-128(%r9),%ymm11,%ymm0 + vpbroadcastq 96-128(%r15),%ymm11 + vpaddq 288-192(%rbx),%ymm0,%ymm0 + + vmovdqu %ymm2,64-192(%rbx) + vmovdqu %ymm3,96-192(%rbx) + + vpmuludq 64-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 64-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 96-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 128-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 160-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 224-128(%r9),%ymm10,%ymm1 + vpbroadcastq 128-128(%r15),%ymm10 + vpaddq 320-448(%r12),%ymm1,%ymm1 + + vmovdqu %ymm4,128-192(%rbx) + vmovdqu %ymm5,160-192(%rbx) + + vpmuludq 96-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 96-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq 128-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm0,%ymm0 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq 224-128(%r9),%ymm11,%ymm2 + vpbroadcastq 160-128(%r15),%ymm11 + vpaddq 352-448(%r12),%ymm2,%ymm2 + + vmovdqu %ymm6,192-192(%rbx) + vmovdqu %ymm7,224-192(%rbx) + + vpmuludq 128-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 128-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 160-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 192-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 224-128(%r9),%ymm10,%ymm3 + vpbroadcastq 192-128(%r15),%ymm10 + vpaddq 384-448(%r12),%ymm3,%ymm3 + + vmovdqu %ymm8,256-192(%rbx) + vmovdqu %ymm0,288-192(%rbx) + leaq 8(%rbx),%rbx + + vpmuludq 160-128(%rsi),%ymm11,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 224-128(%r9),%ymm11,%ymm4 + vpbroadcastq 224-128(%r15),%ymm11 + vpaddq 416-448(%r12),%ymm4,%ymm4 + + vmovdqu %ymm1,320-448(%r12) + vmovdqu %ymm2,352-448(%r12) + + vpmuludq 192-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpbroadcastq 256-128(%r15),%ymm0 + vpaddq 
%ymm14,%ymm4,%ymm4 + vpmuludq 224-128(%r9),%ymm10,%ymm5 + vpbroadcastq 0+8-128(%r15),%ymm10 + vpaddq 448-448(%r12),%ymm5,%ymm5 + + vmovdqu %ymm3,384-448(%r12) + vmovdqu %ymm4,416-448(%r12) + leaq 8(%r15),%r15 + + vpmuludq 224-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 224-128(%r9),%ymm11,%ymm6 + vpaddq 480-448(%r12),%ymm6,%ymm6 + + vpmuludq 256-128(%rsi),%ymm0,%ymm7 + vmovdqu %ymm5,448-448(%r12) + vpaddq 512-448(%r12),%ymm7,%ymm7 + vmovdqu %ymm6,480-448(%r12) + vmovdqu %ymm7,512-448(%r12) + leaq 8(%r12),%r12 + + decl %r14d + jnz .LOOP_SQR_1024 + + vmovdqu 256(%rsp),%ymm8 + vmovdqu 288(%rsp),%ymm1 + vmovdqu 320(%rsp),%ymm2 + leaq 192(%rsp),%rbx + + vpsrlq $29,%ymm8,%ymm14 + vpand %ymm15,%ymm8,%ymm8 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + + vpermq $0x93,%ymm14,%ymm14 + vpxor %ymm9,%ymm9,%ymm9 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm8,%ymm8 + vpblendd $3,%ymm11,%ymm9,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,288-192(%rbx) + vmovdqu %ymm2,320-192(%rbx) + + movq (%rsp),%rax + movq 8(%rsp),%r10 + movq 16(%rsp),%r11 + movq 24(%rsp),%r12 + vmovdqu 32(%rsp),%ymm1 + vmovdqu 64-192(%rbx),%ymm2 + vmovdqu 96-192(%rbx),%ymm3 + vmovdqu 128-192(%rbx),%ymm4 + vmovdqu 160-192(%rbx),%ymm5 + vmovdqu 192-192(%rbx),%ymm6 + vmovdqu 224-192(%rbx),%ymm7 + + movq %rax,%r9 + imull %ecx,%eax + andl $0x1fffffff,%eax + vmovd %eax,%xmm12 + + movq %rax,%rdx + imulq -128(%r13),%rax + vpbroadcastq %xmm12,%ymm12 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax + shrq $29,%r9 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + addq %r9,%r10 + addq %rax,%r11 + imulq 24-128(%r13),%rdx + addq %rdx,%r12 + + movq %r10,%rax + imull %ecx,%eax + andl $0x1fffffff,%eax + + movl $9,%r14d + jmp .LOOP_REDUCE_1024 + +.align 32 +.LOOP_REDUCE_1024: + vmovd %eax,%xmm13 + vpbroadcastq %xmm13,%ymm13 + + vpmuludq 32-128(%r13),%ymm12,%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm10,%ymm1,%ymm1 + addq %rax,%r10 + vpmuludq 64-128(%r13),%ymm12,%ymm14 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm14,%ymm2,%ymm2 + vpmuludq 96-128(%r13),%ymm12,%ymm11 +.byte 0x67 + addq %rax,%r11 +.byte 0x67 + movq %rdx,%rax + imulq 16-128(%r13),%rax + shrq $29,%r10 + vpaddq %ymm11,%ymm3,%ymm3 + vpmuludq 128-128(%r13),%ymm12,%ymm10 + addq %rax,%r12 + addq %r10,%r11 + vpaddq %ymm10,%ymm4,%ymm4 + vpmuludq 160-128(%r13),%ymm12,%ymm14 + movq %r11,%rax + imull %ecx,%eax + vpaddq %ymm14,%ymm5,%ymm5 + vpmuludq 192-128(%r13),%ymm12,%ymm11 + andl $0x1fffffff,%eax + vpaddq %ymm11,%ymm6,%ymm6 + vpmuludq 224-128(%r13),%ymm12,%ymm10 + vpaddq %ymm10,%ymm7,%ymm7 + vpmuludq 256-128(%r13),%ymm12,%ymm14 + vmovd %eax,%xmm12 + + vpaddq %ymm14,%ymm8,%ymm8 + + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 32-8-128(%r13),%ymm13,%ymm11 + vmovdqu 96-8-128(%r13),%ymm14 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm1,%ymm1 + vpmuludq 64-8-128(%r13),%ymm13,%ymm10 + vmovdqu 128-8-128(%r13),%ymm11 + addq %rax,%r11 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm10,%ymm2,%ymm2 + addq %r12,%rax + shrq $29,%r11 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 160-8-128(%r13),%ymm10 + addq %r11,%rax + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 192-8-128(%r13),%ymm14 +.byte 0x67 + movq %rax,%r12 + imull %ecx,%eax + vpaddq %ymm11,%ymm4,%ymm4 + vpmuludq %ymm13,%ymm10,%ymm10 +.byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 + andl $0x1fffffff,%eax + vpaddq %ymm10,%ymm5,%ymm5 + 
vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 256-8-128(%r13),%ymm10 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 288-8-128(%r13),%ymm9 + vmovd %eax,%xmm0 + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm7,%ymm7 + vpmuludq %ymm13,%ymm10,%ymm10 + vmovdqu 32-16-128(%r13),%ymm14 + vpbroadcastq %xmm0,%ymm0 + vpaddq %ymm10,%ymm8,%ymm8 + vpmuludq %ymm13,%ymm9,%ymm9 + vmovdqu 64-16-128(%r13),%ymm11 + addq %rax,%r12 + + vmovdqu 32-24-128(%r13),%ymm13 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 96-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq %ymm0,%ymm13,%ymm13 + vpmuludq %ymm12,%ymm11,%ymm11 +.byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff + vpaddq %ymm1,%ymm13,%ymm13 + vpaddq %ymm11,%ymm2,%ymm2 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 160-16-128(%r13),%ymm11 +.byte 0x67 + vmovq %xmm13,%rax + vmovdqu %ymm13,(%rsp) + vpaddq %ymm10,%ymm3,%ymm3 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 192-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq %ymm12,%ymm11,%ymm11 + vmovdqu 224-16-128(%r13),%ymm14 + vpaddq %ymm11,%ymm5,%ymm5 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 256-16-128(%r13),%ymm11 + vpaddq %ymm10,%ymm6,%ymm6 + vpmuludq %ymm12,%ymm14,%ymm14 + shrq $29,%r12 + vmovdqu 288-16-128(%r13),%ymm10 + addq %r12,%rax + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq %ymm12,%ymm11,%ymm11 + + movq %rax,%r9 + imull %ecx,%eax + vpaddq %ymm11,%ymm8,%ymm8 + vpmuludq %ymm12,%ymm10,%ymm10 + andl $0x1fffffff,%eax + vmovd %eax,%xmm12 + vmovdqu 96-24-128(%r13),%ymm11 +.byte 0x67 + vpaddq %ymm10,%ymm9,%ymm9 + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 64-24-128(%r13),%ymm0,%ymm14 + vmovdqu 128-24-128(%r13),%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + movq 8(%rsp),%r10 + vpaddq %ymm14,%ymm2,%ymm1 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 160-24-128(%r13),%ymm14 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax +.byte 0x67 + shrq $29,%r9 + movq 16(%rsp),%r11 + vpaddq %ymm11,%ymm3,%ymm2 + vpmuludq %ymm0,%ymm10,%ymm10 + vmovdqu 192-24-128(%r13),%ymm11 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + vpaddq %ymm10,%ymm4,%ymm3 + vpmuludq %ymm0,%ymm14,%ymm14 + vmovdqu 224-24-128(%r13),%ymm10 + imulq 24-128(%r13),%rdx + addq %rax,%r11 + leaq (%r9,%r10,1),%rax + vpaddq %ymm14,%ymm5,%ymm4 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 256-24-128(%r13),%ymm14 + movq %rax,%r10 + imull %ecx,%eax + vpmuludq %ymm0,%ymm10,%ymm10 + vpaddq %ymm11,%ymm6,%ymm5 + vmovdqu 288-24-128(%r13),%ymm11 + andl $0x1fffffff,%eax + vpaddq %ymm10,%ymm7,%ymm6 + vpmuludq %ymm0,%ymm14,%ymm14 + addq 24(%rsp),%rdx + vpaddq %ymm14,%ymm8,%ymm7 + vpmuludq %ymm0,%ymm11,%ymm11 + vpaddq %ymm11,%ymm9,%ymm8 + vmovq %r12,%xmm9 + movq %rdx,%r12 + + decl %r14d + jnz .LOOP_REDUCE_1024 + leaq 448(%rsp),%r12 + vpaddq %ymm9,%ymm13,%ymm0 + vpxor %ymm9,%ymm9,%ymm9 + + vpaddq 288-192(%rbx),%ymm0,%ymm0 + vpaddq 320-448(%r12),%ymm1,%ymm1 + vpaddq 352-448(%r12),%ymm2,%ymm2 + vpaddq 384-448(%r12),%ymm3,%ymm3 + vpaddq 416-448(%r12),%ymm4,%ymm4 + vpaddq 448-448(%r12),%ymm5,%ymm5 + vpaddq 480-448(%r12),%ymm6,%ymm6 + vpaddq 512-448(%r12),%ymm7,%ymm7 + vpaddq 544-448(%r12),%ymm8,%ymm8 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + 
vpaddq %ymm14,%ymm1,%ymm1 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vpaddq %ymm13,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vmovdqu %ymm0,0-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,32-128(%rdi) + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vmovdqu %ymm2,64-128(%rdi) + vpaddq %ymm13,%ymm4,%ymm4 + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vmovdqu %ymm4,128-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vmovdqu %ymm5,160-128(%rdi) + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vmovdqu %ymm6,192-128(%rdi) + vpaddq %ymm13,%ymm8,%ymm8 + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + + movq %rdi,%rsi + decl %r8d + jne .LOOP_GRANDE_SQR_1024 + + vzeroall + movq %rbp,%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lsqr_1024_epilogue: + .byte 0xf3,0xc3 +.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 +.globl rsaz_1024_mul_avx2 +.type rsaz_1024_mul_avx2,@function +.align 64 rsaz_1024_mul_avx2: -rsaz_1024_norm2red_avx2: + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rax,%rbp + vzeroall + movq %rdx,%r13 + subq $64,%rsp + + + + + + +.byte 0x67,0x67 + movq %rsi,%r15 + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + movq %rsi,%r15 + cmovnzq %r13,%rsi + cmovnzq %r15,%r13 + + movq %rcx,%r15 + subq $-128,%rsi + subq $-128,%rcx + subq $-128,%rdi + + andq $4095,%r15 + addq $320,%r15 +.byte 0x67,0x67 + shrq $12,%r15 + jz .Lmul_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%rcx),%ymm0 + andq $-512,%rsp + vmovdqu 
32-128(%rcx),%ymm1 + vmovdqu 64-128(%rcx),%ymm2 + vmovdqu 96-128(%rcx),%ymm3 + vmovdqu 128-128(%rcx),%ymm4 + vmovdqu 160-128(%rcx),%ymm5 + vmovdqu 192-128(%rcx),%ymm6 + vmovdqu 224-128(%rcx),%ymm7 + vmovdqu 256-128(%rcx),%ymm8 + leaq 64+128(%rsp),%rcx + vmovdqu %ymm0,0-128(%rcx) + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm1,32-128(%rcx) + vpxor %ymm1,%ymm1,%ymm1 + vmovdqu %ymm2,64-128(%rcx) + vpxor %ymm2,%ymm2,%ymm2 + vmovdqu %ymm3,96-128(%rcx) + vpxor %ymm3,%ymm3,%ymm3 + vmovdqu %ymm4,128-128(%rcx) + vpxor %ymm4,%ymm4,%ymm4 + vmovdqu %ymm5,160-128(%rcx) + vpxor %ymm5,%ymm5,%ymm5 + vmovdqu %ymm6,192-128(%rcx) + vpxor %ymm6,%ymm6,%ymm6 + vmovdqu %ymm7,224-128(%rcx) + vpxor %ymm7,%ymm7,%ymm7 + vmovdqu %ymm8,256-128(%rcx) + vmovdqa %ymm0,%ymm8 + vmovdqu %ymm9,288-128(%rcx) +.Lmul_1024_no_n_copy: + andq $-64,%rsp + + movq (%r13),%rbx + vpbroadcastq (%r13),%ymm10 + vmovdqu %ymm0,(%rsp) + xorq %r9,%r9 +.byte 0x67 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + + vmovdqu .Land_mask(%rip),%ymm15 + movl $9,%r14d + vmovdqu %ymm9,288-128(%rdi) + jmp .Loop_mul_1024 + +.align 32 +.Loop_mul_1024: + vpsrlq $29,%ymm3,%ymm9 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r9,%rax + movq %rbx,%r10 + imulq 8-128(%rsi),%r10 + addq 8(%rsp),%r10 + + movq %rax,%r9 + imull %r8d,%eax + andl $0x1fffffff,%eax + + movq %rbx,%r11 + imulq 16-128(%rsi),%r11 + addq 16(%rsp),%r11 + + movq %rbx,%r12 + imulq 24-128(%rsi),%r12 + addq 24(%rsp),%r12 + vpmuludq 32-128(%rsi),%ymm10,%ymm0 + vmovd %eax,%xmm11 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq 64-128(%rsi),%ymm10,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 96-128(%rsi),%ymm10,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq 128-128(%rsi),%ymm10,%ymm0 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq 160-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 192-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq 224-128(%rsi),%ymm10,%ymm0 + vpermq $0x93,%ymm9,%ymm9 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq 256-128(%rsi),%ymm10,%ymm12 + vpbroadcastq 8(%r13),%ymm10 + vpaddq %ymm12,%ymm8,%ymm8 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%rcx),%rax + addq %rax,%r11 + shrq $29,%r9 + imulq 24-128(%rcx),%rdx + addq %rdx,%r12 + addq %r9,%r10 + + vpmuludq 32-128(%rcx),%ymm11,%ymm13 + vmovq %xmm10,%rbx + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 64-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm2,%ymm2 + vpmuludq 96-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 128-128(%rcx),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 160-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm5,%ymm5 + vpmuludq 192-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 224-128(%rcx),%ymm11,%ymm13 + vpblendd $3,%ymm14,%ymm9,%ymm9 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 256-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm9,%ymm3,%ymm3 + vpaddq %ymm0,%ymm8,%ymm8 + + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rsi),%ymm12 + movq %rbx,%rax + imulq 8-128(%rsi),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rsi),%ymm13 + + movq %r10,%rax + imull %r8d,%eax + andl $0x1fffffff,%eax + + imulq 16-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovd %eax,%xmm11 + vmovdqu -8+96-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -8+128-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+160-128(%rsi),%ymm13 + 
vpaddq %ymm0,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+192-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -8+224-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+256-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+288-128(%rsi),%ymm9 + vpaddq %ymm12,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm13,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm9,%ymm9 + vpbroadcastq 16(%r13),%ymm10 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rcx),%ymm0 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rcx),%ymm12 + shrq $29,%r10 + imulq 16-128(%rcx),%rdx + addq %rdx,%r12 + addq %r10,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -8+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rsi),%ymm0 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r11,%rax + + vmovdqu -16+64-128(%rsi),%ymm12 + movq %rax,%r11 + imull %r8d,%eax + andl $0x1fffffff,%eax + + imulq 8-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -16+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -16+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -16+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 24(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rcx),%ymm0 + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r11 + vmovdqu -16+64-128(%rcx),%ymm12 + imulq 8-128(%rcx),%rdx + addq %rdx,%r12 + shrq $29,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -16+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu 
-24+32-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+64-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm9,%ymm9 + + addq %r11,%r12 + imulq -128(%rsi),%rbx + addq %rbx,%r12 + + movq %r12,%rax + imull %r8d,%eax + andl $0x1fffffff,%eax + + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -24+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -24+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -24+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 32(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + addq $32,%r13 + + vmovdqu -24+32-128(%rcx),%ymm0 + imulq -128(%rcx),%rax + addq %rax,%r12 + shrq $29,%r12 + + vmovdqu -24+64-128(%rcx),%ymm12 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -24+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm0 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu %ymm0,(%rsp) + vpaddq %ymm12,%ymm2,%ymm1 + vmovdqu -24+128-128(%rcx),%ymm0 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm2 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm3 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm4 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm5 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+288-128(%rcx),%ymm13 + movq %r12,%r9 + vpaddq %ymm0,%ymm7,%ymm6 + vpmuludq %ymm11,%ymm12,%ymm12 + addq (%rsp),%r9 + vpaddq %ymm12,%ymm8,%ymm7 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovq %r12,%xmm12 + vpaddq %ymm13,%ymm9,%ymm8 + + decl %r14d + jnz .Loop_mul_1024 + vpermq $0,%ymm15,%ymm15 + vpaddq (%rsp),%ymm12,%ymm0 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm10,%ymm10 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpermq $0x93,%ymm11,%ymm11 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm10,%ymm10 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm11,%ymm11 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vmovdqu %ymm0,0-128(%rdi) + 
vmovdqu %ymm1,32-128(%rdi) + vmovdqu %ymm2,64-128(%rdi) + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vmovdqu %ymm4,128-128(%rdi) + vmovdqu %ymm5,160-128(%rdi) + vmovdqu %ymm6,192-128(%rdi) + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + vzeroupper + + movq %rbp,%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_1024_epilogue: + .byte 0xf3,0xc3 +.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 +.globl rsaz_1024_red2norm_avx2 +.type rsaz_1024_red2norm_avx2,@function +.align 32 rsaz_1024_red2norm_avx2: + subq $-128,%rsi + xorq %rax,%rax + movq -128(%rsi),%r8 + movq -120(%rsi),%r9 + movq -112(%rsi),%r10 + shlq $0,%r8 + shlq $29,%r9 + movq %r10,%r11 + shlq $58,%r10 + shrq $6,%r11 + addq %r8,%rax + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,0(%rdi) + movq %r11,%rax + movq -104(%rsi),%r8 + movq -96(%rsi),%r9 + shlq $23,%r8 + movq %r9,%r10 + shlq $52,%r9 + shrq $12,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,8(%rdi) + movq %r10,%rax + movq -88(%rsi),%r11 + movq -80(%rsi),%r8 + shlq $17,%r11 + movq %r8,%r9 + shlq $46,%r8 + shrq $18,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,16(%rdi) + movq %r9,%rax + movq -72(%rsi),%r10 + movq -64(%rsi),%r11 + shlq $11,%r10 + movq %r11,%r8 + shlq $40,%r11 + shrq $24,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,24(%rdi) + movq %r8,%rax + movq -56(%rsi),%r9 + movq -48(%rsi),%r10 + movq -40(%rsi),%r11 + shlq $5,%r9 + shlq $34,%r10 + movq %r11,%r8 + shlq $63,%r11 + shrq $1,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,32(%rdi) + movq %r8,%rax + movq -32(%rsi),%r9 + movq -24(%rsi),%r10 + shlq $28,%r9 + movq %r10,%r11 + shlq $57,%r10 + shrq $7,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,40(%rdi) + movq %r11,%rax + movq -16(%rsi),%r8 + movq -8(%rsi),%r9 + shlq $22,%r8 + movq %r9,%r10 + shlq $51,%r9 + shrq $13,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,48(%rdi) + movq %r10,%rax + movq 0(%rsi),%r11 + 
movq 8(%rsi),%r8 + shlq $16,%r11 + movq %r8,%r9 + shlq $45,%r8 + shrq $19,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,56(%rdi) + movq %r9,%rax + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + shlq $10,%r10 + movq %r11,%r8 + shlq $39,%r11 + shrq $25,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,64(%rdi) + movq %r8,%rax + movq 32(%rsi),%r9 + movq 40(%rsi),%r10 + movq 48(%rsi),%r11 + shlq $4,%r9 + shlq $33,%r10 + movq %r11,%r8 + shlq $62,%r11 + shrq $2,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,72(%rdi) + movq %r8,%rax + movq 56(%rsi),%r9 + movq 64(%rsi),%r10 + shlq $27,%r9 + movq %r10,%r11 + shlq $56,%r10 + shrq $8,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,80(%rdi) + movq %r11,%rax + movq 72(%rsi),%r8 + movq 80(%rsi),%r9 + shlq $21,%r8 + movq %r9,%r10 + shlq $50,%r9 + shrq $14,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,88(%rdi) + movq %r10,%rax + movq 88(%rsi),%r11 + movq 96(%rsi),%r8 + shlq $15,%r11 + movq %r8,%r9 + shlq $44,%r8 + shrq $20,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,96(%rdi) + movq %r9,%rax + movq 104(%rsi),%r10 + movq 112(%rsi),%r11 + shlq $9,%r10 + movq %r11,%r8 + shlq $38,%r11 + shrq $26,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,104(%rdi) + movq %r8,%rax + movq 120(%rsi),%r9 + movq 128(%rsi),%r10 + movq 136(%rsi),%r11 + shlq $3,%r9 + shlq $32,%r10 + movq %r11,%r8 + shlq $61,%r11 + shrq $3,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,112(%rdi) + movq %r8,%rax + movq 144(%rsi),%r9 + movq 152(%rsi),%r10 + shlq $26,%r9 + movq %r10,%r11 + shlq $55,%r10 + shrq $9,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,120(%rdi) + movq %r11,%rax + .byte 0xf3,0xc3 +.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2 + +.globl rsaz_1024_norm2red_avx2 +.type rsaz_1024_norm2red_avx2,@function +.align 32 +rsaz_1024_norm2red_avx2: + subq $-128,%rdi + movq (%rsi),%r8 + movl $0x1fffffff,%eax + movq 8(%rsi),%r9 + movq %r8,%r11 + shrq $0,%r11 + andq %rax,%r11 + movq %r11,-128(%rdi) + movq %r8,%r10 + shrq $29,%r10 + andq %rax,%r10 + movq %r10,-120(%rdi) + shrdq $58,%r9,%r8 + andq %rax,%r8 + movq %r8,-112(%rdi) + movq 16(%rsi),%r10 + movq %r9,%r8 + shrq $23,%r8 + andq %rax,%r8 + movq %r8,-104(%rdi) + shrdq $52,%r10,%r9 + andq %rax,%r9 + movq %r9,-96(%rdi) + movq 24(%rsi),%r11 + movq %r10,%r9 + shrq $17,%r9 + andq %rax,%r9 + movq %r9,-88(%rdi) + shrdq $46,%r11,%r10 + andq %rax,%r10 + movq %r10,-80(%rdi) + movq 32(%rsi),%r8 + movq %r11,%r10 + shrq $11,%r10 + andq %rax,%r10 + movq %r10,-72(%rdi) + shrdq $40,%r8,%r11 + andq %rax,%r11 + movq %r11,-64(%rdi) + movq 40(%rsi),%r9 + movq %r8,%r11 + shrq $5,%r11 + andq %rax,%r11 + movq %r11,-56(%rdi) + movq %r8,%r10 + shrq $34,%r10 + andq %rax,%r10 + movq %r10,-48(%rdi) + shrdq $63,%r9,%r8 + andq %rax,%r8 + movq %r8,-40(%rdi) + movq 48(%rsi),%r10 + movq %r9,%r8 + shrq $28,%r8 + andq %rax,%r8 + movq %r8,-32(%rdi) + shrdq $57,%r10,%r9 + andq %rax,%r9 + movq %r9,-24(%rdi) + movq 56(%rsi),%r11 + movq %r10,%r9 + shrq $22,%r9 + andq %rax,%r9 + movq %r9,-16(%rdi) + shrdq $51,%r11,%r10 + andq %rax,%r10 + movq %r10,-8(%rdi) + movq 64(%rsi),%r8 + movq %r11,%r10 + shrq $16,%r10 + andq %rax,%r10 + movq %r10,0(%rdi) + shrdq $45,%r8,%r11 + andq %rax,%r11 + movq %r11,8(%rdi) + movq 72(%rsi),%r9 + movq %r8,%r11 + shrq $10,%r11 + andq %rax,%r11 + movq %r11,16(%rdi) + shrdq $39,%r9,%r8 + andq %rax,%r8 + movq %r8,24(%rdi) + movq 80(%rsi),%r10 + movq %r9,%r8 + shrq $4,%r8 
+ andq %rax,%r8 + movq %r8,32(%rdi) + movq %r9,%r11 + shrq $33,%r11 + andq %rax,%r11 + movq %r11,40(%rdi) + shrdq $62,%r10,%r9 + andq %rax,%r9 + movq %r9,48(%rdi) + movq 88(%rsi),%r11 + movq %r10,%r9 + shrq $27,%r9 + andq %rax,%r9 + movq %r9,56(%rdi) + shrdq $56,%r11,%r10 + andq %rax,%r10 + movq %r10,64(%rdi) + movq 96(%rsi),%r8 + movq %r11,%r10 + shrq $21,%r10 + andq %rax,%r10 + movq %r10,72(%rdi) + shrdq $50,%r8,%r11 + andq %rax,%r11 + movq %r11,80(%rdi) + movq 104(%rsi),%r9 + movq %r8,%r11 + shrq $15,%r11 + andq %rax,%r11 + movq %r11,88(%rdi) + shrdq $44,%r9,%r8 + andq %rax,%r8 + movq %r8,96(%rdi) + movq 112(%rsi),%r10 + movq %r9,%r8 + shrq $9,%r8 + andq %rax,%r8 + movq %r8,104(%rdi) + shrdq $38,%r10,%r9 + andq %rax,%r9 + movq %r9,112(%rdi) + movq 120(%rsi),%r11 + movq %r10,%r9 + shrq $3,%r9 + andq %rax,%r9 + movq %r9,120(%rdi) + movq %r10,%r8 + shrq $32,%r8 + andq %rax,%r8 + movq %r8,128(%rdi) + shrdq $61,%r11,%r10 + andq %rax,%r10 + movq %r10,136(%rdi) + xorq %r8,%r8 + movq %r11,%r10 + shrq $26,%r10 + andq %rax,%r10 + movq %r10,144(%rdi) + shrdq $55,%r8,%r11 + andq %rax,%r11 + movq %r11,152(%rdi) + movq %r8,160(%rdi) + movq %r8,168(%rdi) + movq %r8,176(%rdi) + movq %r8,184(%rdi) + .byte 0xf3,0xc3 +.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2 +.globl rsaz_1024_scatter5_avx2 +.type rsaz_1024_scatter5_avx2,@function +.align 32 rsaz_1024_scatter5_avx2: + vzeroupper + vmovdqu .Lscatter_permd(%rip),%ymm5 + shll $4,%edx + leaq (%rdi,%rdx,1),%rdi + movl $9,%eax + jmp .Loop_scatter_1024 + +.align 32 +.Loop_scatter_1024: + vmovdqu (%rsi),%ymm0 + leaq 32(%rsi),%rsi + vpermd %ymm0,%ymm5,%ymm0 + vmovdqu %xmm0,(%rdi) + leaq 512(%rdi),%rdi + decl %eax + jnz .Loop_scatter_1024 + + vzeroupper + .byte 0xf3,0xc3 +.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2 + +.globl rsaz_1024_gather5_avx2 +.type rsaz_1024_gather5_avx2,@function +.align 32 rsaz_1024_gather5_avx2: -.byte 0x0f,0x0b + vzeroupper + movq %rsp,%r11 + leaq -256(%rsp),%rsp + andq $-32,%rsp + leaq .Linc(%rip),%r10 + leaq -128(%rsp),%rax + + vmovd %edx,%xmm4 + vmovdqa (%r10),%ymm0 + vmovdqa 32(%r10),%ymm1 + vmovdqa 64(%r10),%ymm5 + vpbroadcastd %xmm4,%ymm4 + + vpaddd %ymm5,%ymm0,%ymm2 + vpcmpeqd %ymm4,%ymm0,%ymm0 + vpaddd %ymm5,%ymm1,%ymm3 + vpcmpeqd %ymm4,%ymm1,%ymm1 + vmovdqa %ymm0,0+128(%rax) + vpaddd %ymm5,%ymm2,%ymm0 + vpcmpeqd %ymm4,%ymm2,%ymm2 + vmovdqa %ymm1,32+128(%rax) + vpaddd %ymm5,%ymm3,%ymm1 + vpcmpeqd %ymm4,%ymm3,%ymm3 + vmovdqa %ymm2,64+128(%rax) + vpaddd %ymm5,%ymm0,%ymm2 + vpcmpeqd %ymm4,%ymm0,%ymm0 + vmovdqa %ymm3,96+128(%rax) + vpaddd %ymm5,%ymm1,%ymm3 + vpcmpeqd %ymm4,%ymm1,%ymm1 + vmovdqa %ymm0,128+128(%rax) + vpaddd %ymm5,%ymm2,%ymm8 + vpcmpeqd %ymm4,%ymm2,%ymm2 + vmovdqa %ymm1,160+128(%rax) + vpaddd %ymm5,%ymm3,%ymm9 + vpcmpeqd %ymm4,%ymm3,%ymm3 + vmovdqa %ymm2,192+128(%rax) + vpaddd %ymm5,%ymm8,%ymm10 + vpcmpeqd %ymm4,%ymm8,%ymm8 + vmovdqa %ymm3,224+128(%rax) + vpaddd %ymm5,%ymm9,%ymm11 + vpcmpeqd %ymm4,%ymm9,%ymm9 + vpaddd %ymm5,%ymm10,%ymm12 + vpcmpeqd %ymm4,%ymm10,%ymm10 + vpaddd %ymm5,%ymm11,%ymm13 + vpcmpeqd %ymm4,%ymm11,%ymm11 + vpaddd %ymm5,%ymm12,%ymm14 + vpcmpeqd %ymm4,%ymm12,%ymm12 + vpaddd %ymm5,%ymm13,%ymm15 + vpcmpeqd %ymm4,%ymm13,%ymm13 + vpcmpeqd %ymm4,%ymm14,%ymm14 + vpcmpeqd %ymm4,%ymm15,%ymm15 + + vmovdqa -32(%r10),%ymm7 + leaq 128(%rsi),%rsi + movl $9,%edx + +.Loop_gather_1024: + vmovdqa 0-128(%rsi),%ymm0 + vmovdqa 32-128(%rsi),%ymm1 + vmovdqa 64-128(%rsi),%ymm2 + vmovdqa 96-128(%rsi),%ymm3 + vpand 0+128(%rax),%ymm0,%ymm0 + vpand 32+128(%rax),%ymm1,%ymm1 + vpand 
64+128(%rax),%ymm2,%ymm2 + vpor %ymm0,%ymm1,%ymm4 + vpand 96+128(%rax),%ymm3,%ymm3 + vmovdqa 128-128(%rsi),%ymm0 + vmovdqa 160-128(%rsi),%ymm1 + vpor %ymm2,%ymm3,%ymm5 + vmovdqa 192-128(%rsi),%ymm2 + vmovdqa 224-128(%rsi),%ymm3 + vpand 128+128(%rax),%ymm0,%ymm0 + vpand 160+128(%rax),%ymm1,%ymm1 + vpand 192+128(%rax),%ymm2,%ymm2 + vpor %ymm0,%ymm4,%ymm4 + vpand 224+128(%rax),%ymm3,%ymm3 + vpand 256-128(%rsi),%ymm8,%ymm0 + vpor %ymm1,%ymm5,%ymm5 + vpand 288-128(%rsi),%ymm9,%ymm1 + vpor %ymm2,%ymm4,%ymm4 + vpand 320-128(%rsi),%ymm10,%ymm2 + vpor %ymm3,%ymm5,%ymm5 + vpand 352-128(%rsi),%ymm11,%ymm3 + vpor %ymm0,%ymm4,%ymm4 + vpand 384-128(%rsi),%ymm12,%ymm0 + vpor %ymm1,%ymm5,%ymm5 + vpand 416-128(%rsi),%ymm13,%ymm1 + vpor %ymm2,%ymm4,%ymm4 + vpand 448-128(%rsi),%ymm14,%ymm2 + vpor %ymm3,%ymm5,%ymm5 + vpand 480-128(%rsi),%ymm15,%ymm3 + leaq 512(%rsi),%rsi + vpor %ymm0,%ymm4,%ymm4 + vpor %ymm1,%ymm5,%ymm5 + vpor %ymm2,%ymm4,%ymm4 + vpor %ymm3,%ymm5,%ymm5 + + vpor %ymm5,%ymm4,%ymm4 + vextracti128 $1,%ymm4,%xmm5 + vpor %xmm4,%xmm5,%xmm5 + vpermd %ymm5,%ymm7,%ymm5 + vmovdqu %ymm5,(%rdi) + leaq 32(%rdi),%rdi + decl %edx + jnz .Loop_gather_1024 + + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + vzeroupper + leaq (%r11),%rsp .byte 0xf3,0xc3 -.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 +.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 + +.globl rsaz_avx2_eligible +.type rsaz_avx2_eligible,@function +.align 32 +rsaz_avx2_eligible: + movl OPENSSL_ia32cap_P+8(%rip),%eax + movl $524544,%ecx + movl $0,%edx + andl %eax,%ecx + cmpl $524544,%ecx + cmovel %edx,%eax + andl $32,%eax + shrl $5,%eax + .byte 0xf3,0xc3 +.size rsaz_avx2_eligible,.-rsaz_avx2_eligible + +.align 64 +.Land_mask: +.quad 0x1fffffff,0x1fffffff,0x1fffffff,-1 +.Lscatter_permd: +.long 0,2,4,6,7,7,7,7 +.Lgather_permd: +.long 0,7,1,7,2,7,3,7 +.Linc: +.long 0,0,0,0, 1,1,1,1 +.long 2,2,2,2, 3,3,3,3 +.long 4,4,4,4, 4,4,4,4 +.align 64 Index: head/secure/lib/libcrypto/amd64/rsaz-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/rsaz-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/rsaz-x86_64.S (revision 299481) @@ -1,1220 +1,1875 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from rsaz-x86_64.pl. 
.text .globl rsaz_512_sqr .type rsaz_512_sqr,@function .align 32 rsaz_512_sqr: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $128+24,%rsp .Lsqr_body: movq %rdx,%rbp movq (%rsi),%rdx movq 8(%rsi),%rax movq %rcx,128(%rsp) + movl $0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $0x80100,%r11d + je .Loop_sqrx jmp .Loop_sqr .align 32 .Loop_sqr: movl %r8d,128+8(%rsp) movq %rdx,%rbx mulq %rdx movq %rax,%r8 movq 16(%rsi),%rax movq %rdx,%r9 mulq %rbx addq %rax,%r9 movq 24(%rsi),%rax movq %rdx,%r10 adcq $0,%r10 mulq %rbx addq %rax,%r10 movq 32(%rsi),%rax movq %rdx,%r11 adcq $0,%r11 mulq %rbx addq %rax,%r11 movq 40(%rsi),%rax movq %rdx,%r12 adcq $0,%r12 mulq %rbx addq %rax,%r12 movq 48(%rsi),%rax movq %rdx,%r13 adcq $0,%r13 mulq %rbx addq %rax,%r13 movq 56(%rsi),%rax movq %rdx,%r14 adcq $0,%r14 mulq %rbx addq %rax,%r14 movq %rbx,%rax movq %rdx,%r15 adcq $0,%r15 addq %r8,%r8 movq %r9,%rcx adcq %r9,%r9 mulq %rax movq %rax,(%rsp) addq %rdx,%r8 adcq $0,%r9 movq %r8,8(%rsp) shrq $63,%rcx movq 8(%rsi),%r8 movq 16(%rsi),%rax mulq %r8 addq %rax,%r10 movq 24(%rsi),%rax movq %rdx,%rbx adcq $0,%rbx mulq %r8 addq %rax,%r11 movq 32(%rsi),%rax adcq $0,%rdx addq %rbx,%r11 movq %rdx,%rbx adcq $0,%rbx mulq %r8 addq %rax,%r12 movq 40(%rsi),%rax adcq $0,%rdx addq %rbx,%r12 movq %rdx,%rbx adcq $0,%rbx mulq %r8 addq %rax,%r13 movq 48(%rsi),%rax adcq $0,%rdx addq %rbx,%r13 movq %rdx,%rbx adcq $0,%rbx mulq %r8 addq %rax,%r14 movq 56(%rsi),%rax adcq $0,%rdx addq %rbx,%r14 movq %rdx,%rbx adcq $0,%rbx mulq %r8 addq %rax,%r15 movq %r8,%rax adcq $0,%rdx addq %rbx,%r15 movq %rdx,%r8 movq %r10,%rdx adcq $0,%r8 addq %rdx,%rdx leaq (%rcx,%r10,2),%r10 movq %r11,%rbx adcq %r11,%r11 mulq %rax addq %rax,%r9 adcq %rdx,%r10 adcq $0,%r11 movq %r9,16(%rsp) movq %r10,24(%rsp) shrq $63,%rbx movq 16(%rsi),%r9 movq 24(%rsi),%rax mulq %r9 addq %rax,%r12 movq 32(%rsi),%rax movq %rdx,%rcx adcq $0,%rcx mulq %r9 addq %rax,%r13 movq 40(%rsi),%rax adcq $0,%rdx addq %rcx,%r13 movq %rdx,%rcx adcq $0,%rcx mulq %r9 addq %rax,%r14 movq 48(%rsi),%rax adcq $0,%rdx addq %rcx,%r14 movq %rdx,%rcx adcq $0,%rcx mulq %r9 movq %r12,%r10 leaq (%rbx,%r12,2),%r12 addq %rax,%r15 movq 56(%rsi),%rax adcq $0,%rdx addq %rcx,%r15 movq %rdx,%rcx adcq $0,%rcx mulq %r9 shrq $63,%r10 addq %rax,%r8 movq %r9,%rax adcq $0,%rdx addq %rcx,%r8 movq %rdx,%r9 adcq $0,%r9 movq %r13,%rcx leaq (%r10,%r13,2),%r13 mulq %rax addq %rax,%r11 adcq %rdx,%r12 adcq $0,%r13 movq %r11,32(%rsp) movq %r12,40(%rsp) shrq $63,%rcx movq 24(%rsi),%r10 movq 32(%rsi),%rax mulq %r10 addq %rax,%r14 movq 40(%rsi),%rax movq %rdx,%rbx adcq $0,%rbx mulq %r10 addq %rax,%r15 movq 48(%rsi),%rax adcq $0,%rdx addq %rbx,%r15 movq %rdx,%rbx adcq $0,%rbx mulq %r10 movq %r14,%r12 leaq (%rcx,%r14,2),%r14 addq %rax,%r8 movq 56(%rsi),%rax adcq $0,%rdx addq %rbx,%r8 movq %rdx,%rbx adcq $0,%rbx mulq %r10 shrq $63,%r12 addq %rax,%r9 movq %r10,%rax adcq $0,%rdx addq %rbx,%r9 movq %rdx,%r10 adcq $0,%r10 movq %r15,%rbx leaq (%r12,%r15,2),%r15 mulq %rax addq %rax,%r13 adcq %rdx,%r14 adcq $0,%r15 movq %r13,48(%rsp) movq %r14,56(%rsp) shrq $63,%rbx movq 32(%rsi),%r11 movq 40(%rsi),%rax mulq %r11 addq %rax,%r8 movq 48(%rsi),%rax movq %rdx,%rcx adcq $0,%rcx mulq %r11 addq %rax,%r9 movq 56(%rsi),%rax adcq $0,%rdx movq %r8,%r12 leaq (%rbx,%r8,2),%r8 addq %rcx,%r9 movq %rdx,%rcx adcq $0,%rcx mulq %r11 shrq $63,%r12 addq %rax,%r10 movq %r11,%rax adcq $0,%rdx addq %rcx,%r10 movq %rdx,%r11 adcq $0,%r11 movq %r9,%rcx leaq (%r12,%r9,2),%r9 mulq %rax addq %rax,%r15 adcq %rdx,%r8 adcq $0,%r9 movq 
%r15,64(%rsp) movq %r8,72(%rsp) shrq $63,%rcx movq 40(%rsi),%r12 movq 48(%rsi),%rax mulq %r12 addq %rax,%r10 movq 56(%rsi),%rax movq %rdx,%rbx adcq $0,%rbx mulq %r12 addq %rax,%r11 movq %r12,%rax movq %r10,%r15 leaq (%rcx,%r10,2),%r10 adcq $0,%rdx shrq $63,%r15 addq %rbx,%r11 movq %rdx,%r12 adcq $0,%r12 movq %r11,%rbx leaq (%r15,%r11,2),%r11 mulq %rax addq %rax,%r9 adcq %rdx,%r10 adcq $0,%r11 movq %r9,80(%rsp) movq %r10,88(%rsp) movq 48(%rsi),%r13 movq 56(%rsi),%rax mulq %r13 addq %rax,%r12 movq %r13,%rax movq %rdx,%r13 adcq $0,%r13 xorq %r14,%r14 shlq $1,%rbx adcq %r12,%r12 adcq %r13,%r13 adcq %r14,%r14 mulq %rax addq %rax,%r11 adcq %rdx,%r12 adcq $0,%r13 movq %r11,96(%rsp) movq %r12,104(%rsp) movq 56(%rsi),%rax mulq %rax addq %rax,%r13 adcq $0,%rdx addq %rdx,%r14 movq %r13,112(%rsp) movq %r14,120(%rsp) movq (%rsp),%r8 movq 8(%rsp),%r9 movq 16(%rsp),%r10 movq 24(%rsp),%r11 movq 32(%rsp),%r12 movq 40(%rsp),%r13 movq 48(%rsp),%r14 movq 56(%rsp),%r15 call __rsaz_512_reduce addq 64(%rsp),%r8 adcq 72(%rsp),%r9 adcq 80(%rsp),%r10 adcq 88(%rsp),%r11 adcq 96(%rsp),%r12 adcq 104(%rsp),%r13 adcq 112(%rsp),%r14 adcq 120(%rsp),%r15 sbbq %rcx,%rcx call __rsaz_512_subtract movq %r8,%rdx movq %r9,%rax movl 128+8(%rsp),%r8d movq %rdi,%rsi decl %r8d jnz .Loop_sqr + jmp .Lsqr_tail +.align 32 +.Loop_sqrx: + movl %r8d,128+8(%rsp) +.byte 102,72,15,110,199 +.byte 102,72,15,110,205 + + mulxq %rax,%r8,%r9 + + mulxq 16(%rsi),%rcx,%r10 + xorq %rbp,%rbp + + mulxq 24(%rsi),%rax,%r11 + adcxq %rcx,%r9 + + mulxq 32(%rsi),%rcx,%r12 + adcxq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rcx,%r11 + +.byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r12 + adcxq %rcx,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 + adcxq %rax,%r14 + adcxq %rbp,%r15 + + movq %r9,%rcx + shldq $1,%r8,%r9 + shlq $1,%r8 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rdx,%r8 + movq 8(%rsi),%rdx + adcxq %rbp,%r9 + + movq %rax,(%rsp) + movq %r8,8(%rsp) + + + mulxq 16(%rsi),%rax,%rbx + adoxq %rax,%r10 + adcxq %rbx,%r11 + +.byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00 + adoxq %rdi,%r11 + adcxq %r8,%r12 + + mulxq 32(%rsi),%rax,%rbx + adoxq %rax,%r12 + adcxq %rbx,%r13 + + mulxq 40(%rsi),%rdi,%r8 + adoxq %rdi,%r13 + adcxq %r8,%r14 + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 + adoxq %rax,%r14 + adcxq %rbx,%r15 + +.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00 + adoxq %rdi,%r15 + adcxq %rbp,%r8 + adoxq %rbp,%r8 + + movq %r11,%rbx + shldq $1,%r10,%r11 + shldq $1,%rcx,%r10 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rcx + movq 16(%rsi),%rdx + adcxq %rax,%r9 + adcxq %rcx,%r10 + adcxq %rbp,%r11 + + movq %r9,16(%rsp) +.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 + + +.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00 + adoxq %rdi,%r12 + adcxq %r9,%r13 + + mulxq 32(%rsi),%rax,%rcx + adoxq %rax,%r13 + adcxq %rcx,%r14 + + mulxq 40(%rsi),%rdi,%r9 + adoxq %rdi,%r14 + adcxq %r9,%r15 + +.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00 + adoxq %rax,%r15 + adcxq %rcx,%r8 + +.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00 + adoxq %rdi,%r8 + adcxq %rbp,%r9 + adoxq %rbp,%r9 + + movq %r13,%rcx + shldq $1,%r12,%r13 + shldq $1,%rbx,%r12 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r11 + adcxq %rdx,%r12 + movq 24(%rsi),%rdx + adcxq %rbp,%r13 + + movq %r11,32(%rsp) +.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00 + + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00 + adoxq %rax,%r14 + adcxq %rbx,%r15 + + mulxq 40(%rsi),%rdi,%r10 + adoxq %rdi,%r15 + adcxq %r10,%r8 + + mulxq 48(%rsi),%rax,%rbx + adoxq %rax,%r8 + 
adcxq %rbx,%r9 + + mulxq 56(%rsi),%rdi,%r10 + adoxq %rdi,%r9 + adcxq %rbp,%r10 + adoxq %rbp,%r10 + +.byte 0x66 + movq %r15,%rbx + shldq $1,%r14,%r15 + shldq $1,%rcx,%r14 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r13 + adcxq %rdx,%r14 + movq 32(%rsi),%rdx + adcxq %rbp,%r15 + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + + +.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00 + adoxq %rdi,%r8 + adcxq %r11,%r9 + + mulxq 48(%rsi),%rax,%rcx + adoxq %rax,%r9 + adcxq %rcx,%r10 + + mulxq 56(%rsi),%rdi,%r11 + adoxq %rdi,%r10 + adcxq %rbp,%r11 + adoxq %rbp,%r11 + + movq %r9,%rcx + shldq $1,%r8,%r9 + shldq $1,%rbx,%r8 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r15 + adcxq %rdx,%r8 + movq 40(%rsi),%rdx + adcxq %rbp,%r9 + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 + adoxq %rax,%r10 + adcxq %rbx,%r11 + +.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00 + adoxq %rdi,%r11 + adcxq %rbp,%r12 + adoxq %rbp,%r12 + + movq %r11,%rbx + shldq $1,%r10,%r11 + shldq $1,%rcx,%r10 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r9 + adcxq %rdx,%r10 + movq 48(%rsi),%rdx + adcxq %rbp,%r11 + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + +.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00 + adoxq %rax,%r12 + adoxq %rbp,%r13 + + xorq %r14,%r14 + shldq $1,%r13,%r14 + shldq $1,%r12,%r13 + shldq $1,%rbx,%r12 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r11 + adcxq %rdx,%r12 + movq 56(%rsi),%rdx + adcxq %rbp,%r13 + +.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00 +.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00 + + + mulxq %rdx,%rax,%rdx + adoxq %rax,%r13 + adoxq %rbp,%rdx + +.byte 0x66 + addq %rdx,%r14 + + movq %r13,112(%rsp) + movq %r14,120(%rsp) +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz .Loop_sqrx + +.Lsqr_tail: + leaq 128+24+48(%rsp),%rax movq -48(%rax),%r15 movq -40(%rax),%r14 movq -32(%rax),%r13 movq -24(%rax),%r12 movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Lsqr_epilogue: .byte 0xf3,0xc3 .size rsaz_512_sqr,.-rsaz_512_sqr .globl rsaz_512_mul .type rsaz_512_mul,@function .align 32 rsaz_512_mul: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $128+24,%rsp .Lmul_body: .byte 102,72,15,110,199 .byte 102,72,15,110,201 movq %r8,128(%rsp) + movl $0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $0x80100,%r11d + je .Lmulx movq (%rdx),%rbx movq %rdx,%rbp call __rsaz_512_mul .byte 102,72,15,126,199 .byte 102,72,15,126,205 movq (%rsp),%r8 movq 8(%rsp),%r9 movq 16(%rsp),%r10 movq 24(%rsp),%r11 movq 32(%rsp),%r12 movq 40(%rsp),%r13 movq 48(%rsp),%r14 movq 56(%rsp),%r15 call __rsaz_512_reduce + jmp .Lmul_tail + +.align 32 +.Lmulx: + movq %rdx,%rbp + movq (%rdx),%rdx + call __rsaz_512_mulx + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex +.Lmul_tail: addq 
64(%rsp),%r8 adcq 72(%rsp),%r9 adcq 80(%rsp),%r10 adcq 88(%rsp),%r11 adcq 96(%rsp),%r12 adcq 104(%rsp),%r13 adcq 112(%rsp),%r14 adcq 120(%rsp),%r15 sbbq %rcx,%rcx call __rsaz_512_subtract leaq 128+24+48(%rsp),%rax movq -48(%rax),%r15 movq -40(%rax),%r14 movq -32(%rax),%r13 movq -24(%rax),%r12 movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Lmul_epilogue: .byte 0xf3,0xc3 .size rsaz_512_mul,.-rsaz_512_mul .globl rsaz_512_mul_gather4 .type rsaz_512_mul_gather4,@function .align 32 rsaz_512_mul_gather4: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $152,%rsp .Lmul_gather4_body: movd %r9d,%xmm8 movdqa .Linc+16(%rip),%xmm1 movdqa .Linc(%rip),%xmm0 pshufd $0,%xmm8,%xmm8 movdqa %xmm1,%xmm7 movdqa %xmm1,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm8,%xmm0 movdqa %xmm7,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm8,%xmm1 movdqa %xmm7,%xmm4 paddd %xmm2,%xmm3 pcmpeqd %xmm8,%xmm2 movdqa %xmm7,%xmm5 paddd %xmm3,%xmm4 pcmpeqd %xmm8,%xmm3 movdqa %xmm7,%xmm6 paddd %xmm4,%xmm5 pcmpeqd %xmm8,%xmm4 paddd %xmm5,%xmm6 pcmpeqd %xmm8,%xmm5 paddd %xmm6,%xmm7 pcmpeqd %xmm8,%xmm6 pcmpeqd %xmm8,%xmm7 movdqa 0(%rdx),%xmm8 movdqa 16(%rdx),%xmm9 movdqa 32(%rdx),%xmm10 movdqa 48(%rdx),%xmm11 pand %xmm0,%xmm8 movdqa 64(%rdx),%xmm12 pand %xmm1,%xmm9 movdqa 80(%rdx),%xmm13 pand %xmm2,%xmm10 movdqa 96(%rdx),%xmm14 pand %xmm3,%xmm11 movdqa 112(%rdx),%xmm15 leaq 128(%rdx),%rbp pand %xmm4,%xmm12 pand %xmm5,%xmm13 pand %xmm6,%xmm14 pand %xmm7,%xmm15 por %xmm10,%xmm8 por %xmm11,%xmm9 por %xmm12,%xmm8 por %xmm13,%xmm9 por %xmm14,%xmm8 por %xmm15,%xmm9 por %xmm9,%xmm8 pshufd $0x4e,%xmm8,%xmm9 por %xmm9,%xmm8 + movl $0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $0x80100,%r11d + je .Lmulx_gather .byte 102,76,15,126,195 movq %r8,128(%rsp) movq %rdi,128+8(%rsp) movq %rcx,128+16(%rsp) movq (%rsi),%rax movq 8(%rsi),%rcx mulq %rbx movq %rax,(%rsp) movq %rcx,%rax movq %rdx,%r8 mulq %rbx addq %rax,%r8 movq 16(%rsi),%rax movq %rdx,%r9 adcq $0,%r9 mulq %rbx addq %rax,%r9 movq 24(%rsi),%rax movq %rdx,%r10 adcq $0,%r10 mulq %rbx addq %rax,%r10 movq 32(%rsi),%rax movq %rdx,%r11 adcq $0,%r11 mulq %rbx addq %rax,%r11 movq 40(%rsi),%rax movq %rdx,%r12 adcq $0,%r12 mulq %rbx addq %rax,%r12 movq 48(%rsi),%rax movq %rdx,%r13 adcq $0,%r13 mulq %rbx addq %rax,%r13 movq 56(%rsi),%rax movq %rdx,%r14 adcq $0,%r14 mulq %rbx addq %rax,%r14 movq (%rsi),%rax movq %rdx,%r15 adcq $0,%r15 leaq 8(%rsp),%rdi movl $7,%ecx jmp .Loop_mul_gather .align 32 .Loop_mul_gather: movdqa 0(%rbp),%xmm8 movdqa 16(%rbp),%xmm9 movdqa 32(%rbp),%xmm10 movdqa 48(%rbp),%xmm11 pand %xmm0,%xmm8 movdqa 64(%rbp),%xmm12 pand %xmm1,%xmm9 movdqa 80(%rbp),%xmm13 pand %xmm2,%xmm10 movdqa 96(%rbp),%xmm14 pand %xmm3,%xmm11 movdqa 112(%rbp),%xmm15 leaq 128(%rbp),%rbp pand %xmm4,%xmm12 pand %xmm5,%xmm13 pand %xmm6,%xmm14 pand %xmm7,%xmm15 por %xmm10,%xmm8 por %xmm11,%xmm9 por %xmm12,%xmm8 por %xmm13,%xmm9 por %xmm14,%xmm8 por %xmm15,%xmm9 por %xmm9,%xmm8 pshufd $0x4e,%xmm8,%xmm9 por %xmm9,%xmm8 .byte 102,76,15,126,195 mulq %rbx addq %rax,%r8 movq 8(%rsi),%rax movq %r8,(%rdi) movq %rdx,%r8 adcq $0,%r8 mulq %rbx addq %rax,%r9 movq 16(%rsi),%rax adcq $0,%rdx addq %r9,%r8 movq %rdx,%r9 adcq $0,%r9 mulq %rbx addq %rax,%r10 movq 24(%rsi),%rax adcq $0,%rdx addq %r10,%r9 movq %rdx,%r10 adcq $0,%r10 mulq %rbx addq %rax,%r11 movq 32(%rsi),%rax adcq $0,%rdx addq %r11,%r10 movq %rdx,%r11 adcq $0,%r11 mulq %rbx addq %rax,%r12 movq 40(%rsi),%rax adcq $0,%rdx addq %r12,%r11 movq %rdx,%r12 adcq $0,%r12 mulq %rbx addq %rax,%r13 movq 48(%rsi),%rax adcq $0,%rdx addq %r13,%r12 movq 
%rdx,%r13 adcq $0,%r13 mulq %rbx addq %rax,%r14 movq 56(%rsi),%rax adcq $0,%rdx addq %r14,%r13 movq %rdx,%r14 adcq $0,%r14 mulq %rbx addq %rax,%r15 movq (%rsi),%rax adcq $0,%rdx addq %r15,%r14 movq %rdx,%r15 adcq $0,%r15 leaq 8(%rdi),%rdi decl %ecx jnz .Loop_mul_gather movq %r8,(%rdi) movq %r9,8(%rdi) movq %r10,16(%rdi) movq %r11,24(%rdi) movq %r12,32(%rdi) movq %r13,40(%rdi) movq %r14,48(%rdi) movq %r15,56(%rdi) movq 128+8(%rsp),%rdi movq 128+16(%rsp),%rbp movq (%rsp),%r8 movq 8(%rsp),%r9 movq 16(%rsp),%r10 movq 24(%rsp),%r11 movq 32(%rsp),%r12 movq 40(%rsp),%r13 movq 48(%rsp),%r14 movq 56(%rsp),%r15 call __rsaz_512_reduce + jmp .Lmul_gather_tail + +.align 32 +.Lmulx_gather: +.byte 102,76,15,126,194 + + movq %r8,128(%rsp) + movq %rdi,128+8(%rsp) + movq %rcx,128+16(%rsp) + + mulxq (%rsi),%rbx,%r8 + movq %rbx,(%rsp) + xorl %edi,%edi + + mulxq 8(%rsi),%rax,%r9 + + mulxq 16(%rsi),%rbx,%r10 + adcxq %rax,%r8 + + mulxq 24(%rsi),%rax,%r11 + adcxq %rbx,%r9 + + mulxq 32(%rsi),%rbx,%r12 + adcxq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rbx,%r11 + + mulxq 48(%rsi),%rbx,%r14 + adcxq %rax,%r12 + + mulxq 56(%rsi),%rax,%r15 + adcxq %rbx,%r13 + adcxq %rax,%r14 +.byte 0x67 + movq %r8,%rbx + adcxq %rdi,%r15 + + movq $-7,%rcx + jmp .Loop_mulx_gather + +.align 32 +.Loop_mulx_gather: + movdqa 0(%rbp),%xmm8 + movdqa 16(%rbp),%xmm9 + movdqa 32(%rbp),%xmm10 + movdqa 48(%rbp),%xmm11 + pand %xmm0,%xmm8 + movdqa 64(%rbp),%xmm12 + pand %xmm1,%xmm9 + movdqa 80(%rbp),%xmm13 + pand %xmm2,%xmm10 + movdqa 96(%rbp),%xmm14 + pand %xmm3,%xmm11 + movdqa 112(%rbp),%xmm15 + leaq 128(%rbp),%rbp + pand %xmm4,%xmm12 + pand %xmm5,%xmm13 + pand %xmm6,%xmm14 + pand %xmm7,%xmm15 + por %xmm10,%xmm8 + por %xmm11,%xmm9 + por %xmm12,%xmm8 + por %xmm13,%xmm9 + por %xmm14,%xmm8 + por %xmm15,%xmm9 + + por %xmm9,%xmm8 + pshufd $0x4e,%xmm8,%xmm9 + por %xmm9,%xmm8 +.byte 102,76,15,126,194 + +.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rsi),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rsi),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + +.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00 + adcxq %rax,%r10 + adoxq %r12,%r11 + + mulxq 32(%rsi),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 +.byte 0x67 + adoxq %r15,%r14 + + mulxq 56(%rsi),%rax,%r15 + movq %rbx,64(%rsp,%rcx,8) + adcxq %rax,%r14 + adoxq %rdi,%r15 + movq %r8,%rbx + adcxq %rdi,%r15 + + incq %rcx + jnz .Loop_mulx_gather + + movq %r8,64(%rsp) + movq %r9,64+8(%rsp) + movq %r10,64+16(%rsp) + movq %r11,64+24(%rsp) + movq %r12,64+32(%rsp) + movq %r13,64+40(%rsp) + movq %r14,64+48(%rsp) + movq %r15,64+56(%rsp) + + movq 128(%rsp),%rdx + movq 128+8(%rsp),%rdi + movq 128+16(%rsp),%rbp + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + +.Lmul_gather_tail: addq 64(%rsp),%r8 adcq 72(%rsp),%r9 adcq 80(%rsp),%r10 adcq 88(%rsp),%r11 adcq 96(%rsp),%r12 adcq 104(%rsp),%r13 adcq 112(%rsp),%r14 adcq 120(%rsp),%r15 sbbq %rcx,%rcx call __rsaz_512_subtract leaq 128+24+48(%rsp),%rax movq -48(%rax),%r15 movq -40(%rax),%r14 movq -32(%rax),%r13 movq -24(%rax),%r12 movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Lmul_gather4_epilogue: .byte 0xf3,0xc3 .size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 .globl rsaz_512_mul_scatter4 .type 
rsaz_512_mul_scatter4,@function .align 32 rsaz_512_mul_scatter4: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 movl %r9d,%r9d subq $128+24,%rsp .Lmul_scatter4_body: leaq (%r8,%r9,8),%r8 .byte 102,72,15,110,199 .byte 102,72,15,110,202 .byte 102,73,15,110,208 movq %rcx,128(%rsp) movq %rdi,%rbp + movl $0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $0x80100,%r11d + je .Lmulx_scatter movq (%rdi),%rbx call __rsaz_512_mul .byte 102,72,15,126,199 .byte 102,72,15,126,205 movq (%rsp),%r8 movq 8(%rsp),%r9 movq 16(%rsp),%r10 movq 24(%rsp),%r11 movq 32(%rsp),%r12 movq 40(%rsp),%r13 movq 48(%rsp),%r14 movq 56(%rsp),%r15 call __rsaz_512_reduce + jmp .Lmul_scatter_tail + +.align 32 +.Lmulx_scatter: + movq (%rdi),%rdx + call __rsaz_512_mulx + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + +.Lmul_scatter_tail: addq 64(%rsp),%r8 adcq 72(%rsp),%r9 adcq 80(%rsp),%r10 adcq 88(%rsp),%r11 adcq 96(%rsp),%r12 adcq 104(%rsp),%r13 adcq 112(%rsp),%r14 adcq 120(%rsp),%r15 .byte 102,72,15,126,214 sbbq %rcx,%rcx call __rsaz_512_subtract movq %r8,0(%rsi) movq %r9,128(%rsi) movq %r10,256(%rsi) movq %r11,384(%rsi) movq %r12,512(%rsi) movq %r13,640(%rsi) movq %r14,768(%rsi) movq %r15,896(%rsi) leaq 128+24+48(%rsp),%rax movq -48(%rax),%r15 movq -40(%rax),%r14 movq -32(%rax),%r13 movq -24(%rax),%r12 movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Lmul_scatter4_epilogue: .byte 0xf3,0xc3 .size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 .globl rsaz_512_mul_by_one .type rsaz_512_mul_by_one,@function .align 32 rsaz_512_mul_by_one: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $128+24,%rsp .Lmul_by_one_body: + movl OPENSSL_ia32cap_P+8(%rip),%eax movq %rdx,%rbp movq %rcx,128(%rsp) movq (%rsi),%r8 pxor %xmm0,%xmm0 movq 8(%rsi),%r9 movq 16(%rsi),%r10 movq 24(%rsi),%r11 movq 32(%rsi),%r12 movq 40(%rsi),%r13 movq 48(%rsi),%r14 movq 56(%rsi),%r15 movdqa %xmm0,(%rsp) movdqa %xmm0,16(%rsp) movdqa %xmm0,32(%rsp) movdqa %xmm0,48(%rsp) movdqa %xmm0,64(%rsp) movdqa %xmm0,80(%rsp) movdqa %xmm0,96(%rsp) + andl $0x80100,%eax + cmpl $0x80100,%eax + je .Lby_one_callx call __rsaz_512_reduce + jmp .Lby_one_tail +.align 32 +.Lby_one_callx: + movq 128(%rsp),%rdx + call __rsaz_512_reducex +.Lby_one_tail: movq %r8,(%rdi) movq %r9,8(%rdi) movq %r10,16(%rdi) movq %r11,24(%rdi) movq %r12,32(%rdi) movq %r13,40(%rdi) movq %r14,48(%rdi) movq %r15,56(%rdi) leaq 128+24+48(%rsp),%rax movq -48(%rax),%r15 movq -40(%rax),%r14 movq -32(%rax),%r13 movq -24(%rax),%r12 movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Lmul_by_one_epilogue: .byte 0xf3,0xc3 .size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one .type __rsaz_512_reduce,@function .align 32 __rsaz_512_reduce: movq %r8,%rbx imulq 128+8(%rsp),%rbx movq 0(%rbp),%rax movl $8,%ecx jmp .Lreduction_loop .align 32 .Lreduction_loop: mulq %rbx movq 8(%rbp),%rax negq %r8 movq %rdx,%r8 adcq $0,%r8 mulq %rbx addq %rax,%r9 movq 16(%rbp),%rax adcq $0,%rdx addq %r9,%r8 movq %rdx,%r9 adcq $0,%r9 mulq %rbx addq %rax,%r10 movq 24(%rbp),%rax adcq $0,%rdx addq %r10,%r9 movq %rdx,%r10 adcq $0,%r10 mulq %rbx addq %rax,%r11 movq 32(%rbp),%rax adcq $0,%rdx addq %r11,%r10 movq 128+8(%rsp),%rsi adcq $0,%rdx movq %rdx,%r11 mulq %rbx addq %rax,%r12 movq 40(%rbp),%rax adcq $0,%rdx imulq %r8,%rsi addq %r12,%r11 movq %rdx,%r12 adcq $0,%r12 mulq %rbx addq 
%rax,%r13 movq 48(%rbp),%rax adcq $0,%rdx addq %r13,%r12 movq %rdx,%r13 adcq $0,%r13 mulq %rbx addq %rax,%r14 movq 56(%rbp),%rax adcq $0,%rdx addq %r14,%r13 movq %rdx,%r14 adcq $0,%r14 mulq %rbx movq %rsi,%rbx addq %rax,%r15 movq 0(%rbp),%rax adcq $0,%rdx addq %r15,%r14 movq %rdx,%r15 adcq $0,%r15 decl %ecx jne .Lreduction_loop .byte 0xf3,0xc3 .size __rsaz_512_reduce,.-__rsaz_512_reduce +.type __rsaz_512_reducex,@function +.align 32 +__rsaz_512_reducex: + + imulq %r8,%rdx + xorq %rsi,%rsi + movl $8,%ecx + jmp .Lreduction_loopx + +.align 32 +.Lreduction_loopx: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rbx,%rax + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rbx,%r10 + adcxq %rbx,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rbx,%r11 + adcxq %rbx,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 + movq %rdx,%rax + movq %r8,%rdx + adcxq %rbx,%r11 + adoxq %r13,%r12 + + mulxq 128+8(%rsp),%rbx,%rdx + movq %rax,%rdx + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq %rbx,%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + adcxq %rsi,%r15 + + decl %ecx + jne .Lreduction_loopx + + .byte 0xf3,0xc3 +.size __rsaz_512_reducex,.-__rsaz_512_reducex .type __rsaz_512_subtract,@function .align 32 __rsaz_512_subtract: movq %r8,(%rdi) movq %r9,8(%rdi) movq %r10,16(%rdi) movq %r11,24(%rdi) movq %r12,32(%rdi) movq %r13,40(%rdi) movq %r14,48(%rdi) movq %r15,56(%rdi) movq 0(%rbp),%r8 movq 8(%rbp),%r9 negq %r8 notq %r9 andq %rcx,%r8 movq 16(%rbp),%r10 andq %rcx,%r9 notq %r10 movq 24(%rbp),%r11 andq %rcx,%r10 notq %r11 movq 32(%rbp),%r12 andq %rcx,%r11 notq %r12 movq 40(%rbp),%r13 andq %rcx,%r12 notq %r13 movq 48(%rbp),%r14 andq %rcx,%r13 notq %r14 movq 56(%rbp),%r15 andq %rcx,%r14 notq %r15 andq %rcx,%r15 addq (%rdi),%r8 adcq 8(%rdi),%r9 adcq 16(%rdi),%r10 adcq 24(%rdi),%r11 adcq 32(%rdi),%r12 adcq 40(%rdi),%r13 adcq 48(%rdi),%r14 adcq 56(%rdi),%r15 movq %r8,(%rdi) movq %r9,8(%rdi) movq %r10,16(%rdi) movq %r11,24(%rdi) movq %r12,32(%rdi) movq %r13,40(%rdi) movq %r14,48(%rdi) movq %r15,56(%rdi) .byte 0xf3,0xc3 .size __rsaz_512_subtract,.-__rsaz_512_subtract .type __rsaz_512_mul,@function .align 32 __rsaz_512_mul: leaq 8(%rsp),%rdi movq (%rsi),%rax mulq %rbx movq %rax,(%rdi) movq 8(%rsi),%rax movq %rdx,%r8 mulq %rbx addq %rax,%r8 movq 16(%rsi),%rax movq %rdx,%r9 adcq $0,%r9 mulq %rbx addq %rax,%r9 movq 24(%rsi),%rax movq %rdx,%r10 adcq $0,%r10 mulq %rbx addq %rax,%r10 movq 32(%rsi),%rax movq %rdx,%r11 adcq $0,%r11 mulq %rbx addq %rax,%r11 movq 40(%rsi),%rax movq %rdx,%r12 adcq $0,%r12 mulq %rbx addq %rax,%r12 movq 48(%rsi),%rax movq %rdx,%r13 adcq $0,%r13 mulq %rbx addq %rax,%r13 movq 56(%rsi),%rax movq %rdx,%r14 adcq $0,%r14 mulq %rbx addq %rax,%r14 movq (%rsi),%rax movq %rdx,%r15 adcq $0,%r15 leaq 8(%rbp),%rbp leaq 8(%rdi),%rdi movl $7,%ecx jmp .Loop_mul .align 32 .Loop_mul: movq (%rbp),%rbx mulq %rbx addq %rax,%r8 movq 8(%rsi),%rax movq %r8,(%rdi) movq %rdx,%r8 adcq $0,%r8 mulq %rbx addq %rax,%r9 movq 16(%rsi),%rax adcq $0,%rdx addq %r9,%r8 movq %rdx,%r9 adcq $0,%r9 mulq %rbx addq %rax,%r10 movq 24(%rsi),%rax adcq $0,%rdx addq %r10,%r9 movq %rdx,%r10 adcq $0,%r10 mulq %rbx addq %rax,%r11 movq 32(%rsi),%rax adcq $0,%rdx addq %r11,%r10 movq %rdx,%r11 adcq $0,%r11 mulq %rbx addq %rax,%r12 movq 40(%rsi),%rax adcq $0,%rdx addq %r12,%r11 movq %rdx,%r12 adcq $0,%r12 mulq %rbx addq %rax,%r13 movq 48(%rsi),%rax 
adcq $0,%rdx addq %r13,%r12 movq %rdx,%r13 adcq $0,%r13 mulq %rbx addq %rax,%r14 movq 56(%rsi),%rax adcq $0,%rdx addq %r14,%r13 movq %rdx,%r14 leaq 8(%rbp),%rbp adcq $0,%r14 mulq %rbx addq %rax,%r15 movq (%rsi),%rax adcq $0,%rdx addq %r15,%r14 movq %rdx,%r15 adcq $0,%r15 leaq 8(%rdi),%rdi decl %ecx jnz .Loop_mul movq %r8,(%rdi) movq %r9,8(%rdi) movq %r10,16(%rdi) movq %r11,24(%rdi) movq %r12,32(%rdi) movq %r13,40(%rdi) movq %r14,48(%rdi) movq %r15,56(%rdi) .byte 0xf3,0xc3 .size __rsaz_512_mul,.-__rsaz_512_mul +.type __rsaz_512_mulx,@function +.align 32 +__rsaz_512_mulx: + mulxq (%rsi),%rbx,%r8 + movq $-6,%rcx + + mulxq 8(%rsi),%rax,%r9 + movq %rbx,8(%rsp) + + mulxq 16(%rsi),%rbx,%r10 + adcq %rax,%r8 + + mulxq 24(%rsi),%rax,%r11 + adcq %rbx,%r9 + + mulxq 32(%rsi),%rbx,%r12 + adcq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcq %rbx,%r11 + + mulxq 48(%rsi),%rbx,%r14 + adcq %rax,%r12 + + mulxq 56(%rsi),%rax,%r15 + movq 8(%rbp),%rdx + adcq %rbx,%r13 + adcq %rax,%r14 + adcq $0,%r15 + + xorq %rdi,%rdi + jmp .Loop_mulx + +.align 32 +.Loop_mulx: + movq %r8,%rbx + mulxq (%rsi),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rsi),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rsi),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rsi),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rsi),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rsi),%rax,%r15 + movq 64(%rbp,%rcx,8),%rdx + movq %rbx,8+64-8(%rsp,%rcx,8) + adcxq %rax,%r14 + adoxq %rdi,%r15 + adcxq %rdi,%r15 + + incq %rcx + jnz .Loop_mulx + + movq %r8,%rbx + mulxq (%rsi),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + +.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00 + adcxq %rax,%r8 + adoxq %r10,%r9 + +.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rsi),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + + mulxq 32(%rsi),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 + adoxq %r15,%r14 + +.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 + adcxq %rax,%r14 + adoxq %rdi,%r15 + adcxq %rdi,%r15 + + movq %rbx,8+64-8(%rsp) + movq %r8,8+64(%rsp) + movq %r9,8+64+8(%rsp) + movq %r10,8+64+16(%rsp) + movq %r11,8+64+24(%rsp) + movq %r12,8+64+32(%rsp) + movq %r13,8+64+40(%rsp) + movq %r14,8+64+48(%rsp) + movq %r15,8+64+56(%rsp) + + .byte 0xf3,0xc3 +.size __rsaz_512_mulx,.-__rsaz_512_mulx .globl rsaz_512_scatter4 .type rsaz_512_scatter4,@function .align 16 rsaz_512_scatter4: leaq (%rdi,%rdx,8),%rdi movl $8,%r9d jmp .Loop_scatter .align 16 .Loop_scatter: movq (%rsi),%rax leaq 8(%rsi),%rsi movq %rax,(%rdi) leaq 128(%rdi),%rdi decl %r9d jnz .Loop_scatter .byte 0xf3,0xc3 .size rsaz_512_scatter4,.-rsaz_512_scatter4 .globl rsaz_512_gather4 .type rsaz_512_gather4,@function .align 16 rsaz_512_gather4: movd %edx,%xmm8 movdqa .Linc+16(%rip),%xmm1 movdqa .Linc(%rip),%xmm0 pshufd $0,%xmm8,%xmm8 movdqa %xmm1,%xmm7 movdqa %xmm1,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm8,%xmm0 movdqa %xmm7,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm8,%xmm1 movdqa %xmm7,%xmm4 paddd %xmm2,%xmm3 pcmpeqd %xmm8,%xmm2 movdqa %xmm7,%xmm5 paddd %xmm3,%xmm4 pcmpeqd %xmm8,%xmm3 movdqa %xmm7,%xmm6 paddd %xmm4,%xmm5 pcmpeqd %xmm8,%xmm4 paddd %xmm5,%xmm6 pcmpeqd %xmm8,%xmm5 paddd %xmm6,%xmm7 pcmpeqd %xmm8,%xmm6 pcmpeqd %xmm8,%xmm7 
movl $8,%r9d jmp .Loop_gather .align 16 .Loop_gather: movdqa 0(%rsi),%xmm8 movdqa 16(%rsi),%xmm9 movdqa 32(%rsi),%xmm10 movdqa 48(%rsi),%xmm11 pand %xmm0,%xmm8 movdqa 64(%rsi),%xmm12 pand %xmm1,%xmm9 movdqa 80(%rsi),%xmm13 pand %xmm2,%xmm10 movdqa 96(%rsi),%xmm14 pand %xmm3,%xmm11 movdqa 112(%rsi),%xmm15 leaq 128(%rsi),%rsi pand %xmm4,%xmm12 pand %xmm5,%xmm13 pand %xmm6,%xmm14 pand %xmm7,%xmm15 por %xmm10,%xmm8 por %xmm11,%xmm9 por %xmm12,%xmm8 por %xmm13,%xmm9 por %xmm14,%xmm8 por %xmm15,%xmm9 por %xmm9,%xmm8 pshufd $0x4e,%xmm8,%xmm9 por %xmm9,%xmm8 movq %xmm8,(%rdi) leaq 8(%rdi),%rdi decl %r9d jnz .Loop_gather .byte 0xf3,0xc3 .LSEH_end_rsaz_512_gather4: .size rsaz_512_gather4,.-rsaz_512_gather4 .align 64 .Linc: .long 0,0, 1,1 .long 2,2, 2,2 Index: head/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S (revision 299481) @@ -1,2935 +1,7223 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from sha1-mb-x86_64.pl. .text .globl sha1_multi_block .type sha1_multi_block,@function .align 32 sha1_multi_block: movq OPENSSL_ia32cap_P+4(%rip),%rcx btq $61,%rcx jc _shaext_shortcut + testl $268435456,%ecx + jnz _avx_shortcut movq %rsp,%rax pushq %rbx pushq %rbp subq $288,%rsp andq $-256,%rsp movq %rax,272(%rsp) .Lbody: leaq K_XX_XX(%rip),%rbp leaq 256(%rsp),%rbx .Loop_grande: movl %edx,280(%rsp) xorl %edx,%edx movq 0(%rsi),%r8 movl 8(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,0(%rbx) cmovleq %rbp,%r8 movq 16(%rsi),%r9 movl 24(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,4(%rbx) cmovleq %rbp,%r9 movq 32(%rsi),%r10 movl 40(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,8(%rbx) cmovleq %rbp,%r10 movq 48(%rsi),%r11 movl 56(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,12(%rbx) cmovleq %rbp,%r11 testl %edx,%edx jz .Ldone movdqu 0(%rdi),%xmm10 leaq 128(%rsp),%rax movdqu 32(%rdi),%xmm11 movdqu 64(%rdi),%xmm12 movdqu 96(%rdi),%xmm13 movdqu 128(%rdi),%xmm14 movdqa 96(%rbp),%xmm5 movdqa -32(%rbp),%xmm15 jmp .Loop .align 32 .Loop: movd (%r8),%xmm0 leaq 64(%r8),%r8 movd (%r9),%xmm2 leaq 64(%r9),%r9 movd (%r10),%xmm3 leaq 64(%r10),%r10 movd (%r11),%xmm4 leaq 64(%r11),%r11 punpckldq %xmm3,%xmm0 movd -60(%r8),%xmm1 punpckldq %xmm4,%xmm2 movd -60(%r9),%xmm9 punpckldq %xmm2,%xmm0 movd -60(%r10),%xmm8 .byte 102,15,56,0,197 movd -60(%r11),%xmm7 punpckldq %xmm8,%xmm1 movdqa %xmm10,%xmm8 paddd %xmm15,%xmm14 punpckldq %xmm7,%xmm9 movdqa %xmm11,%xmm7 movdqa %xmm11,%xmm6 pslld $5,%xmm8 pandn %xmm13,%xmm7 pand %xmm12,%xmm6 punpckldq %xmm9,%xmm1 movdqa %xmm10,%xmm9 movdqa %xmm0,0-128(%rax) paddd %xmm0,%xmm14 movd -56(%r8),%xmm2 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm11,%xmm7 por %xmm9,%xmm8 movd -56(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm14 psrld $2,%xmm11 paddd %xmm8,%xmm14 .byte 102,15,56,0,205 movd -56(%r10),%xmm8 por %xmm7,%xmm11 movd -56(%r11),%xmm7 punpckldq %xmm8,%xmm2 movdqa %xmm14,%xmm8 paddd %xmm15,%xmm13 punpckldq %xmm7,%xmm9 movdqa %xmm10,%xmm7 movdqa %xmm10,%xmm6 pslld $5,%xmm8 pandn %xmm12,%xmm7 pand %xmm11,%xmm6 punpckldq %xmm9,%xmm2 movdqa %xmm14,%xmm9 movdqa %xmm1,16-128(%rax) paddd %xmm1,%xmm13 movd -52(%r8),%xmm3 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm10,%xmm7 por %xmm9,%xmm8 movd -52(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm13 psrld $2,%xmm10 paddd %xmm8,%xmm13 .byte 102,15,56,0,213 movd 
-52(%r10),%xmm8 por %xmm7,%xmm10 movd -52(%r11),%xmm7 punpckldq %xmm8,%xmm3 movdqa %xmm13,%xmm8 paddd %xmm15,%xmm12 punpckldq %xmm7,%xmm9 movdqa %xmm14,%xmm7 movdqa %xmm14,%xmm6 pslld $5,%xmm8 pandn %xmm11,%xmm7 pand %xmm10,%xmm6 punpckldq %xmm9,%xmm3 movdqa %xmm13,%xmm9 movdqa %xmm2,32-128(%rax) paddd %xmm2,%xmm12 movd -48(%r8),%xmm4 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm14,%xmm7 por %xmm9,%xmm8 movd -48(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm12 psrld $2,%xmm14 paddd %xmm8,%xmm12 .byte 102,15,56,0,221 movd -48(%r10),%xmm8 por %xmm7,%xmm14 movd -48(%r11),%xmm7 punpckldq %xmm8,%xmm4 movdqa %xmm12,%xmm8 paddd %xmm15,%xmm11 punpckldq %xmm7,%xmm9 movdqa %xmm13,%xmm7 movdqa %xmm13,%xmm6 pslld $5,%xmm8 pandn %xmm10,%xmm7 pand %xmm14,%xmm6 punpckldq %xmm9,%xmm4 movdqa %xmm12,%xmm9 movdqa %xmm3,48-128(%rax) paddd %xmm3,%xmm11 movd -44(%r8),%xmm0 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm13,%xmm7 por %xmm9,%xmm8 movd -44(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm11 psrld $2,%xmm13 paddd %xmm8,%xmm11 .byte 102,15,56,0,229 movd -44(%r10),%xmm8 por %xmm7,%xmm13 movd -44(%r11),%xmm7 punpckldq %xmm8,%xmm0 movdqa %xmm11,%xmm8 paddd %xmm15,%xmm10 punpckldq %xmm7,%xmm9 movdqa %xmm12,%xmm7 movdqa %xmm12,%xmm6 pslld $5,%xmm8 pandn %xmm14,%xmm7 pand %xmm13,%xmm6 punpckldq %xmm9,%xmm0 movdqa %xmm11,%xmm9 movdqa %xmm4,64-128(%rax) paddd %xmm4,%xmm10 movd -40(%r8),%xmm1 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm12,%xmm7 por %xmm9,%xmm8 movd -40(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm10 psrld $2,%xmm12 paddd %xmm8,%xmm10 .byte 102,15,56,0,197 movd -40(%r10),%xmm8 por %xmm7,%xmm12 movd -40(%r11),%xmm7 punpckldq %xmm8,%xmm1 movdqa %xmm10,%xmm8 paddd %xmm15,%xmm14 punpckldq %xmm7,%xmm9 movdqa %xmm11,%xmm7 movdqa %xmm11,%xmm6 pslld $5,%xmm8 pandn %xmm13,%xmm7 pand %xmm12,%xmm6 punpckldq %xmm9,%xmm1 movdqa %xmm10,%xmm9 movdqa %xmm0,80-128(%rax) paddd %xmm0,%xmm14 movd -36(%r8),%xmm2 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm11,%xmm7 por %xmm9,%xmm8 movd -36(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm14 psrld $2,%xmm11 paddd %xmm8,%xmm14 .byte 102,15,56,0,205 movd -36(%r10),%xmm8 por %xmm7,%xmm11 movd -36(%r11),%xmm7 punpckldq %xmm8,%xmm2 movdqa %xmm14,%xmm8 paddd %xmm15,%xmm13 punpckldq %xmm7,%xmm9 movdqa %xmm10,%xmm7 movdqa %xmm10,%xmm6 pslld $5,%xmm8 pandn %xmm12,%xmm7 pand %xmm11,%xmm6 punpckldq %xmm9,%xmm2 movdqa %xmm14,%xmm9 movdqa %xmm1,96-128(%rax) paddd %xmm1,%xmm13 movd -32(%r8),%xmm3 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm10,%xmm7 por %xmm9,%xmm8 movd -32(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm13 psrld $2,%xmm10 paddd %xmm8,%xmm13 .byte 102,15,56,0,213 movd -32(%r10),%xmm8 por %xmm7,%xmm10 movd -32(%r11),%xmm7 punpckldq %xmm8,%xmm3 movdqa %xmm13,%xmm8 paddd %xmm15,%xmm12 punpckldq %xmm7,%xmm9 movdqa %xmm14,%xmm7 movdqa %xmm14,%xmm6 pslld $5,%xmm8 pandn %xmm11,%xmm7 pand %xmm10,%xmm6 punpckldq %xmm9,%xmm3 movdqa %xmm13,%xmm9 movdqa %xmm2,112-128(%rax) paddd %xmm2,%xmm12 movd -28(%r8),%xmm4 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm14,%xmm7 por %xmm9,%xmm8 movd -28(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm12 psrld $2,%xmm14 paddd %xmm8,%xmm12 .byte 102,15,56,0,221 movd -28(%r10),%xmm8 por %xmm7,%xmm14 movd -28(%r11),%xmm7 punpckldq %xmm8,%xmm4 movdqa %xmm12,%xmm8 paddd %xmm15,%xmm11 punpckldq %xmm7,%xmm9 movdqa %xmm13,%xmm7 movdqa %xmm13,%xmm6 pslld $5,%xmm8 pandn %xmm10,%xmm7 pand %xmm14,%xmm6 punpckldq %xmm9,%xmm4 movdqa %xmm12,%xmm9 movdqa %xmm3,128-128(%rax) paddd %xmm3,%xmm11 movd -24(%r8),%xmm0 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm13,%xmm7 por %xmm9,%xmm8 movd 
-24(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm11 psrld $2,%xmm13 paddd %xmm8,%xmm11 .byte 102,15,56,0,229 movd -24(%r10),%xmm8 por %xmm7,%xmm13 movd -24(%r11),%xmm7 punpckldq %xmm8,%xmm0 movdqa %xmm11,%xmm8 paddd %xmm15,%xmm10 punpckldq %xmm7,%xmm9 movdqa %xmm12,%xmm7 movdqa %xmm12,%xmm6 pslld $5,%xmm8 pandn %xmm14,%xmm7 pand %xmm13,%xmm6 punpckldq %xmm9,%xmm0 movdqa %xmm11,%xmm9 movdqa %xmm4,144-128(%rax) paddd %xmm4,%xmm10 movd -20(%r8),%xmm1 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm12,%xmm7 por %xmm9,%xmm8 movd -20(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm10 psrld $2,%xmm12 paddd %xmm8,%xmm10 .byte 102,15,56,0,197 movd -20(%r10),%xmm8 por %xmm7,%xmm12 movd -20(%r11),%xmm7 punpckldq %xmm8,%xmm1 movdqa %xmm10,%xmm8 paddd %xmm15,%xmm14 punpckldq %xmm7,%xmm9 movdqa %xmm11,%xmm7 movdqa %xmm11,%xmm6 pslld $5,%xmm8 pandn %xmm13,%xmm7 pand %xmm12,%xmm6 punpckldq %xmm9,%xmm1 movdqa %xmm10,%xmm9 movdqa %xmm0,160-128(%rax) paddd %xmm0,%xmm14 movd -16(%r8),%xmm2 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm11,%xmm7 por %xmm9,%xmm8 movd -16(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm14 psrld $2,%xmm11 paddd %xmm8,%xmm14 .byte 102,15,56,0,205 movd -16(%r10),%xmm8 por %xmm7,%xmm11 movd -16(%r11),%xmm7 punpckldq %xmm8,%xmm2 movdqa %xmm14,%xmm8 paddd %xmm15,%xmm13 punpckldq %xmm7,%xmm9 movdqa %xmm10,%xmm7 movdqa %xmm10,%xmm6 pslld $5,%xmm8 pandn %xmm12,%xmm7 pand %xmm11,%xmm6 punpckldq %xmm9,%xmm2 movdqa %xmm14,%xmm9 movdqa %xmm1,176-128(%rax) paddd %xmm1,%xmm13 movd -12(%r8),%xmm3 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm10,%xmm7 por %xmm9,%xmm8 movd -12(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm13 psrld $2,%xmm10 paddd %xmm8,%xmm13 .byte 102,15,56,0,213 movd -12(%r10),%xmm8 por %xmm7,%xmm10 movd -12(%r11),%xmm7 punpckldq %xmm8,%xmm3 movdqa %xmm13,%xmm8 paddd %xmm15,%xmm12 punpckldq %xmm7,%xmm9 movdqa %xmm14,%xmm7 movdqa %xmm14,%xmm6 pslld $5,%xmm8 pandn %xmm11,%xmm7 pand %xmm10,%xmm6 punpckldq %xmm9,%xmm3 movdqa %xmm13,%xmm9 movdqa %xmm2,192-128(%rax) paddd %xmm2,%xmm12 movd -8(%r8),%xmm4 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm14,%xmm7 por %xmm9,%xmm8 movd -8(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm12 psrld $2,%xmm14 paddd %xmm8,%xmm12 .byte 102,15,56,0,221 movd -8(%r10),%xmm8 por %xmm7,%xmm14 movd -8(%r11),%xmm7 punpckldq %xmm8,%xmm4 movdqa %xmm12,%xmm8 paddd %xmm15,%xmm11 punpckldq %xmm7,%xmm9 movdqa %xmm13,%xmm7 movdqa %xmm13,%xmm6 pslld $5,%xmm8 pandn %xmm10,%xmm7 pand %xmm14,%xmm6 punpckldq %xmm9,%xmm4 movdqa %xmm12,%xmm9 movdqa %xmm3,208-128(%rax) paddd %xmm3,%xmm11 movd -4(%r8),%xmm0 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm13,%xmm7 por %xmm9,%xmm8 movd -4(%r9),%xmm9 pslld $30,%xmm7 paddd %xmm6,%xmm11 psrld $2,%xmm13 paddd %xmm8,%xmm11 .byte 102,15,56,0,229 movd -4(%r10),%xmm8 por %xmm7,%xmm13 movdqa 0-128(%rax),%xmm1 movd -4(%r11),%xmm7 punpckldq %xmm8,%xmm0 movdqa %xmm11,%xmm8 paddd %xmm15,%xmm10 punpckldq %xmm7,%xmm9 movdqa %xmm12,%xmm7 movdqa %xmm12,%xmm6 pslld $5,%xmm8 prefetcht0 63(%r8) pandn %xmm14,%xmm7 pand %xmm13,%xmm6 punpckldq %xmm9,%xmm0 movdqa %xmm11,%xmm9 movdqa %xmm4,224-128(%rax) paddd %xmm4,%xmm10 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm12,%xmm7 prefetcht0 63(%r9) por %xmm9,%xmm8 pslld $30,%xmm7 paddd %xmm6,%xmm10 prefetcht0 63(%r10) psrld $2,%xmm12 paddd %xmm8,%xmm10 .byte 102,15,56,0,197 prefetcht0 63(%r11) por %xmm7,%xmm12 movdqa 16-128(%rax),%xmm2 pxor %xmm3,%xmm1 movdqa 32-128(%rax),%xmm3 movdqa %xmm10,%xmm8 pxor 128-128(%rax),%xmm1 paddd %xmm15,%xmm14 movdqa %xmm11,%xmm7 pslld $5,%xmm8 pxor %xmm3,%xmm1 movdqa %xmm11,%xmm6 pandn %xmm13,%xmm7 movdqa 
%xmm1,%xmm5 pand %xmm12,%xmm6 movdqa %xmm10,%xmm9 psrld $31,%xmm5 paddd %xmm1,%xmm1 movdqa %xmm0,240-128(%rax) paddd %xmm0,%xmm14 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm11,%xmm7 por %xmm9,%xmm8 pslld $30,%xmm7 paddd %xmm6,%xmm14 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 48-128(%rax),%xmm4 movdqa %xmm14,%xmm8 pxor 144-128(%rax),%xmm2 paddd %xmm15,%xmm13 movdqa %xmm10,%xmm7 pslld $5,%xmm8 pxor %xmm4,%xmm2 movdqa %xmm10,%xmm6 pandn %xmm12,%xmm7 movdqa %xmm2,%xmm5 pand %xmm11,%xmm6 movdqa %xmm14,%xmm9 psrld $31,%xmm5 paddd %xmm2,%xmm2 movdqa %xmm1,0-128(%rax) paddd %xmm1,%xmm13 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm10,%xmm7 por %xmm9,%xmm8 pslld $30,%xmm7 paddd %xmm6,%xmm13 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 64-128(%rax),%xmm0 movdqa %xmm13,%xmm8 pxor 160-128(%rax),%xmm3 paddd %xmm15,%xmm12 movdqa %xmm14,%xmm7 pslld $5,%xmm8 pxor %xmm0,%xmm3 movdqa %xmm14,%xmm6 pandn %xmm11,%xmm7 movdqa %xmm3,%xmm5 pand %xmm10,%xmm6 movdqa %xmm13,%xmm9 psrld $31,%xmm5 paddd %xmm3,%xmm3 movdqa %xmm2,16-128(%rax) paddd %xmm2,%xmm12 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm14,%xmm7 por %xmm9,%xmm8 pslld $30,%xmm7 paddd %xmm6,%xmm12 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 80-128(%rax),%xmm1 movdqa %xmm12,%xmm8 pxor 176-128(%rax),%xmm4 paddd %xmm15,%xmm11 movdqa %xmm13,%xmm7 pslld $5,%xmm8 pxor %xmm1,%xmm4 movdqa %xmm13,%xmm6 pandn %xmm10,%xmm7 movdqa %xmm4,%xmm5 pand %xmm14,%xmm6 movdqa %xmm12,%xmm9 psrld $31,%xmm5 paddd %xmm4,%xmm4 movdqa %xmm3,32-128(%rax) paddd %xmm3,%xmm11 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm13,%xmm7 por %xmm9,%xmm8 pslld $30,%xmm7 paddd %xmm6,%xmm11 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 96-128(%rax),%xmm2 movdqa %xmm11,%xmm8 pxor 192-128(%rax),%xmm0 paddd %xmm15,%xmm10 movdqa %xmm12,%xmm7 pslld $5,%xmm8 pxor %xmm2,%xmm0 movdqa %xmm12,%xmm6 pandn %xmm14,%xmm7 movdqa %xmm0,%xmm5 pand %xmm13,%xmm6 movdqa %xmm11,%xmm9 psrld $31,%xmm5 paddd %xmm0,%xmm0 movdqa %xmm4,48-128(%rax) paddd %xmm4,%xmm10 psrld $27,%xmm9 pxor %xmm7,%xmm6 movdqa %xmm12,%xmm7 por %xmm9,%xmm8 pslld $30,%xmm7 paddd %xmm6,%xmm10 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 movdqa 0(%rbp),%xmm15 pxor %xmm3,%xmm1 movdqa 112-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm6 pxor 208-128(%rax),%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm9 movdqa %xmm0,64-128(%rax) paddd %xmm0,%xmm14 pxor %xmm3,%xmm1 psrld $27,%xmm9 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 movdqa %xmm1,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm14 paddd %xmm1,%xmm1 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 128-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm6 pxor 224-128(%rax),%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm9 movdqa %xmm1,80-128(%rax) paddd %xmm1,%xmm13 pxor %xmm4,%xmm2 psrld $27,%xmm9 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 movdqa %xmm2,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm13 paddd %xmm2,%xmm2 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 144-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm6 pxor 240-128(%rax),%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm9 movdqa %xmm2,96-128(%rax) paddd %xmm2,%xmm12 pxor %xmm0,%xmm3 psrld $27,%xmm9 
pxor %xmm10,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 movdqa %xmm3,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm12 paddd %xmm3,%xmm3 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 160-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm6 pxor 0-128(%rax),%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm9 movdqa %xmm3,112-128(%rax) paddd %xmm3,%xmm11 pxor %xmm1,%xmm4 psrld $27,%xmm9 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 movdqa %xmm4,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm11 paddd %xmm4,%xmm4 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 176-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm6 pxor 16-128(%rax),%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm9 movdqa %xmm4,128-128(%rax) paddd %xmm4,%xmm10 pxor %xmm2,%xmm0 psrld $27,%xmm9 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 movdqa %xmm0,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm10 paddd %xmm0,%xmm0 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 pxor %xmm3,%xmm1 movdqa 192-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm6 pxor 32-128(%rax),%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm9 movdqa %xmm0,144-128(%rax) paddd %xmm0,%xmm14 pxor %xmm3,%xmm1 psrld $27,%xmm9 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 movdqa %xmm1,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm14 paddd %xmm1,%xmm1 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 208-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm6 pxor 48-128(%rax),%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm9 movdqa %xmm1,160-128(%rax) paddd %xmm1,%xmm13 pxor %xmm4,%xmm2 psrld $27,%xmm9 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 movdqa %xmm2,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm13 paddd %xmm2,%xmm2 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 224-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm6 pxor 64-128(%rax),%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm9 movdqa %xmm2,176-128(%rax) paddd %xmm2,%xmm12 pxor %xmm0,%xmm3 psrld $27,%xmm9 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 movdqa %xmm3,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm12 paddd %xmm3,%xmm3 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 240-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm6 pxor 80-128(%rax),%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm9 movdqa %xmm3,192-128(%rax) paddd %xmm3,%xmm11 pxor %xmm1,%xmm4 psrld $27,%xmm9 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 movdqa %xmm4,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm11 paddd %xmm4,%xmm4 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 0-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm6 pxor 96-128(%rax),%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm9 movdqa %xmm4,208-128(%rax) paddd %xmm4,%xmm10 pxor %xmm2,%xmm0 psrld $27,%xmm9 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 movdqa %xmm0,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm10 paddd %xmm0,%xmm0 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 pxor %xmm3,%xmm1 movdqa 16-128(%rax),%xmm3 movdqa 
%xmm10,%xmm8 movdqa %xmm13,%xmm6 pxor 112-128(%rax),%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm9 movdqa %xmm0,224-128(%rax) paddd %xmm0,%xmm14 pxor %xmm3,%xmm1 psrld $27,%xmm9 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 movdqa %xmm1,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm14 paddd %xmm1,%xmm1 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 32-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm6 pxor 128-128(%rax),%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm9 movdqa %xmm1,240-128(%rax) paddd %xmm1,%xmm13 pxor %xmm4,%xmm2 psrld $27,%xmm9 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 movdqa %xmm2,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm13 paddd %xmm2,%xmm2 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 48-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm6 pxor 144-128(%rax),%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm9 movdqa %xmm2,0-128(%rax) paddd %xmm2,%xmm12 pxor %xmm0,%xmm3 psrld $27,%xmm9 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 movdqa %xmm3,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm12 paddd %xmm3,%xmm3 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 64-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm6 pxor 160-128(%rax),%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm9 movdqa %xmm3,16-128(%rax) paddd %xmm3,%xmm11 pxor %xmm1,%xmm4 psrld $27,%xmm9 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 movdqa %xmm4,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm11 paddd %xmm4,%xmm4 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 80-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm6 pxor 176-128(%rax),%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm9 movdqa %xmm4,32-128(%rax) paddd %xmm4,%xmm10 pxor %xmm2,%xmm0 psrld $27,%xmm9 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 movdqa %xmm0,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm10 paddd %xmm0,%xmm0 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 pxor %xmm3,%xmm1 movdqa 96-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm6 pxor 192-128(%rax),%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm9 movdqa %xmm0,48-128(%rax) paddd %xmm0,%xmm14 pxor %xmm3,%xmm1 psrld $27,%xmm9 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 movdqa %xmm1,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm14 paddd %xmm1,%xmm1 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 112-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm6 pxor 208-128(%rax),%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm9 movdqa %xmm1,64-128(%rax) paddd %xmm1,%xmm13 pxor %xmm4,%xmm2 psrld $27,%xmm9 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 movdqa %xmm2,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm13 paddd %xmm2,%xmm2 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 128-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm6 pxor 224-128(%rax),%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm9 movdqa %xmm2,80-128(%rax) paddd %xmm2,%xmm12 pxor %xmm0,%xmm3 psrld $27,%xmm9 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 
movdqa %xmm3,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm12 paddd %xmm3,%xmm3 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 144-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm6 pxor 240-128(%rax),%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm9 movdqa %xmm3,96-128(%rax) paddd %xmm3,%xmm11 pxor %xmm1,%xmm4 psrld $27,%xmm9 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 movdqa %xmm4,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm11 paddd %xmm4,%xmm4 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 160-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm6 pxor 0-128(%rax),%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm9 movdqa %xmm4,112-128(%rax) paddd %xmm4,%xmm10 pxor %xmm2,%xmm0 psrld $27,%xmm9 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 movdqa %xmm0,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm10 paddd %xmm0,%xmm0 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 movdqa 32(%rbp),%xmm15 pxor %xmm3,%xmm1 movdqa 176-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm7 pxor 16-128(%rax),%xmm1 pxor %xmm3,%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 movdqa %xmm10,%xmm9 pand %xmm12,%xmm7 movdqa %xmm13,%xmm6 movdqa %xmm1,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm14 pxor %xmm12,%xmm6 movdqa %xmm0,128-128(%rax) paddd %xmm0,%xmm14 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm11,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 paddd %xmm1,%xmm1 paddd %xmm6,%xmm14 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 192-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm7 pxor 32-128(%rax),%xmm2 pxor %xmm4,%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 movdqa %xmm14,%xmm9 pand %xmm11,%xmm7 movdqa %xmm12,%xmm6 movdqa %xmm2,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm13 pxor %xmm11,%xmm6 movdqa %xmm1,144-128(%rax) paddd %xmm1,%xmm13 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm10,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 paddd %xmm2,%xmm2 paddd %xmm6,%xmm13 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 208-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm7 pxor 48-128(%rax),%xmm3 pxor %xmm0,%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 movdqa %xmm13,%xmm9 pand %xmm10,%xmm7 movdqa %xmm11,%xmm6 movdqa %xmm3,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm12 pxor %xmm10,%xmm6 movdqa %xmm2,160-128(%rax) paddd %xmm2,%xmm12 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm14,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 paddd %xmm3,%xmm3 paddd %xmm6,%xmm12 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 224-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm7 pxor 64-128(%rax),%xmm4 pxor %xmm1,%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 movdqa %xmm12,%xmm9 pand %xmm14,%xmm7 movdqa %xmm10,%xmm6 movdqa %xmm4,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm11 pxor %xmm14,%xmm6 movdqa %xmm3,176-128(%rax) paddd %xmm3,%xmm11 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm13,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 paddd %xmm4,%xmm4 paddd %xmm6,%xmm11 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 240-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm7 pxor 80-128(%rax),%xmm0 pxor %xmm2,%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 movdqa %xmm11,%xmm9 pand %xmm13,%xmm7 movdqa %xmm14,%xmm6 movdqa %xmm0,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm10 pxor %xmm13,%xmm6 movdqa %xmm4,192-128(%rax) paddd 
%xmm4,%xmm10 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm12,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 paddd %xmm0,%xmm0 paddd %xmm6,%xmm10 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 pxor %xmm3,%xmm1 movdqa 0-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm7 pxor 96-128(%rax),%xmm1 pxor %xmm3,%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 movdqa %xmm10,%xmm9 pand %xmm12,%xmm7 movdqa %xmm13,%xmm6 movdqa %xmm1,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm14 pxor %xmm12,%xmm6 movdqa %xmm0,208-128(%rax) paddd %xmm0,%xmm14 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm11,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 paddd %xmm1,%xmm1 paddd %xmm6,%xmm14 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 16-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm7 pxor 112-128(%rax),%xmm2 pxor %xmm4,%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 movdqa %xmm14,%xmm9 pand %xmm11,%xmm7 movdqa %xmm12,%xmm6 movdqa %xmm2,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm13 pxor %xmm11,%xmm6 movdqa %xmm1,224-128(%rax) paddd %xmm1,%xmm13 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm10,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 paddd %xmm2,%xmm2 paddd %xmm6,%xmm13 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 32-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm7 pxor 128-128(%rax),%xmm3 pxor %xmm0,%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 movdqa %xmm13,%xmm9 pand %xmm10,%xmm7 movdqa %xmm11,%xmm6 movdqa %xmm3,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm12 pxor %xmm10,%xmm6 movdqa %xmm2,240-128(%rax) paddd %xmm2,%xmm12 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm14,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 paddd %xmm3,%xmm3 paddd %xmm6,%xmm12 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 48-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm7 pxor 144-128(%rax),%xmm4 pxor %xmm1,%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 movdqa %xmm12,%xmm9 pand %xmm14,%xmm7 movdqa %xmm10,%xmm6 movdqa %xmm4,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm11 pxor %xmm14,%xmm6 movdqa %xmm3,0-128(%rax) paddd %xmm3,%xmm11 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm13,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 paddd %xmm4,%xmm4 paddd %xmm6,%xmm11 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 64-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm7 pxor 160-128(%rax),%xmm0 pxor %xmm2,%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 movdqa %xmm11,%xmm9 pand %xmm13,%xmm7 movdqa %xmm14,%xmm6 movdqa %xmm0,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm10 pxor %xmm13,%xmm6 movdqa %xmm4,16-128(%rax) paddd %xmm4,%xmm10 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm12,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 paddd %xmm0,%xmm0 paddd %xmm6,%xmm10 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 pxor %xmm3,%xmm1 movdqa 80-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm7 pxor 176-128(%rax),%xmm1 pxor %xmm3,%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 movdqa %xmm10,%xmm9 pand %xmm12,%xmm7 movdqa %xmm13,%xmm6 movdqa %xmm1,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm14 pxor %xmm12,%xmm6 movdqa %xmm0,32-128(%rax) paddd %xmm0,%xmm14 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm11,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 paddd %xmm1,%xmm1 paddd %xmm6,%xmm14 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 96-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm7 pxor 192-128(%rax),%xmm2 pxor %xmm4,%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 movdqa %xmm14,%xmm9 pand 
%xmm11,%xmm7 movdqa %xmm12,%xmm6 movdqa %xmm2,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm13 pxor %xmm11,%xmm6 movdqa %xmm1,48-128(%rax) paddd %xmm1,%xmm13 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm10,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 paddd %xmm2,%xmm2 paddd %xmm6,%xmm13 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 112-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm7 pxor 208-128(%rax),%xmm3 pxor %xmm0,%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 movdqa %xmm13,%xmm9 pand %xmm10,%xmm7 movdqa %xmm11,%xmm6 movdqa %xmm3,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm12 pxor %xmm10,%xmm6 movdqa %xmm2,64-128(%rax) paddd %xmm2,%xmm12 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm14,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 paddd %xmm3,%xmm3 paddd %xmm6,%xmm12 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 128-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm7 pxor 224-128(%rax),%xmm4 pxor %xmm1,%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 movdqa %xmm12,%xmm9 pand %xmm14,%xmm7 movdqa %xmm10,%xmm6 movdqa %xmm4,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm11 pxor %xmm14,%xmm6 movdqa %xmm3,80-128(%rax) paddd %xmm3,%xmm11 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm13,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 paddd %xmm4,%xmm4 paddd %xmm6,%xmm11 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 144-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm7 pxor 240-128(%rax),%xmm0 pxor %xmm2,%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 movdqa %xmm11,%xmm9 pand %xmm13,%xmm7 movdqa %xmm14,%xmm6 movdqa %xmm0,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm10 pxor %xmm13,%xmm6 movdqa %xmm4,96-128(%rax) paddd %xmm4,%xmm10 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm12,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 paddd %xmm0,%xmm0 paddd %xmm6,%xmm10 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 pxor %xmm3,%xmm1 movdqa 160-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm7 pxor 0-128(%rax),%xmm1 pxor %xmm3,%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 movdqa %xmm10,%xmm9 pand %xmm12,%xmm7 movdqa %xmm13,%xmm6 movdqa %xmm1,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm14 pxor %xmm12,%xmm6 movdqa %xmm0,112-128(%rax) paddd %xmm0,%xmm14 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm11,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 paddd %xmm1,%xmm1 paddd %xmm6,%xmm14 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 176-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm7 pxor 16-128(%rax),%xmm2 pxor %xmm4,%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 movdqa %xmm14,%xmm9 pand %xmm11,%xmm7 movdqa %xmm12,%xmm6 movdqa %xmm2,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm13 pxor %xmm11,%xmm6 movdqa %xmm1,128-128(%rax) paddd %xmm1,%xmm13 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm10,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 paddd %xmm2,%xmm2 paddd %xmm6,%xmm13 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 192-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm7 pxor 32-128(%rax),%xmm3 pxor %xmm0,%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 movdqa %xmm13,%xmm9 pand %xmm10,%xmm7 movdqa %xmm11,%xmm6 movdqa %xmm3,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm12 pxor %xmm10,%xmm6 movdqa %xmm2,144-128(%rax) paddd %xmm2,%xmm12 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm14,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 paddd %xmm3,%xmm3 paddd %xmm6,%xmm12 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 208-128(%rax),%xmm1 movdqa 
%xmm12,%xmm8 movdqa %xmm10,%xmm7 pxor 48-128(%rax),%xmm4 pxor %xmm1,%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 movdqa %xmm12,%xmm9 pand %xmm14,%xmm7 movdqa %xmm10,%xmm6 movdqa %xmm4,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm11 pxor %xmm14,%xmm6 movdqa %xmm3,160-128(%rax) paddd %xmm3,%xmm11 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm13,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 paddd %xmm4,%xmm4 paddd %xmm6,%xmm11 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 224-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm7 pxor 64-128(%rax),%xmm0 pxor %xmm2,%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 movdqa %xmm11,%xmm9 pand %xmm13,%xmm7 movdqa %xmm14,%xmm6 movdqa %xmm0,%xmm5 psrld $27,%xmm9 paddd %xmm7,%xmm10 pxor %xmm13,%xmm6 movdqa %xmm4,176-128(%rax) paddd %xmm4,%xmm10 por %xmm9,%xmm8 psrld $31,%xmm5 pand %xmm12,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 paddd %xmm0,%xmm0 paddd %xmm6,%xmm10 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 movdqa 64(%rbp),%xmm15 pxor %xmm3,%xmm1 movdqa 240-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm6 pxor 80-128(%rax),%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm9 movdqa %xmm0,192-128(%rax) paddd %xmm0,%xmm14 pxor %xmm3,%xmm1 psrld $27,%xmm9 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 movdqa %xmm1,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm14 paddd %xmm1,%xmm1 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 0-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm6 pxor 96-128(%rax),%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm9 movdqa %xmm1,208-128(%rax) paddd %xmm1,%xmm13 pxor %xmm4,%xmm2 psrld $27,%xmm9 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 movdqa %xmm2,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm13 paddd %xmm2,%xmm2 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 16-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm6 pxor 112-128(%rax),%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm9 movdqa %xmm2,224-128(%rax) paddd %xmm2,%xmm12 pxor %xmm0,%xmm3 psrld $27,%xmm9 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 movdqa %xmm3,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm12 paddd %xmm3,%xmm3 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 32-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm6 pxor 128-128(%rax),%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm9 movdqa %xmm3,240-128(%rax) paddd %xmm3,%xmm11 pxor %xmm1,%xmm4 psrld $27,%xmm9 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 movdqa %xmm4,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm11 paddd %xmm4,%xmm4 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 48-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm6 pxor 144-128(%rax),%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm9 movdqa %xmm4,0-128(%rax) paddd %xmm4,%xmm10 pxor %xmm2,%xmm0 psrld $27,%xmm9 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 movdqa %xmm0,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm10 paddd %xmm0,%xmm0 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 pxor %xmm3,%xmm1 movdqa 64-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm6 pxor 160-128(%rax),%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm9 
movdqa %xmm0,16-128(%rax) paddd %xmm0,%xmm14 pxor %xmm3,%xmm1 psrld $27,%xmm9 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 movdqa %xmm1,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm14 paddd %xmm1,%xmm1 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 80-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm6 pxor 176-128(%rax),%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm9 movdqa %xmm1,32-128(%rax) paddd %xmm1,%xmm13 pxor %xmm4,%xmm2 psrld $27,%xmm9 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 movdqa %xmm2,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm13 paddd %xmm2,%xmm2 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 96-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm6 pxor 192-128(%rax),%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm9 movdqa %xmm2,48-128(%rax) paddd %xmm2,%xmm12 pxor %xmm0,%xmm3 psrld $27,%xmm9 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 movdqa %xmm3,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm12 paddd %xmm3,%xmm3 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 112-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm6 pxor 208-128(%rax),%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm9 movdqa %xmm3,64-128(%rax) paddd %xmm3,%xmm11 pxor %xmm1,%xmm4 psrld $27,%xmm9 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 movdqa %xmm4,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm11 paddd %xmm4,%xmm4 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 128-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm6 pxor 224-128(%rax),%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm9 movdqa %xmm4,80-128(%rax) paddd %xmm4,%xmm10 pxor %xmm2,%xmm0 psrld $27,%xmm9 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 movdqa %xmm0,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm10 paddd %xmm0,%xmm0 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 pxor %xmm3,%xmm1 movdqa 144-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm6 pxor 240-128(%rax),%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm9 movdqa %xmm0,96-128(%rax) paddd %xmm0,%xmm14 pxor %xmm3,%xmm1 psrld $27,%xmm9 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 movdqa %xmm1,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm14 paddd %xmm1,%xmm1 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 160-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm6 pxor 0-128(%rax),%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm9 movdqa %xmm1,112-128(%rax) paddd %xmm1,%xmm13 pxor %xmm4,%xmm2 psrld $27,%xmm9 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 movdqa %xmm2,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm13 paddd %xmm2,%xmm2 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 176-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm6 pxor 16-128(%rax),%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm9 paddd %xmm2,%xmm12 pxor %xmm0,%xmm3 psrld $27,%xmm9 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 movdqa %xmm3,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm12 paddd %xmm3,%xmm3 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor 
%xmm1,%xmm4 movdqa 192-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm6 pxor 32-128(%rax),%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm9 paddd %xmm3,%xmm11 pxor %xmm1,%xmm4 psrld $27,%xmm9 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 movdqa %xmm4,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm11 paddd %xmm4,%xmm4 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 pxor %xmm2,%xmm0 movdqa 208-128(%rax),%xmm2 movdqa %xmm11,%xmm8 movdqa %xmm14,%xmm6 pxor 48-128(%rax),%xmm0 paddd %xmm15,%xmm10 pslld $5,%xmm8 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm9 paddd %xmm4,%xmm10 pxor %xmm2,%xmm0 psrld $27,%xmm9 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm7 pslld $30,%xmm7 movdqa %xmm0,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm10 paddd %xmm0,%xmm0 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm5,%xmm0 por %xmm7,%xmm12 pxor %xmm3,%xmm1 movdqa 224-128(%rax),%xmm3 movdqa %xmm10,%xmm8 movdqa %xmm13,%xmm6 pxor 64-128(%rax),%xmm1 paddd %xmm15,%xmm14 pslld $5,%xmm8 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm9 paddd %xmm0,%xmm14 pxor %xmm3,%xmm1 psrld $27,%xmm9 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm7 pslld $30,%xmm7 movdqa %xmm1,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm14 paddd %xmm1,%xmm1 psrld $2,%xmm11 paddd %xmm8,%xmm14 por %xmm5,%xmm1 por %xmm7,%xmm11 pxor %xmm4,%xmm2 movdqa 240-128(%rax),%xmm4 movdqa %xmm14,%xmm8 movdqa %xmm12,%xmm6 pxor 80-128(%rax),%xmm2 paddd %xmm15,%xmm13 pslld $5,%xmm8 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm9 paddd %xmm1,%xmm13 pxor %xmm4,%xmm2 psrld $27,%xmm9 pxor %xmm11,%xmm6 movdqa %xmm10,%xmm7 pslld $30,%xmm7 movdqa %xmm2,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm13 paddd %xmm2,%xmm2 psrld $2,%xmm10 paddd %xmm8,%xmm13 por %xmm5,%xmm2 por %xmm7,%xmm10 pxor %xmm0,%xmm3 movdqa 0-128(%rax),%xmm0 movdqa %xmm13,%xmm8 movdqa %xmm11,%xmm6 pxor 96-128(%rax),%xmm3 paddd %xmm15,%xmm12 pslld $5,%xmm8 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm9 paddd %xmm2,%xmm12 pxor %xmm0,%xmm3 psrld $27,%xmm9 pxor %xmm10,%xmm6 movdqa %xmm14,%xmm7 pslld $30,%xmm7 movdqa %xmm3,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm12 paddd %xmm3,%xmm3 psrld $2,%xmm14 paddd %xmm8,%xmm12 por %xmm5,%xmm3 por %xmm7,%xmm14 pxor %xmm1,%xmm4 movdqa 16-128(%rax),%xmm1 movdqa %xmm12,%xmm8 movdqa %xmm10,%xmm6 pxor 112-128(%rax),%xmm4 paddd %xmm15,%xmm11 pslld $5,%xmm8 pxor %xmm13,%xmm6 movdqa %xmm12,%xmm9 paddd %xmm3,%xmm11 pxor %xmm1,%xmm4 psrld $27,%xmm9 pxor %xmm14,%xmm6 movdqa %xmm13,%xmm7 pslld $30,%xmm7 movdqa %xmm4,%xmm5 por %xmm9,%xmm8 psrld $31,%xmm5 paddd %xmm6,%xmm11 paddd %xmm4,%xmm4 psrld $2,%xmm13 paddd %xmm8,%xmm11 por %xmm5,%xmm4 por %xmm7,%xmm13 movdqa %xmm11,%xmm8 paddd %xmm15,%xmm10 movdqa %xmm14,%xmm6 pslld $5,%xmm8 pxor %xmm12,%xmm6 movdqa %xmm11,%xmm9 paddd %xmm4,%xmm10 psrld $27,%xmm9 movdqa %xmm12,%xmm7 pxor %xmm13,%xmm6 pslld $30,%xmm7 por %xmm9,%xmm8 paddd %xmm6,%xmm10 psrld $2,%xmm12 paddd %xmm8,%xmm10 por %xmm7,%xmm12 movdqa (%rbx),%xmm0 movl $1,%ecx cmpl 0(%rbx),%ecx pxor %xmm8,%xmm8 cmovgeq %rbp,%r8 cmpl 4(%rbx),%ecx movdqa %xmm0,%xmm1 cmovgeq %rbp,%r9 cmpl 8(%rbx),%ecx pcmpgtd %xmm8,%xmm1 cmovgeq %rbp,%r10 cmpl 12(%rbx),%ecx paddd %xmm1,%xmm0 cmovgeq %rbp,%r11 movdqu 0(%rdi),%xmm6 pand %xmm1,%xmm10 movdqu 32(%rdi),%xmm7 pand %xmm1,%xmm11 paddd %xmm6,%xmm10 movdqu 64(%rdi),%xmm8 pand %xmm1,%xmm12 paddd %xmm7,%xmm11 movdqu 96(%rdi),%xmm9 pand %xmm1,%xmm13 paddd %xmm8,%xmm12 movdqu 128(%rdi),%xmm5 pand %xmm1,%xmm14 movdqu %xmm10,0(%rdi) paddd %xmm9,%xmm13 movdqu %xmm11,32(%rdi) paddd %xmm5,%xmm14 movdqu %xmm12,64(%rdi) 
movdqu %xmm13,96(%rdi) movdqu %xmm14,128(%rdi) movdqa %xmm0,(%rbx) movdqa 96(%rbp),%xmm5 movdqa -32(%rbp),%xmm15 decl %edx jnz .Loop movl 280(%rsp),%edx leaq 16(%rdi),%rdi leaq 64(%rsi),%rsi decl %edx jnz .Loop_grande .Ldone: movq 272(%rsp),%rax movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Lepilogue: .byte 0xf3,0xc3 .size sha1_multi_block,.-sha1_multi_block .type sha1_multi_block_shaext,@function .align 32 sha1_multi_block_shaext: _shaext_shortcut: movq %rsp,%rax pushq %rbx pushq %rbp subq $288,%rsp shll $1,%edx andq $-256,%rsp leaq 64(%rdi),%rdi movq %rax,272(%rsp) .Lbody_shaext: leaq 256(%rsp),%rbx movdqa K_XX_XX+128(%rip),%xmm3 .Loop_grande_shaext: movl %edx,280(%rsp) xorl %edx,%edx movq 0(%rsi),%r8 movl 8(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,0(%rbx) cmovleq %rsp,%r8 movq 16(%rsi),%r9 movl 24(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,4(%rbx) cmovleq %rsp,%r9 testl %edx,%edx jz .Ldone_shaext movq 0-64(%rdi),%xmm0 movq 32-64(%rdi),%xmm4 movq 64-64(%rdi),%xmm5 movq 96-64(%rdi),%xmm6 movq 128-64(%rdi),%xmm7 punpckldq %xmm4,%xmm0 punpckldq %xmm6,%xmm5 movdqa %xmm0,%xmm8 punpcklqdq %xmm5,%xmm0 punpckhqdq %xmm5,%xmm8 pshufd $63,%xmm7,%xmm1 pshufd $127,%xmm7,%xmm9 pshufd $27,%xmm0,%xmm0 pshufd $27,%xmm8,%xmm8 jmp .Loop_shaext .align 32 .Loop_shaext: movdqu 0(%r8),%xmm4 movdqu 0(%r9),%xmm11 movdqu 16(%r8),%xmm5 movdqu 16(%r9),%xmm12 movdqu 32(%r8),%xmm6 .byte 102,15,56,0,227 movdqu 32(%r9),%xmm13 .byte 102,68,15,56,0,219 movdqu 48(%r8),%xmm7 leaq 64(%r8),%r8 .byte 102,15,56,0,235 movdqu 48(%r9),%xmm14 leaq 64(%r9),%r9 .byte 102,68,15,56,0,227 movdqa %xmm1,80(%rsp) paddd %xmm4,%xmm1 movdqa %xmm9,112(%rsp) paddd %xmm11,%xmm9 movdqa %xmm0,64(%rsp) movdqa %xmm0,%xmm2 movdqa %xmm8,96(%rsp) movdqa %xmm8,%xmm10 .byte 15,58,204,193,0 .byte 15,56,200,213 .byte 69,15,58,204,193,0 .byte 69,15,56,200,212 .byte 102,15,56,0,243 prefetcht0 127(%r8) .byte 15,56,201,229 .byte 102,68,15,56,0,235 prefetcht0 127(%r9) .byte 69,15,56,201,220 .byte 102,15,56,0,251 movdqa %xmm0,%xmm1 .byte 102,68,15,56,0,243 movdqa %xmm8,%xmm9 .byte 15,58,204,194,0 .byte 15,56,200,206 .byte 69,15,58,204,194,0 .byte 69,15,56,200,205 pxor %xmm6,%xmm4 .byte 15,56,201,238 pxor %xmm13,%xmm11 .byte 69,15,56,201,229 movdqa %xmm0,%xmm2 movdqa %xmm8,%xmm10 .byte 15,58,204,193,0 .byte 15,56,200,215 .byte 69,15,58,204,193,0 .byte 69,15,56,200,214 .byte 15,56,202,231 .byte 69,15,56,202,222 pxor %xmm7,%xmm5 .byte 15,56,201,247 pxor %xmm14,%xmm12 .byte 69,15,56,201,238 movdqa %xmm0,%xmm1 movdqa %xmm8,%xmm9 .byte 15,58,204,194,0 .byte 15,56,200,204 .byte 69,15,58,204,194,0 .byte 69,15,56,200,203 .byte 15,56,202,236 .byte 69,15,56,202,227 pxor %xmm4,%xmm6 .byte 15,56,201,252 pxor %xmm11,%xmm13 .byte 69,15,56,201,243 movdqa %xmm0,%xmm2 movdqa %xmm8,%xmm10 .byte 15,58,204,193,0 .byte 15,56,200,213 .byte 69,15,58,204,193,0 .byte 69,15,56,200,212 .byte 15,56,202,245 .byte 69,15,56,202,236 pxor %xmm5,%xmm7 .byte 15,56,201,229 pxor %xmm12,%xmm14 .byte 69,15,56,201,220 movdqa %xmm0,%xmm1 movdqa %xmm8,%xmm9 .byte 15,58,204,194,1 .byte 15,56,200,206 .byte 69,15,58,204,194,1 .byte 69,15,56,200,205 .byte 15,56,202,254 .byte 69,15,56,202,245 pxor %xmm6,%xmm4 .byte 15,56,201,238 pxor %xmm13,%xmm11 .byte 69,15,56,201,229 movdqa %xmm0,%xmm2 movdqa %xmm8,%xmm10 .byte 15,58,204,193,1 .byte 15,56,200,215 .byte 69,15,58,204,193,1 .byte 69,15,56,200,214 .byte 15,56,202,231 .byte 69,15,56,202,222 pxor %xmm7,%xmm5 .byte 15,56,201,247 pxor %xmm14,%xmm12 .byte 69,15,56,201,238 movdqa %xmm0,%xmm1 movdqa 
%xmm8,%xmm9 .byte 15,58,204,194,1 .byte 15,56,200,204 .byte 69,15,58,204,194,1 .byte 69,15,56,200,203 .byte 15,56,202,236 .byte 69,15,56,202,227 pxor %xmm4,%xmm6 .byte 15,56,201,252 pxor %xmm11,%xmm13 .byte 69,15,56,201,243 movdqa %xmm0,%xmm2 movdqa %xmm8,%xmm10 .byte 15,58,204,193,1 .byte 15,56,200,213 .byte 69,15,58,204,193,1 .byte 69,15,56,200,212 .byte 15,56,202,245 .byte 69,15,56,202,236 pxor %xmm5,%xmm7 .byte 15,56,201,229 pxor %xmm12,%xmm14 .byte 69,15,56,201,220 movdqa %xmm0,%xmm1 movdqa %xmm8,%xmm9 .byte 15,58,204,194,1 .byte 15,56,200,206 .byte 69,15,58,204,194,1 .byte 69,15,56,200,205 .byte 15,56,202,254 .byte 69,15,56,202,245 pxor %xmm6,%xmm4 .byte 15,56,201,238 pxor %xmm13,%xmm11 .byte 69,15,56,201,229 movdqa %xmm0,%xmm2 movdqa %xmm8,%xmm10 .byte 15,58,204,193,2 .byte 15,56,200,215 .byte 69,15,58,204,193,2 .byte 69,15,56,200,214 .byte 15,56,202,231 .byte 69,15,56,202,222 pxor %xmm7,%xmm5 .byte 15,56,201,247 pxor %xmm14,%xmm12 .byte 69,15,56,201,238 movdqa %xmm0,%xmm1 movdqa %xmm8,%xmm9 .byte 15,58,204,194,2 .byte 15,56,200,204 .byte 69,15,58,204,194,2 .byte 69,15,56,200,203 .byte 15,56,202,236 .byte 69,15,56,202,227 pxor %xmm4,%xmm6 .byte 15,56,201,252 pxor %xmm11,%xmm13 .byte 69,15,56,201,243 movdqa %xmm0,%xmm2 movdqa %xmm8,%xmm10 .byte 15,58,204,193,2 .byte 15,56,200,213 .byte 69,15,58,204,193,2 .byte 69,15,56,200,212 .byte 15,56,202,245 .byte 69,15,56,202,236 pxor %xmm5,%xmm7 .byte 15,56,201,229 pxor %xmm12,%xmm14 .byte 69,15,56,201,220 movdqa %xmm0,%xmm1 movdqa %xmm8,%xmm9 .byte 15,58,204,194,2 .byte 15,56,200,206 .byte 69,15,58,204,194,2 .byte 69,15,56,200,205 .byte 15,56,202,254 .byte 69,15,56,202,245 pxor %xmm6,%xmm4 .byte 15,56,201,238 pxor %xmm13,%xmm11 .byte 69,15,56,201,229 movdqa %xmm0,%xmm2 movdqa %xmm8,%xmm10 .byte 15,58,204,193,2 .byte 15,56,200,215 .byte 69,15,58,204,193,2 .byte 69,15,56,200,214 .byte 15,56,202,231 .byte 69,15,56,202,222 pxor %xmm7,%xmm5 .byte 15,56,201,247 pxor %xmm14,%xmm12 .byte 69,15,56,201,238 movdqa %xmm0,%xmm1 movdqa %xmm8,%xmm9 .byte 15,58,204,194,3 .byte 15,56,200,204 .byte 69,15,58,204,194,3 .byte 69,15,56,200,203 .byte 15,56,202,236 .byte 69,15,56,202,227 pxor %xmm4,%xmm6 .byte 15,56,201,252 pxor %xmm11,%xmm13 .byte 69,15,56,201,243 movdqa %xmm0,%xmm2 movdqa %xmm8,%xmm10 .byte 15,58,204,193,3 .byte 15,56,200,213 .byte 69,15,58,204,193,3 .byte 69,15,56,200,212 .byte 15,56,202,245 .byte 69,15,56,202,236 pxor %xmm5,%xmm7 pxor %xmm12,%xmm14 movl $1,%ecx pxor %xmm4,%xmm4 cmpl 0(%rbx),%ecx cmovgeq %rsp,%r8 movdqa %xmm0,%xmm1 movdqa %xmm8,%xmm9 .byte 15,58,204,194,3 .byte 15,56,200,206 .byte 69,15,58,204,194,3 .byte 69,15,56,200,205 .byte 15,56,202,254 .byte 69,15,56,202,245 cmpl 4(%rbx),%ecx cmovgeq %rsp,%r9 movq (%rbx),%xmm6 movdqa %xmm0,%xmm2 movdqa %xmm8,%xmm10 .byte 15,58,204,193,3 .byte 15,56,200,215 .byte 69,15,58,204,193,3 .byte 69,15,56,200,214 pshufd $0x00,%xmm6,%xmm11 pshufd $0x55,%xmm6,%xmm12 movdqa %xmm6,%xmm7 pcmpgtd %xmm4,%xmm11 pcmpgtd %xmm4,%xmm12 movdqa %xmm0,%xmm1 movdqa %xmm8,%xmm9 .byte 15,58,204,194,3 .byte 15,56,200,204 .byte 69,15,58,204,194,3 .byte 68,15,56,200,204 pcmpgtd %xmm4,%xmm7 pand %xmm11,%xmm0 pand %xmm11,%xmm1 pand %xmm12,%xmm8 pand %xmm12,%xmm9 paddd %xmm7,%xmm6 paddd 64(%rsp),%xmm0 paddd 80(%rsp),%xmm1 paddd 96(%rsp),%xmm8 paddd 112(%rsp),%xmm9 movq %xmm6,(%rbx) decl %edx jnz .Loop_shaext movl 280(%rsp),%edx pshufd $27,%xmm0,%xmm0 pshufd $27,%xmm8,%xmm8 movdqa %xmm0,%xmm6 punpckldq %xmm8,%xmm0 punpckhdq %xmm8,%xmm6 punpckhdq %xmm9,%xmm1 movq %xmm0,0-64(%rdi) psrldq $8,%xmm0 movq %xmm6,64-64(%rdi) psrldq 
$8,%xmm6 movq %xmm0,32-64(%rdi) psrldq $8,%xmm1 movq %xmm6,96-64(%rdi) movq %xmm1,128-64(%rdi) leaq 8(%rdi),%rdi leaq 32(%rsi),%rsi decl %edx jnz .Loop_grande_shaext .Ldone_shaext: movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Lepilogue_shaext: .byte 0xf3,0xc3 .size sha1_multi_block_shaext,.-sha1_multi_block_shaext +.type sha1_multi_block_avx,@function +.align 32 +sha1_multi_block_avx: +_avx_shortcut: + shrq $32,%rcx + cmpl $2,%edx + jb .Lavx + testl $32,%ecx + jnz _avx2_shortcut + jmp .Lavx +.align 32 +.Lavx: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.Lbody_avx: + leaq K_XX_XX(%rip),%rbp + leaq 256(%rsp),%rbx + + vzeroupper +.Loop_grande_avx: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone_avx + + vmovdqu 0(%rdi),%xmm10 + leaq 128(%rsp),%rax + vmovdqu 32(%rdi),%xmm11 + vmovdqu 64(%rdi),%xmm12 + vmovdqu 96(%rdi),%xmm13 + vmovdqu 128(%rdi),%xmm14 + vmovdqu 96(%rbp),%xmm5 + jmp .Loop_avx + +.align 32 +.Loop_avx: + vmovdqa -32(%rbp),%xmm15 + vmovd (%r8),%xmm0 + leaq 64(%r8),%r8 + vmovd (%r9),%xmm2 + leaq 64(%r9),%r9 + vpinsrd $1,(%r10),%xmm0,%xmm0 + leaq 64(%r10),%r10 + vpinsrd $1,(%r11),%xmm2,%xmm2 + leaq 64(%r11),%r11 + vmovd -60(%r8),%xmm1 + vpunpckldq %xmm2,%xmm0,%xmm0 + vmovd -60(%r9),%xmm9 + vpshufb %xmm5,%xmm0,%xmm0 + vpinsrd $1,-60(%r10),%xmm1,%xmm1 + vpinsrd $1,-60(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,0-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -56(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -56(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-56(%r10),%xmm2,%xmm2 + vpinsrd $1,-56(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,16-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -52(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -52(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-52(%r10),%xmm3,%xmm3 + vpinsrd $1,-52(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,32-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -48(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -48(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor 
%xmm7,%xmm14,%xmm14 + vpinsrd $1,-48(%r10),%xmm4,%xmm4 + vpinsrd $1,-48(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,48-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -44(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -44(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpinsrd $1,-44(%r10),%xmm0,%xmm0 + vpinsrd $1,-44(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,64-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -40(%r8),%xmm1 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -40(%r9),%xmm9 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpinsrd $1,-40(%r10),%xmm1,%xmm1 + vpinsrd $1,-40(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,80-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -36(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -36(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-36(%r10),%xmm2,%xmm2 + vpinsrd $1,-36(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,96-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -32(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -32(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-32(%r10),%xmm3,%xmm3 + vpinsrd $1,-32(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,112-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -28(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -28(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-28(%r10),%xmm4,%xmm4 + vpinsrd $1,-28(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,128-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -24(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -24(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpinsrd $1,-24(%r10),%xmm0,%xmm0 + vpinsrd $1,-24(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn 
%xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,144-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -20(%r8),%xmm1 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -20(%r9),%xmm9 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpinsrd $1,-20(%r10),%xmm1,%xmm1 + vpinsrd $1,-20(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,160-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -16(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -16(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-16(%r10),%xmm2,%xmm2 + vpinsrd $1,-16(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,176-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -12(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -12(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-12(%r10),%xmm3,%xmm3 + vpinsrd $1,-12(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,192-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -8(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -8(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-8(%r10),%xmm4,%xmm4 + vpinsrd $1,-8(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,208-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -4(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -4(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vmovdqa 0-128(%rax),%xmm1 + vpinsrd $1,-4(%r10),%xmm0,%xmm0 + vpinsrd $1,-4(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + prefetcht0 63(%r8) + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,224-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + prefetcht0 63(%r9) + vpxor %xmm7,%xmm6,%xmm6 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + prefetcht0 63(%r10) + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + prefetcht0 63(%r11) + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 16-128(%rax),%xmm2 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 32-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,240-128(%rax) 
+ vpaddd %xmm0,%xmm14,%xmm14 + vpxor 128-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 48-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,0-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 144-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 64-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,16-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 160-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 80-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,32-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 176-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 96-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,48-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 192-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 0(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 112-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,64-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 208-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 128-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,80-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 
224-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 144-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,96-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 240-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 160-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,112-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 0-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 176-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,128-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 16-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 192-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,144-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 32-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 208-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,160-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 48-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 224-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,176-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 64-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd 
%xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 240-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,192-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 80-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 0-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,208-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 96-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 16-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,224-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 112-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 32-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,240-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 128-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 48-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,0-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 144-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 64-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,16-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 160-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 80-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd 
%xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,32-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 176-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 96-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,48-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 192-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 112-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,64-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 208-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 128-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,80-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 224-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 144-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,96-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 240-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 160-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,112-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 0-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 32(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 176-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 16-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + 
vmovdqu %xmm0,128-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 192-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 32-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,144-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 208-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 48-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,160-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 224-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 64-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,176-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 240-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 80-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,192-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 0-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 96-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,208-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 16-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 
112-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,224-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 32-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 128-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,240-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 48-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 144-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,0-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 64-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 160-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,16-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 80-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 176-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,32-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 96-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 192-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,48-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor 
%xmm0,%xmm3,%xmm3 + vmovdqa 112-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 208-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,64-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 128-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 224-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,80-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 144-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 240-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,96-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 160-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 0-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,112-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 176-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 16-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,128-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 192-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 32-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,144-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + 
vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 208-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 48-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,160-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 224-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 64-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,176-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 64(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 240-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,192-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 80-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 0-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,208-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 96-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 16-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,224-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 112-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 32-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,240-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 128-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd 
%xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 48-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,0-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 144-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 64-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,16-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 160-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 80-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,32-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 176-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 96-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,48-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 192-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 112-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,64-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 208-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 128-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,80-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 224-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 144-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa 
%xmm0,96-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 240-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 160-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,112-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 0-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 176-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 16-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 192-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 32-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 208-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 48-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 224-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 64-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 240-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 80-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor 
%xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 0-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 96-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 16-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 112-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + + vpsrld $27,%xmm11,%xmm9 + vpaddd %xmm4,%xmm10,%xmm10 + vpxor %xmm13,%xmm6,%xmm6 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm7,%xmm12,%xmm12 + movl $1,%ecx + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqu (%rbx),%xmm6 + vpxor %xmm8,%xmm8,%xmm8 + vmovdqa %xmm6,%xmm7 + vpcmpgtd %xmm8,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + + vpand %xmm7,%xmm10,%xmm10 + vpand %xmm7,%xmm11,%xmm11 + vpaddd 0(%rdi),%xmm10,%xmm10 + vpand %xmm7,%xmm12,%xmm12 + vpaddd 32(%rdi),%xmm11,%xmm11 + vpand %xmm7,%xmm13,%xmm13 + vpaddd 64(%rdi),%xmm12,%xmm12 + vpand %xmm7,%xmm14,%xmm14 + vpaddd 96(%rdi),%xmm13,%xmm13 + vpaddd 128(%rdi),%xmm14,%xmm14 + vmovdqu %xmm10,0(%rdi) + vmovdqu %xmm11,32(%rdi) + vmovdqu %xmm12,64(%rdi) + vmovdqu %xmm13,96(%rdi) + vmovdqu %xmm14,128(%rdi) + + vmovdqu %xmm6,(%rbx) + vmovdqu 96(%rbp),%xmm5 + decl %edx + jnz .Loop_avx + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande_avx + +.Ldone_avx: + movq 272(%rsp),%rax + vzeroupper + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size sha1_multi_block_avx,.-sha1_multi_block_avx +.type sha1_multi_block_avx2,@function +.align 32 +sha1_multi_block_avx2: +_avx2_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576,%rsp + andq $-256,%rsp + movq %rax,544(%rsp) +.Lbody_avx2: + leaq K_XX_XX(%rip),%rbp + shrl $1,%edx + + vzeroupper +.Loop_grande_avx2: + movl %edx,552(%rsp) + xorl %edx,%edx + leaq 512(%rsp),%rbx + movq 0(%rsi),%r12 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r12 + movq 16(%rsi),%r13 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r13 + movq 32(%rsi),%r14 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r14 + movq 48(%rsi),%r15 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r15 + movq 64(%rsi),%r8 + movl 72(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + 
testl %ecx,%ecx + movl %ecx,16(%rbx) + cmovleq %rbp,%r8 + movq 80(%rsi),%r9 + movl 88(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,20(%rbx) + cmovleq %rbp,%r9 + movq 96(%rsi),%r10 + movl 104(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,24(%rbx) + cmovleq %rbp,%r10 + movq 112(%rsi),%r11 + movl 120(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,28(%rbx) + cmovleq %rbp,%r11 + vmovdqu 0(%rdi),%ymm0 + leaq 128(%rsp),%rax + vmovdqu 32(%rdi),%ymm1 + leaq 256+128(%rsp),%rbx + vmovdqu 64(%rdi),%ymm2 + vmovdqu 96(%rdi),%ymm3 + vmovdqu 128(%rdi),%ymm4 + vmovdqu 96(%rbp),%ymm9 + jmp .Loop_avx2 + +.align 32 +.Loop_avx2: + vmovdqa -32(%rbp),%ymm15 + vmovd (%r12),%xmm10 + leaq 64(%r12),%r12 + vmovd (%r8),%xmm12 + leaq 64(%r8),%r8 + vmovd (%r13),%xmm7 + leaq 64(%r13),%r13 + vmovd (%r9),%xmm6 + leaq 64(%r9),%r9 + vpinsrd $1,(%r14),%xmm10,%xmm10 + leaq 64(%r14),%r14 + vpinsrd $1,(%r10),%xmm12,%xmm12 + leaq 64(%r10),%r10 + vpinsrd $1,(%r15),%xmm7,%xmm7 + leaq 64(%r15),%r15 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,(%r11),%xmm6,%xmm6 + leaq 64(%r11),%r11 + vpunpckldq %ymm6,%ymm12,%ymm12 + vmovd -60(%r12),%xmm11 + vinserti128 $1,%xmm12,%ymm10,%ymm10 + vmovd -60(%r8),%xmm8 + vpshufb %ymm9,%ymm10,%ymm10 + vmovd -60(%r13),%xmm7 + vmovd -60(%r9),%xmm6 + vpinsrd $1,-60(%r14),%xmm11,%xmm11 + vpinsrd $1,-60(%r10),%xmm8,%xmm8 + vpinsrd $1,-60(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-60(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,0-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -56(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -56(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -56(%r13),%xmm7 + vmovd -56(%r9),%xmm6 + vpinsrd $1,-56(%r14),%xmm12,%xmm12 + vpinsrd $1,-56(%r10),%xmm8,%xmm8 + vpinsrd $1,-56(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-56(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,32-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -52(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -52(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -52(%r13),%xmm7 + vmovd -52(%r9),%xmm6 + vpinsrd $1,-52(%r14),%xmm13,%xmm13 + vpinsrd $1,-52(%r10),%xmm8,%xmm8 + vpinsrd $1,-52(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-52(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,64-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -48(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -48(%r8),%xmm8 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -48(%r13),%xmm7 + vmovd 
-48(%r9),%xmm6 + vpinsrd $1,-48(%r14),%xmm14,%xmm14 + vpinsrd $1,-48(%r10),%xmm8,%xmm8 + vpinsrd $1,-48(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-48(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,96-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -44(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -44(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovd -44(%r13),%xmm7 + vmovd -44(%r9),%xmm6 + vpinsrd $1,-44(%r14),%xmm10,%xmm10 + vpinsrd $1,-44(%r10),%xmm8,%xmm8 + vpinsrd $1,-44(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-44(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,128-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -40(%r12),%xmm11 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -40(%r8),%xmm8 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovd -40(%r13),%xmm7 + vmovd -40(%r9),%xmm6 + vpinsrd $1,-40(%r14),%xmm11,%xmm11 + vpinsrd $1,-40(%r10),%xmm8,%xmm8 + vpinsrd $1,-40(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-40(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,160-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -36(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -36(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -36(%r13),%xmm7 + vmovd -36(%r9),%xmm6 + vpinsrd $1,-36(%r14),%xmm12,%xmm12 + vpinsrd $1,-36(%r10),%xmm8,%xmm8 + vpinsrd $1,-36(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-36(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,192-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -32(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -32(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -32(%r13),%xmm7 + vmovd -32(%r9),%xmm6 + vpinsrd $1,-32(%r14),%xmm13,%xmm13 + vpinsrd $1,-32(%r10),%xmm8,%xmm8 + vpinsrd $1,-32(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-32(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,224-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -28(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -28(%r8),%xmm8 + vpaddd 
%ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -28(%r13),%xmm7 + vmovd -28(%r9),%xmm6 + vpinsrd $1,-28(%r14),%xmm14,%xmm14 + vpinsrd $1,-28(%r10),%xmm8,%xmm8 + vpinsrd $1,-28(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-28(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,256-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -24(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -24(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovd -24(%r13),%xmm7 + vmovd -24(%r9),%xmm6 + vpinsrd $1,-24(%r14),%xmm10,%xmm10 + vpinsrd $1,-24(%r10),%xmm8,%xmm8 + vpinsrd $1,-24(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-24(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,288-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -20(%r12),%xmm11 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -20(%r8),%xmm8 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovd -20(%r13),%xmm7 + vmovd -20(%r9),%xmm6 + vpinsrd $1,-20(%r14),%xmm11,%xmm11 + vpinsrd $1,-20(%r10),%xmm8,%xmm8 + vpinsrd $1,-20(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-20(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,320-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -16(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -16(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -16(%r13),%xmm7 + vmovd -16(%r9),%xmm6 + vpinsrd $1,-16(%r14),%xmm12,%xmm12 + vpinsrd $1,-16(%r10),%xmm8,%xmm8 + vpinsrd $1,-16(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-16(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,352-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -12(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -12(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -12(%r13),%xmm7 + vmovd -12(%r9),%xmm6 + vpinsrd $1,-12(%r14),%xmm13,%xmm13 + vpinsrd $1,-12(%r10),%xmm8,%xmm8 + vpinsrd $1,-12(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-12(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,384-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 
$1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -8(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -8(%r8),%xmm8 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -8(%r13),%xmm7 + vmovd -8(%r9),%xmm6 + vpinsrd $1,-8(%r14),%xmm14,%xmm14 + vpinsrd $1,-8(%r10),%xmm8,%xmm8 + vpinsrd $1,-8(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-8(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,416-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -4(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -4(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovdqa 0-128(%rax),%ymm11 + vmovd -4(%r13),%xmm7 + vmovd -4(%r9),%xmm6 + vpinsrd $1,-4(%r14),%xmm10,%xmm10 + vpinsrd $1,-4(%r10),%xmm8,%xmm8 + vpinsrd $1,-4(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-4(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + prefetcht0 63(%r12) + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,448-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + prefetcht0 63(%r13) + vpxor %ymm6,%ymm5,%ymm5 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + prefetcht0 63(%r14) + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + prefetcht0 63(%r15) + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 32-128(%rax),%ymm12 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 64-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + prefetcht0 63(%r8) + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,480-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 256-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + prefetcht0 63(%r9) + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + prefetcht0 63(%r10) + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + prefetcht0 63(%r11) + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 96-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,0-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 288-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 128-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,32-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 320-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + + vpslld $30,%ymm4,%ymm6 + vpor 
%ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 160-128(%rax),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,64-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 352-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 192-128(%rax),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,96-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 384-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 0(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 224-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,128-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 416-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 256-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,160-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 448-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 288-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,192-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 480-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 320-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,224-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 0-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd 
%ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 352-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,256-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 32-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 384-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,288-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 64-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 416-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,320-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 96-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 448-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,352-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 128-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 480-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,384-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 160-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 0-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,416-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 192-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 32-128(%rax),%ymm13 
+ + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,448-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 224-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 64-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,480-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 256-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 96-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,0-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 288-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 128-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,32-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 320-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 160-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,64-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 352-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 192-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,96-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 384-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 224-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,128-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 416-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 
+ vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 256-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,160-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 448-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 288-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,192-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 480-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 320-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,224-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 0-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 32(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 352-256-128(%rbx),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 32-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,256-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 384-256-128(%rbx),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 64-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,288-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 416-256-128(%rbx),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 96-128(%rax),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu 
%ymm12,320-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 448-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 128-128(%rax),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,352-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 480-256-128(%rbx),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 160-128(%rax),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,384-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 0-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 192-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,416-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 32-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 224-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,448-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 64-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 256-256-128(%rbx),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,480-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 96-128(%rax),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 
288-256-128(%rbx),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,0-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 128-128(%rax),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 320-256-128(%rbx),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,32-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 160-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 352-256-128(%rbx),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,64-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 192-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 384-256-128(%rbx),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,96-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 224-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 416-256-128(%rbx),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,128-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 256-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 448-256-128(%rbx),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,160-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor 
%ymm12,%ymm10,%ymm10 + vmovdqa 288-256-128(%rbx),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 480-256-128(%rbx),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,192-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 320-256-128(%rbx),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 0-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,224-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 352-256-128(%rbx),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 32-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,256-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 384-256-128(%rbx),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 64-128(%rax),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,288-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 416-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 96-128(%rax),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,320-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 448-256-128(%rbx),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 128-128(%rax),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,352-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld 
$30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 64(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 480-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,384-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 160-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 0-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,416-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 192-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 32-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,448-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 224-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 64-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,480-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 256-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 96-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,0-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 288-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 128-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,32-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 320-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor 
%ymm14,%ymm12,%ymm12 + vmovdqa 160-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,64-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 352-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 192-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,96-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 384-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 224-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,128-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 416-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 256-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,160-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 448-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 288-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,192-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 480-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 320-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,224-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 0-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 352-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 
32-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 384-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 64-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 416-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 96-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 448-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 128-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 480-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 160-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 0-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 192-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 32-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 224-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 
+ vpxor %ymm2,%ymm4,%ymm5 + + vpsrld $27,%ymm1,%ymm8 + vpaddd %ymm14,%ymm0,%ymm0 + vpxor %ymm3,%ymm5,%ymm5 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm6,%ymm2,%ymm2 + movl $1,%ecx + leaq 512(%rsp),%rbx + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r12 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r13 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r14 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r15 + cmpl 16(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 20(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 24(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 28(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqu (%rbx),%ymm5 + vpxor %ymm7,%ymm7,%ymm7 + vmovdqa %ymm5,%ymm6 + vpcmpgtd %ymm7,%ymm6,%ymm6 + vpaddd %ymm6,%ymm5,%ymm5 + + vpand %ymm6,%ymm0,%ymm0 + vpand %ymm6,%ymm1,%ymm1 + vpaddd 0(%rdi),%ymm0,%ymm0 + vpand %ymm6,%ymm2,%ymm2 + vpaddd 32(%rdi),%ymm1,%ymm1 + vpand %ymm6,%ymm3,%ymm3 + vpaddd 64(%rdi),%ymm2,%ymm2 + vpand %ymm6,%ymm4,%ymm4 + vpaddd 96(%rdi),%ymm3,%ymm3 + vpaddd 128(%rdi),%ymm4,%ymm4 + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm1,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,128(%rdi) + + vmovdqu %ymm5,(%rbx) + leaq 256+128(%rsp),%rbx + vmovdqu 96(%rbp),%ymm9 + decl %edx + jnz .Loop_avx2 + + + + + + + +.Ldone_avx2: + movq 544(%rsp),%rax + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.size sha1_multi_block_avx2,.-sha1_multi_block_avx2 .align 256 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 K_XX_XX: .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f .byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 .byte 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 Index: head/secure/lib/libcrypto/amd64/sha1-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/sha1-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/sha1-x86_64.S (revision 299481) @@ -1,2592 +1,5398 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from sha1-x86_64.pl. 
.text .globl sha1_block_data_order .type sha1_block_data_order,@function .align 16 sha1_block_data_order: movl OPENSSL_ia32cap_P+0(%rip),%r9d movl OPENSSL_ia32cap_P+4(%rip),%r8d movl OPENSSL_ia32cap_P+8(%rip),%r10d testl $512,%r8d jz .Lialu testl $536870912,%r10d jnz _shaext_shortcut + andl $296,%r10d + cmpl $296,%r10d + je _avx2_shortcut + andl $268435456,%r8d + andl $1073741824,%r9d + orl %r9d,%r8d + cmpl $1342177280,%r8d + je _avx_shortcut jmp _ssse3_shortcut .align 16 .Lialu: movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 movq %rdi,%r8 subq $72,%rsp movq %rsi,%r9 andq $-64,%rsp movq %rdx,%r10 movq %rax,64(%rsp) .Lprologue: movl 0(%r8),%esi movl 4(%r8),%edi movl 8(%r8),%r11d movl 12(%r8),%r12d movl 16(%r8),%r13d jmp .Lloop .align 16 .Lloop: movl 0(%r9),%edx bswapl %edx movl 4(%r9),%ebp movl %r12d,%eax movl %edx,0(%rsp) movl %esi,%ecx bswapl %ebp xorl %r11d,%eax roll $5,%ecx andl %edi,%eax leal 1518500249(%rdx,%r13,1),%r13d addl %ecx,%r13d xorl %r12d,%eax roll $30,%edi addl %eax,%r13d movl 8(%r9),%r14d movl %r11d,%eax movl %ebp,4(%rsp) movl %r13d,%ecx bswapl %r14d xorl %edi,%eax roll $5,%ecx andl %esi,%eax leal 1518500249(%rbp,%r12,1),%r12d addl %ecx,%r12d xorl %r11d,%eax roll $30,%esi addl %eax,%r12d movl 12(%r9),%edx movl %edi,%eax movl %r14d,8(%rsp) movl %r12d,%ecx bswapl %edx xorl %esi,%eax roll $5,%ecx andl %r13d,%eax leal 1518500249(%r14,%r11,1),%r11d addl %ecx,%r11d xorl %edi,%eax roll $30,%r13d addl %eax,%r11d movl 16(%r9),%ebp movl %esi,%eax movl %edx,12(%rsp) movl %r11d,%ecx bswapl %ebp xorl %r13d,%eax roll $5,%ecx andl %r12d,%eax leal 1518500249(%rdx,%rdi,1),%edi addl %ecx,%edi xorl %esi,%eax roll $30,%r12d addl %eax,%edi movl 20(%r9),%r14d movl %r13d,%eax movl %ebp,16(%rsp) movl %edi,%ecx bswapl %r14d xorl %r12d,%eax roll $5,%ecx andl %r11d,%eax leal 1518500249(%rbp,%rsi,1),%esi addl %ecx,%esi xorl %r13d,%eax roll $30,%r11d addl %eax,%esi movl 24(%r9),%edx movl %r12d,%eax movl %r14d,20(%rsp) movl %esi,%ecx bswapl %edx xorl %r11d,%eax roll $5,%ecx andl %edi,%eax leal 1518500249(%r14,%r13,1),%r13d addl %ecx,%r13d xorl %r12d,%eax roll $30,%edi addl %eax,%r13d movl 28(%r9),%ebp movl %r11d,%eax movl %edx,24(%rsp) movl %r13d,%ecx bswapl %ebp xorl %edi,%eax roll $5,%ecx andl %esi,%eax leal 1518500249(%rdx,%r12,1),%r12d addl %ecx,%r12d xorl %r11d,%eax roll $30,%esi addl %eax,%r12d movl 32(%r9),%r14d movl %edi,%eax movl %ebp,28(%rsp) movl %r12d,%ecx bswapl %r14d xorl %esi,%eax roll $5,%ecx andl %r13d,%eax leal 1518500249(%rbp,%r11,1),%r11d addl %ecx,%r11d xorl %edi,%eax roll $30,%r13d addl %eax,%r11d movl 36(%r9),%edx movl %esi,%eax movl %r14d,32(%rsp) movl %r11d,%ecx bswapl %edx xorl %r13d,%eax roll $5,%ecx andl %r12d,%eax leal 1518500249(%r14,%rdi,1),%edi addl %ecx,%edi xorl %esi,%eax roll $30,%r12d addl %eax,%edi movl 40(%r9),%ebp movl %r13d,%eax movl %edx,36(%rsp) movl %edi,%ecx bswapl %ebp xorl %r12d,%eax roll $5,%ecx andl %r11d,%eax leal 1518500249(%rdx,%rsi,1),%esi addl %ecx,%esi xorl %r13d,%eax roll $30,%r11d addl %eax,%esi movl 44(%r9),%r14d movl %r12d,%eax movl %ebp,40(%rsp) movl %esi,%ecx bswapl %r14d xorl %r11d,%eax roll $5,%ecx andl %edi,%eax leal 1518500249(%rbp,%r13,1),%r13d addl %ecx,%r13d xorl %r12d,%eax roll $30,%edi addl %eax,%r13d movl 48(%r9),%edx movl %r11d,%eax movl %r14d,44(%rsp) movl %r13d,%ecx bswapl %edx xorl %edi,%eax roll $5,%ecx andl %esi,%eax leal 1518500249(%r14,%r12,1),%r12d addl %ecx,%r12d xorl %r11d,%eax roll $30,%esi addl %eax,%r12d movl 52(%r9),%ebp movl %edi,%eax movl %edx,48(%rsp) movl %r12d,%ecx bswapl %ebp xorl %esi,%eax 
roll $5,%ecx andl %r13d,%eax leal 1518500249(%rdx,%r11,1),%r11d addl %ecx,%r11d xorl %edi,%eax roll $30,%r13d addl %eax,%r11d movl 56(%r9),%r14d movl %esi,%eax movl %ebp,52(%rsp) movl %r11d,%ecx bswapl %r14d xorl %r13d,%eax roll $5,%ecx andl %r12d,%eax leal 1518500249(%rbp,%rdi,1),%edi addl %ecx,%edi xorl %esi,%eax roll $30,%r12d addl %eax,%edi movl 60(%r9),%edx movl %r13d,%eax movl %r14d,56(%rsp) movl %edi,%ecx bswapl %edx xorl %r12d,%eax roll $5,%ecx andl %r11d,%eax leal 1518500249(%r14,%rsi,1),%esi addl %ecx,%esi xorl %r13d,%eax roll $30,%r11d addl %eax,%esi xorl 0(%rsp),%ebp movl %r12d,%eax movl %edx,60(%rsp) movl %esi,%ecx xorl 8(%rsp),%ebp xorl %r11d,%eax roll $5,%ecx xorl 32(%rsp),%ebp andl %edi,%eax leal 1518500249(%rdx,%r13,1),%r13d roll $30,%edi xorl %r12d,%eax addl %ecx,%r13d roll $1,%ebp addl %eax,%r13d xorl 4(%rsp),%r14d movl %r11d,%eax movl %ebp,0(%rsp) movl %r13d,%ecx xorl 12(%rsp),%r14d xorl %edi,%eax roll $5,%ecx xorl 36(%rsp),%r14d andl %esi,%eax leal 1518500249(%rbp,%r12,1),%r12d roll $30,%esi xorl %r11d,%eax addl %ecx,%r12d roll $1,%r14d addl %eax,%r12d xorl 8(%rsp),%edx movl %edi,%eax movl %r14d,4(%rsp) movl %r12d,%ecx xorl 16(%rsp),%edx xorl %esi,%eax roll $5,%ecx xorl 40(%rsp),%edx andl %r13d,%eax leal 1518500249(%r14,%r11,1),%r11d roll $30,%r13d xorl %edi,%eax addl %ecx,%r11d roll $1,%edx addl %eax,%r11d xorl 12(%rsp),%ebp movl %esi,%eax movl %edx,8(%rsp) movl %r11d,%ecx xorl 20(%rsp),%ebp xorl %r13d,%eax roll $5,%ecx xorl 44(%rsp),%ebp andl %r12d,%eax leal 1518500249(%rdx,%rdi,1),%edi roll $30,%r12d xorl %esi,%eax addl %ecx,%edi roll $1,%ebp addl %eax,%edi xorl 16(%rsp),%r14d movl %r13d,%eax movl %ebp,12(%rsp) movl %edi,%ecx xorl 24(%rsp),%r14d xorl %r12d,%eax roll $5,%ecx xorl 48(%rsp),%r14d andl %r11d,%eax leal 1518500249(%rbp,%rsi,1),%esi roll $30,%r11d xorl %r13d,%eax addl %ecx,%esi roll $1,%r14d addl %eax,%esi xorl 20(%rsp),%edx movl %edi,%eax movl %r14d,16(%rsp) movl %esi,%ecx xorl 28(%rsp),%edx xorl %r12d,%eax roll $5,%ecx xorl 52(%rsp),%edx leal 1859775393(%r14,%r13,1),%r13d xorl %r11d,%eax addl %ecx,%r13d roll $30,%edi addl %eax,%r13d roll $1,%edx xorl 24(%rsp),%ebp movl %esi,%eax movl %edx,20(%rsp) movl %r13d,%ecx xorl 32(%rsp),%ebp xorl %r11d,%eax roll $5,%ecx xorl 56(%rsp),%ebp leal 1859775393(%rdx,%r12,1),%r12d xorl %edi,%eax addl %ecx,%r12d roll $30,%esi addl %eax,%r12d roll $1,%ebp xorl 28(%rsp),%r14d movl %r13d,%eax movl %ebp,24(%rsp) movl %r12d,%ecx xorl 36(%rsp),%r14d xorl %edi,%eax roll $5,%ecx xorl 60(%rsp),%r14d leal 1859775393(%rbp,%r11,1),%r11d xorl %esi,%eax addl %ecx,%r11d roll $30,%r13d addl %eax,%r11d roll $1,%r14d xorl 32(%rsp),%edx movl %r12d,%eax movl %r14d,28(%rsp) movl %r11d,%ecx xorl 40(%rsp),%edx xorl %esi,%eax roll $5,%ecx xorl 0(%rsp),%edx leal 1859775393(%r14,%rdi,1),%edi xorl %r13d,%eax addl %ecx,%edi roll $30,%r12d addl %eax,%edi roll $1,%edx xorl 36(%rsp),%ebp movl %r11d,%eax movl %edx,32(%rsp) movl %edi,%ecx xorl 44(%rsp),%ebp xorl %r13d,%eax roll $5,%ecx xorl 4(%rsp),%ebp leal 1859775393(%rdx,%rsi,1),%esi xorl %r12d,%eax addl %ecx,%esi roll $30,%r11d addl %eax,%esi roll $1,%ebp xorl 40(%rsp),%r14d movl %edi,%eax movl %ebp,36(%rsp) movl %esi,%ecx xorl 48(%rsp),%r14d xorl %r12d,%eax roll $5,%ecx xorl 8(%rsp),%r14d leal 1859775393(%rbp,%r13,1),%r13d xorl %r11d,%eax addl %ecx,%r13d roll $30,%edi addl %eax,%r13d roll $1,%r14d xorl 44(%rsp),%edx movl %esi,%eax movl %r14d,40(%rsp) movl %r13d,%ecx xorl 52(%rsp),%edx xorl %r11d,%eax roll $5,%ecx xorl 12(%rsp),%edx leal 1859775393(%r14,%r12,1),%r12d xorl %edi,%eax addl %ecx,%r12d 
roll $30,%esi addl %eax,%r12d roll $1,%edx xorl 48(%rsp),%ebp movl %r13d,%eax movl %edx,44(%rsp) movl %r12d,%ecx xorl 56(%rsp),%ebp xorl %edi,%eax roll $5,%ecx xorl 16(%rsp),%ebp leal 1859775393(%rdx,%r11,1),%r11d xorl %esi,%eax addl %ecx,%r11d roll $30,%r13d addl %eax,%r11d roll $1,%ebp xorl 52(%rsp),%r14d movl %r12d,%eax movl %ebp,48(%rsp) movl %r11d,%ecx xorl 60(%rsp),%r14d xorl %esi,%eax roll $5,%ecx xorl 20(%rsp),%r14d leal 1859775393(%rbp,%rdi,1),%edi xorl %r13d,%eax addl %ecx,%edi roll $30,%r12d addl %eax,%edi roll $1,%r14d xorl 56(%rsp),%edx movl %r11d,%eax movl %r14d,52(%rsp) movl %edi,%ecx xorl 0(%rsp),%edx xorl %r13d,%eax roll $5,%ecx xorl 24(%rsp),%edx leal 1859775393(%r14,%rsi,1),%esi xorl %r12d,%eax addl %ecx,%esi roll $30,%r11d addl %eax,%esi roll $1,%edx xorl 60(%rsp),%ebp movl %edi,%eax movl %edx,56(%rsp) movl %esi,%ecx xorl 4(%rsp),%ebp xorl %r12d,%eax roll $5,%ecx xorl 28(%rsp),%ebp leal 1859775393(%rdx,%r13,1),%r13d xorl %r11d,%eax addl %ecx,%r13d roll $30,%edi addl %eax,%r13d roll $1,%ebp xorl 0(%rsp),%r14d movl %esi,%eax movl %ebp,60(%rsp) movl %r13d,%ecx xorl 8(%rsp),%r14d xorl %r11d,%eax roll $5,%ecx xorl 32(%rsp),%r14d leal 1859775393(%rbp,%r12,1),%r12d xorl %edi,%eax addl %ecx,%r12d roll $30,%esi addl %eax,%r12d roll $1,%r14d xorl 4(%rsp),%edx movl %r13d,%eax movl %r14d,0(%rsp) movl %r12d,%ecx xorl 12(%rsp),%edx xorl %edi,%eax roll $5,%ecx xorl 36(%rsp),%edx leal 1859775393(%r14,%r11,1),%r11d xorl %esi,%eax addl %ecx,%r11d roll $30,%r13d addl %eax,%r11d roll $1,%edx xorl 8(%rsp),%ebp movl %r12d,%eax movl %edx,4(%rsp) movl %r11d,%ecx xorl 16(%rsp),%ebp xorl %esi,%eax roll $5,%ecx xorl 40(%rsp),%ebp leal 1859775393(%rdx,%rdi,1),%edi xorl %r13d,%eax addl %ecx,%edi roll $30,%r12d addl %eax,%edi roll $1,%ebp xorl 12(%rsp),%r14d movl %r11d,%eax movl %ebp,8(%rsp) movl %edi,%ecx xorl 20(%rsp),%r14d xorl %r13d,%eax roll $5,%ecx xorl 44(%rsp),%r14d leal 1859775393(%rbp,%rsi,1),%esi xorl %r12d,%eax addl %ecx,%esi roll $30,%r11d addl %eax,%esi roll $1,%r14d xorl 16(%rsp),%edx movl %edi,%eax movl %r14d,12(%rsp) movl %esi,%ecx xorl 24(%rsp),%edx xorl %r12d,%eax roll $5,%ecx xorl 48(%rsp),%edx leal 1859775393(%r14,%r13,1),%r13d xorl %r11d,%eax addl %ecx,%r13d roll $30,%edi addl %eax,%r13d roll $1,%edx xorl 20(%rsp),%ebp movl %esi,%eax movl %edx,16(%rsp) movl %r13d,%ecx xorl 28(%rsp),%ebp xorl %r11d,%eax roll $5,%ecx xorl 52(%rsp),%ebp leal 1859775393(%rdx,%r12,1),%r12d xorl %edi,%eax addl %ecx,%r12d roll $30,%esi addl %eax,%r12d roll $1,%ebp xorl 24(%rsp),%r14d movl %r13d,%eax movl %ebp,20(%rsp) movl %r12d,%ecx xorl 32(%rsp),%r14d xorl %edi,%eax roll $5,%ecx xorl 56(%rsp),%r14d leal 1859775393(%rbp,%r11,1),%r11d xorl %esi,%eax addl %ecx,%r11d roll $30,%r13d addl %eax,%r11d roll $1,%r14d xorl 28(%rsp),%edx movl %r12d,%eax movl %r14d,24(%rsp) movl %r11d,%ecx xorl 36(%rsp),%edx xorl %esi,%eax roll $5,%ecx xorl 60(%rsp),%edx leal 1859775393(%r14,%rdi,1),%edi xorl %r13d,%eax addl %ecx,%edi roll $30,%r12d addl %eax,%edi roll $1,%edx xorl 32(%rsp),%ebp movl %r11d,%eax movl %edx,28(%rsp) movl %edi,%ecx xorl 40(%rsp),%ebp xorl %r13d,%eax roll $5,%ecx xorl 0(%rsp),%ebp leal 1859775393(%rdx,%rsi,1),%esi xorl %r12d,%eax addl %ecx,%esi roll $30,%r11d addl %eax,%esi roll $1,%ebp xorl 36(%rsp),%r14d movl %r12d,%eax movl %ebp,32(%rsp) movl %r12d,%ebx xorl 44(%rsp),%r14d andl %r11d,%eax movl %esi,%ecx xorl 4(%rsp),%r14d leal -1894007588(%rbp,%r13,1),%r13d xorl %r11d,%ebx roll $5,%ecx addl %eax,%r13d roll $1,%r14d andl %edi,%ebx addl %ecx,%r13d roll $30,%edi addl %ebx,%r13d xorl 40(%rsp),%edx 
movl %r11d,%eax movl %r14d,36(%rsp) movl %r11d,%ebx xorl 48(%rsp),%edx andl %edi,%eax movl %r13d,%ecx xorl 8(%rsp),%edx leal -1894007588(%r14,%r12,1),%r12d xorl %edi,%ebx roll $5,%ecx addl %eax,%r12d roll $1,%edx andl %esi,%ebx addl %ecx,%r12d roll $30,%esi addl %ebx,%r12d xorl 44(%rsp),%ebp movl %edi,%eax movl %edx,40(%rsp) movl %edi,%ebx xorl 52(%rsp),%ebp andl %esi,%eax movl %r12d,%ecx xorl 12(%rsp),%ebp leal -1894007588(%rdx,%r11,1),%r11d xorl %esi,%ebx roll $5,%ecx addl %eax,%r11d roll $1,%ebp andl %r13d,%ebx addl %ecx,%r11d roll $30,%r13d addl %ebx,%r11d xorl 48(%rsp),%r14d movl %esi,%eax movl %ebp,44(%rsp) movl %esi,%ebx xorl 56(%rsp),%r14d andl %r13d,%eax movl %r11d,%ecx xorl 16(%rsp),%r14d leal -1894007588(%rbp,%rdi,1),%edi xorl %r13d,%ebx roll $5,%ecx addl %eax,%edi roll $1,%r14d andl %r12d,%ebx addl %ecx,%edi roll $30,%r12d addl %ebx,%edi xorl 52(%rsp),%edx movl %r13d,%eax movl %r14d,48(%rsp) movl %r13d,%ebx xorl 60(%rsp),%edx andl %r12d,%eax movl %edi,%ecx xorl 20(%rsp),%edx leal -1894007588(%r14,%rsi,1),%esi xorl %r12d,%ebx roll $5,%ecx addl %eax,%esi roll $1,%edx andl %r11d,%ebx addl %ecx,%esi roll $30,%r11d addl %ebx,%esi xorl 56(%rsp),%ebp movl %r12d,%eax movl %edx,52(%rsp) movl %r12d,%ebx xorl 0(%rsp),%ebp andl %r11d,%eax movl %esi,%ecx xorl 24(%rsp),%ebp leal -1894007588(%rdx,%r13,1),%r13d xorl %r11d,%ebx roll $5,%ecx addl %eax,%r13d roll $1,%ebp andl %edi,%ebx addl %ecx,%r13d roll $30,%edi addl %ebx,%r13d xorl 60(%rsp),%r14d movl %r11d,%eax movl %ebp,56(%rsp) movl %r11d,%ebx xorl 4(%rsp),%r14d andl %edi,%eax movl %r13d,%ecx xorl 28(%rsp),%r14d leal -1894007588(%rbp,%r12,1),%r12d xorl %edi,%ebx roll $5,%ecx addl %eax,%r12d roll $1,%r14d andl %esi,%ebx addl %ecx,%r12d roll $30,%esi addl %ebx,%r12d xorl 0(%rsp),%edx movl %edi,%eax movl %r14d,60(%rsp) movl %edi,%ebx xorl 8(%rsp),%edx andl %esi,%eax movl %r12d,%ecx xorl 32(%rsp),%edx leal -1894007588(%r14,%r11,1),%r11d xorl %esi,%ebx roll $5,%ecx addl %eax,%r11d roll $1,%edx andl %r13d,%ebx addl %ecx,%r11d roll $30,%r13d addl %ebx,%r11d xorl 4(%rsp),%ebp movl %esi,%eax movl %edx,0(%rsp) movl %esi,%ebx xorl 12(%rsp),%ebp andl %r13d,%eax movl %r11d,%ecx xorl 36(%rsp),%ebp leal -1894007588(%rdx,%rdi,1),%edi xorl %r13d,%ebx roll $5,%ecx addl %eax,%edi roll $1,%ebp andl %r12d,%ebx addl %ecx,%edi roll $30,%r12d addl %ebx,%edi xorl 8(%rsp),%r14d movl %r13d,%eax movl %ebp,4(%rsp) movl %r13d,%ebx xorl 16(%rsp),%r14d andl %r12d,%eax movl %edi,%ecx xorl 40(%rsp),%r14d leal -1894007588(%rbp,%rsi,1),%esi xorl %r12d,%ebx roll $5,%ecx addl %eax,%esi roll $1,%r14d andl %r11d,%ebx addl %ecx,%esi roll $30,%r11d addl %ebx,%esi xorl 12(%rsp),%edx movl %r12d,%eax movl %r14d,8(%rsp) movl %r12d,%ebx xorl 20(%rsp),%edx andl %r11d,%eax movl %esi,%ecx xorl 44(%rsp),%edx leal -1894007588(%r14,%r13,1),%r13d xorl %r11d,%ebx roll $5,%ecx addl %eax,%r13d roll $1,%edx andl %edi,%ebx addl %ecx,%r13d roll $30,%edi addl %ebx,%r13d xorl 16(%rsp),%ebp movl %r11d,%eax movl %edx,12(%rsp) movl %r11d,%ebx xorl 24(%rsp),%ebp andl %edi,%eax movl %r13d,%ecx xorl 48(%rsp),%ebp leal -1894007588(%rdx,%r12,1),%r12d xorl %edi,%ebx roll $5,%ecx addl %eax,%r12d roll $1,%ebp andl %esi,%ebx addl %ecx,%r12d roll $30,%esi addl %ebx,%r12d xorl 20(%rsp),%r14d movl %edi,%eax movl %ebp,16(%rsp) movl %edi,%ebx xorl 28(%rsp),%r14d andl %esi,%eax movl %r12d,%ecx xorl 52(%rsp),%r14d leal -1894007588(%rbp,%r11,1),%r11d xorl %esi,%ebx roll $5,%ecx addl %eax,%r11d roll $1,%r14d andl %r13d,%ebx addl %ecx,%r11d roll $30,%r13d addl %ebx,%r11d xorl 24(%rsp),%edx movl %esi,%eax movl 
%r14d,20(%rsp) movl %esi,%ebx xorl 32(%rsp),%edx andl %r13d,%eax movl %r11d,%ecx xorl 56(%rsp),%edx leal -1894007588(%r14,%rdi,1),%edi xorl %r13d,%ebx roll $5,%ecx addl %eax,%edi roll $1,%edx andl %r12d,%ebx addl %ecx,%edi roll $30,%r12d addl %ebx,%edi xorl 28(%rsp),%ebp movl %r13d,%eax movl %edx,24(%rsp) movl %r13d,%ebx xorl 36(%rsp),%ebp andl %r12d,%eax movl %edi,%ecx xorl 60(%rsp),%ebp leal -1894007588(%rdx,%rsi,1),%esi xorl %r12d,%ebx roll $5,%ecx addl %eax,%esi roll $1,%ebp andl %r11d,%ebx addl %ecx,%esi roll $30,%r11d addl %ebx,%esi xorl 32(%rsp),%r14d movl %r12d,%eax movl %ebp,28(%rsp) movl %r12d,%ebx xorl 40(%rsp),%r14d andl %r11d,%eax movl %esi,%ecx xorl 0(%rsp),%r14d leal -1894007588(%rbp,%r13,1),%r13d xorl %r11d,%ebx roll $5,%ecx addl %eax,%r13d roll $1,%r14d andl %edi,%ebx addl %ecx,%r13d roll $30,%edi addl %ebx,%r13d xorl 36(%rsp),%edx movl %r11d,%eax movl %r14d,32(%rsp) movl %r11d,%ebx xorl 44(%rsp),%edx andl %edi,%eax movl %r13d,%ecx xorl 4(%rsp),%edx leal -1894007588(%r14,%r12,1),%r12d xorl %edi,%ebx roll $5,%ecx addl %eax,%r12d roll $1,%edx andl %esi,%ebx addl %ecx,%r12d roll $30,%esi addl %ebx,%r12d xorl 40(%rsp),%ebp movl %edi,%eax movl %edx,36(%rsp) movl %edi,%ebx xorl 48(%rsp),%ebp andl %esi,%eax movl %r12d,%ecx xorl 8(%rsp),%ebp leal -1894007588(%rdx,%r11,1),%r11d xorl %esi,%ebx roll $5,%ecx addl %eax,%r11d roll $1,%ebp andl %r13d,%ebx addl %ecx,%r11d roll $30,%r13d addl %ebx,%r11d xorl 44(%rsp),%r14d movl %esi,%eax movl %ebp,40(%rsp) movl %esi,%ebx xorl 52(%rsp),%r14d andl %r13d,%eax movl %r11d,%ecx xorl 12(%rsp),%r14d leal -1894007588(%rbp,%rdi,1),%edi xorl %r13d,%ebx roll $5,%ecx addl %eax,%edi roll $1,%r14d andl %r12d,%ebx addl %ecx,%edi roll $30,%r12d addl %ebx,%edi xorl 48(%rsp),%edx movl %r13d,%eax movl %r14d,44(%rsp) movl %r13d,%ebx xorl 56(%rsp),%edx andl %r12d,%eax movl %edi,%ecx xorl 16(%rsp),%edx leal -1894007588(%r14,%rsi,1),%esi xorl %r12d,%ebx roll $5,%ecx addl %eax,%esi roll $1,%edx andl %r11d,%ebx addl %ecx,%esi roll $30,%r11d addl %ebx,%esi xorl 52(%rsp),%ebp movl %edi,%eax movl %edx,48(%rsp) movl %esi,%ecx xorl 60(%rsp),%ebp xorl %r12d,%eax roll $5,%ecx xorl 20(%rsp),%ebp leal -899497514(%rdx,%r13,1),%r13d xorl %r11d,%eax addl %ecx,%r13d roll $30,%edi addl %eax,%r13d roll $1,%ebp xorl 56(%rsp),%r14d movl %esi,%eax movl %ebp,52(%rsp) movl %r13d,%ecx xorl 0(%rsp),%r14d xorl %r11d,%eax roll $5,%ecx xorl 24(%rsp),%r14d leal -899497514(%rbp,%r12,1),%r12d xorl %edi,%eax addl %ecx,%r12d roll $30,%esi addl %eax,%r12d roll $1,%r14d xorl 60(%rsp),%edx movl %r13d,%eax movl %r14d,56(%rsp) movl %r12d,%ecx xorl 4(%rsp),%edx xorl %edi,%eax roll $5,%ecx xorl 28(%rsp),%edx leal -899497514(%r14,%r11,1),%r11d xorl %esi,%eax addl %ecx,%r11d roll $30,%r13d addl %eax,%r11d roll $1,%edx xorl 0(%rsp),%ebp movl %r12d,%eax movl %edx,60(%rsp) movl %r11d,%ecx xorl 8(%rsp),%ebp xorl %esi,%eax roll $5,%ecx xorl 32(%rsp),%ebp leal -899497514(%rdx,%rdi,1),%edi xorl %r13d,%eax addl %ecx,%edi roll $30,%r12d addl %eax,%edi roll $1,%ebp xorl 4(%rsp),%r14d movl %r11d,%eax movl %ebp,0(%rsp) movl %edi,%ecx xorl 12(%rsp),%r14d xorl %r13d,%eax roll $5,%ecx xorl 36(%rsp),%r14d leal -899497514(%rbp,%rsi,1),%esi xorl %r12d,%eax addl %ecx,%esi roll $30,%r11d addl %eax,%esi roll $1,%r14d xorl 8(%rsp),%edx movl %edi,%eax movl %r14d,4(%rsp) movl %esi,%ecx xorl 16(%rsp),%edx xorl %r12d,%eax roll $5,%ecx xorl 40(%rsp),%edx leal -899497514(%r14,%r13,1),%r13d xorl %r11d,%eax addl %ecx,%r13d roll $30,%edi addl %eax,%r13d roll $1,%edx xorl 12(%rsp),%ebp movl %esi,%eax movl %edx,8(%rsp) movl %r13d,%ecx 
xorl 20(%rsp),%ebp xorl %r11d,%eax roll $5,%ecx xorl 44(%rsp),%ebp leal -899497514(%rdx,%r12,1),%r12d xorl %edi,%eax addl %ecx,%r12d roll $30,%esi addl %eax,%r12d roll $1,%ebp xorl 16(%rsp),%r14d movl %r13d,%eax movl %ebp,12(%rsp) movl %r12d,%ecx xorl 24(%rsp),%r14d xorl %edi,%eax roll $5,%ecx xorl 48(%rsp),%r14d leal -899497514(%rbp,%r11,1),%r11d xorl %esi,%eax addl %ecx,%r11d roll $30,%r13d addl %eax,%r11d roll $1,%r14d xorl 20(%rsp),%edx movl %r12d,%eax movl %r14d,16(%rsp) movl %r11d,%ecx xorl 28(%rsp),%edx xorl %esi,%eax roll $5,%ecx xorl 52(%rsp),%edx leal -899497514(%r14,%rdi,1),%edi xorl %r13d,%eax addl %ecx,%edi roll $30,%r12d addl %eax,%edi roll $1,%edx xorl 24(%rsp),%ebp movl %r11d,%eax movl %edx,20(%rsp) movl %edi,%ecx xorl 32(%rsp),%ebp xorl %r13d,%eax roll $5,%ecx xorl 56(%rsp),%ebp leal -899497514(%rdx,%rsi,1),%esi xorl %r12d,%eax addl %ecx,%esi roll $30,%r11d addl %eax,%esi roll $1,%ebp xorl 28(%rsp),%r14d movl %edi,%eax movl %ebp,24(%rsp) movl %esi,%ecx xorl 36(%rsp),%r14d xorl %r12d,%eax roll $5,%ecx xorl 60(%rsp),%r14d leal -899497514(%rbp,%r13,1),%r13d xorl %r11d,%eax addl %ecx,%r13d roll $30,%edi addl %eax,%r13d roll $1,%r14d xorl 32(%rsp),%edx movl %esi,%eax movl %r14d,28(%rsp) movl %r13d,%ecx xorl 40(%rsp),%edx xorl %r11d,%eax roll $5,%ecx xorl 0(%rsp),%edx leal -899497514(%r14,%r12,1),%r12d xorl %edi,%eax addl %ecx,%r12d roll $30,%esi addl %eax,%r12d roll $1,%edx xorl 36(%rsp),%ebp movl %r13d,%eax movl %r12d,%ecx xorl 44(%rsp),%ebp xorl %edi,%eax roll $5,%ecx xorl 4(%rsp),%ebp leal -899497514(%rdx,%r11,1),%r11d xorl %esi,%eax addl %ecx,%r11d roll $30,%r13d addl %eax,%r11d roll $1,%ebp xorl 40(%rsp),%r14d movl %r12d,%eax movl %r11d,%ecx xorl 48(%rsp),%r14d xorl %esi,%eax roll $5,%ecx xorl 8(%rsp),%r14d leal -899497514(%rbp,%rdi,1),%edi xorl %r13d,%eax addl %ecx,%edi roll $30,%r12d addl %eax,%edi roll $1,%r14d xorl 44(%rsp),%edx movl %r11d,%eax movl %edi,%ecx xorl 52(%rsp),%edx xorl %r13d,%eax roll $5,%ecx xorl 12(%rsp),%edx leal -899497514(%r14,%rsi,1),%esi xorl %r12d,%eax addl %ecx,%esi roll $30,%r11d addl %eax,%esi roll $1,%edx xorl 48(%rsp),%ebp movl %edi,%eax movl %esi,%ecx xorl 56(%rsp),%ebp xorl %r12d,%eax roll $5,%ecx xorl 16(%rsp),%ebp leal -899497514(%rdx,%r13,1),%r13d xorl %r11d,%eax addl %ecx,%r13d roll $30,%edi addl %eax,%r13d roll $1,%ebp xorl 52(%rsp),%r14d movl %esi,%eax movl %r13d,%ecx xorl 60(%rsp),%r14d xorl %r11d,%eax roll $5,%ecx xorl 20(%rsp),%r14d leal -899497514(%rbp,%r12,1),%r12d xorl %edi,%eax addl %ecx,%r12d roll $30,%esi addl %eax,%r12d roll $1,%r14d xorl 56(%rsp),%edx movl %r13d,%eax movl %r12d,%ecx xorl 0(%rsp),%edx xorl %edi,%eax roll $5,%ecx xorl 24(%rsp),%edx leal -899497514(%r14,%r11,1),%r11d xorl %esi,%eax addl %ecx,%r11d roll $30,%r13d addl %eax,%r11d roll $1,%edx xorl 60(%rsp),%ebp movl %r12d,%eax movl %r11d,%ecx xorl 4(%rsp),%ebp xorl %esi,%eax roll $5,%ecx xorl 28(%rsp),%ebp leal -899497514(%rdx,%rdi,1),%edi xorl %r13d,%eax addl %ecx,%edi roll $30,%r12d addl %eax,%edi roll $1,%ebp movl %r11d,%eax movl %edi,%ecx xorl %r13d,%eax leal -899497514(%rbp,%rsi,1),%esi roll $5,%ecx xorl %r12d,%eax addl %ecx,%esi roll $30,%r11d addl %eax,%esi addl 0(%r8),%esi addl 4(%r8),%edi addl 8(%r8),%r11d addl 12(%r8),%r12d addl 16(%r8),%r13d movl %esi,0(%r8) movl %edi,4(%r8) movl %r11d,8(%r8) movl %r12d,12(%r8) movl %r13d,16(%r8) subq $1,%r10 leaq 64(%r9),%r9 jnz .Lloop movq 64(%rsp),%rsi movq -40(%rsi),%r14 movq -32(%rsi),%r13 movq -24(%rsi),%r12 movq -16(%rsi),%rbp movq -8(%rsi),%rbx leaq (%rsi),%rsp .Lepilogue: .byte 0xf3,0xc3 .size 
sha1_block_data_order,.-sha1_block_data_order .type sha1_block_data_order_shaext,@function .align 32 sha1_block_data_order_shaext: _shaext_shortcut: movdqu (%rdi),%xmm0 movd 16(%rdi),%xmm1 movdqa K_XX_XX+160(%rip),%xmm3 movdqu (%rsi),%xmm4 pshufd $27,%xmm0,%xmm0 movdqu 16(%rsi),%xmm5 pshufd $27,%xmm1,%xmm1 movdqu 32(%rsi),%xmm6 .byte 102,15,56,0,227 movdqu 48(%rsi),%xmm7 .byte 102,15,56,0,235 .byte 102,15,56,0,243 movdqa %xmm1,%xmm9 .byte 102,15,56,0,251 jmp .Loop_shaext .align 16 .Loop_shaext: decq %rdx leaq 64(%rsi),%rax paddd %xmm4,%xmm1 cmovneq %rax,%rsi movdqa %xmm0,%xmm8 .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,0 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,0 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,0 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,0 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,0 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,1 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,1 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,1 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,1 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,1 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,2 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,2 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,2 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,2 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,2 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,3 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 movdqu (%rsi),%xmm4 movdqa %xmm0,%xmm2 .byte 15,58,204,193,3 .byte 15,56,200,213 movdqu 16(%rsi),%xmm5 .byte 102,15,56,0,227 movdqa %xmm0,%xmm1 .byte 15,58,204,194,3 .byte 15,56,200,206 movdqu 32(%rsi),%xmm6 .byte 102,15,56,0,235 movdqa %xmm0,%xmm2 .byte 15,58,204,193,3 .byte 15,56,200,215 movdqu 48(%rsi),%xmm7 .byte 102,15,56,0,243 movdqa %xmm0,%xmm1 .byte 15,58,204,194,3 .byte 65,15,56,200,201 .byte 102,15,56,0,251 paddd %xmm8,%xmm0 movdqa %xmm1,%xmm9 jnz .Loop_shaext pshufd $27,%xmm0,%xmm0 pshufd $27,%xmm1,%xmm1 movdqu %xmm0,(%rdi) movd %xmm1,16(%rdi) .byte 0xf3,0xc3 .size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext .type sha1_block_data_order_ssse3,@function .align 16 sha1_block_data_order_ssse3: _ssse3_shortcut: movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 leaq -64(%rsp),%rsp movq %rax,%r14 andq $-64,%rsp movq %rdi,%r8 movq %rsi,%r9 movq %rdx,%r10 shlq $6,%r10 addq %r9,%r10 leaq K_XX_XX+64(%rip),%r11 movl 0(%r8),%eax movl 4(%r8),%ebx movl 8(%r8),%ecx movl 12(%r8),%edx movl %ebx,%esi movl 16(%r8),%ebp movl %ecx,%edi xorl 
%edx,%edi andl %edi,%esi movdqa 64(%r11),%xmm6 movdqa -64(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 movdqu 48(%r9),%xmm3 .byte 102,15,56,0,198 .byte 102,15,56,0,206 .byte 102,15,56,0,214 addq $64,%r9 paddd %xmm9,%xmm0 .byte 102,15,56,0,222 paddd %xmm9,%xmm1 paddd %xmm9,%xmm2 movdqa %xmm0,0(%rsp) psubd %xmm9,%xmm0 movdqa %xmm1,16(%rsp) psubd %xmm9,%xmm1 movdqa %xmm2,32(%rsp) psubd %xmm9,%xmm2 jmp .Loop_ssse3 .align 16 .Loop_ssse3: rorl $2,%ebx pshufd $238,%xmm0,%xmm4 xorl %edx,%esi movdqa %xmm3,%xmm8 paddd %xmm3,%xmm9 movl %eax,%edi addl 0(%rsp),%ebp punpcklqdq %xmm1,%xmm4 xorl %ecx,%ebx roll $5,%eax addl %esi,%ebp psrldq $4,%xmm8 andl %ebx,%edi xorl %ecx,%ebx pxor %xmm0,%xmm4 addl %eax,%ebp rorl $7,%eax pxor %xmm2,%xmm8 xorl %ecx,%edi movl %ebp,%esi addl 4(%rsp),%edx pxor %xmm8,%xmm4 xorl %ebx,%eax roll $5,%ebp movdqa %xmm9,48(%rsp) addl %edi,%edx andl %eax,%esi movdqa %xmm4,%xmm10 xorl %ebx,%eax addl %ebp,%edx rorl $7,%ebp movdqa %xmm4,%xmm8 xorl %ebx,%esi pslldq $12,%xmm10 paddd %xmm4,%xmm4 movl %edx,%edi addl 8(%rsp),%ecx psrld $31,%xmm8 xorl %eax,%ebp roll $5,%edx addl %esi,%ecx movdqa %xmm10,%xmm9 andl %ebp,%edi xorl %eax,%ebp psrld $30,%xmm10 addl %edx,%ecx rorl $7,%edx por %xmm8,%xmm4 xorl %eax,%edi movl %ecx,%esi addl 12(%rsp),%ebx pslld $2,%xmm9 pxor %xmm10,%xmm4 xorl %ebp,%edx movdqa -64(%r11),%xmm10 roll $5,%ecx addl %edi,%ebx andl %edx,%esi pxor %xmm9,%xmm4 xorl %ebp,%edx addl %ecx,%ebx rorl $7,%ecx pshufd $238,%xmm1,%xmm5 xorl %ebp,%esi movdqa %xmm4,%xmm9 paddd %xmm4,%xmm10 movl %ebx,%edi addl 16(%rsp),%eax punpcklqdq %xmm2,%xmm5 xorl %edx,%ecx roll $5,%ebx addl %esi,%eax psrldq $4,%xmm9 andl %ecx,%edi xorl %edx,%ecx pxor %xmm1,%xmm5 addl %ebx,%eax rorl $7,%ebx pxor %xmm3,%xmm9 xorl %edx,%edi movl %eax,%esi addl 20(%rsp),%ebp pxor %xmm9,%xmm5 xorl %ecx,%ebx roll $5,%eax movdqa %xmm10,0(%rsp) addl %edi,%ebp andl %ebx,%esi movdqa %xmm5,%xmm8 xorl %ecx,%ebx addl %eax,%ebp rorl $7,%eax movdqa %xmm5,%xmm9 xorl %ecx,%esi pslldq $12,%xmm8 paddd %xmm5,%xmm5 movl %ebp,%edi addl 24(%rsp),%edx psrld $31,%xmm9 xorl %ebx,%eax roll $5,%ebp addl %esi,%edx movdqa %xmm8,%xmm10 andl %eax,%edi xorl %ebx,%eax psrld $30,%xmm8 addl %ebp,%edx rorl $7,%ebp por %xmm9,%xmm5 xorl %ebx,%edi movl %edx,%esi addl 28(%rsp),%ecx pslld $2,%xmm10 pxor %xmm8,%xmm5 xorl %eax,%ebp movdqa -32(%r11),%xmm8 roll $5,%edx addl %edi,%ecx andl %ebp,%esi pxor %xmm10,%xmm5 xorl %eax,%ebp addl %edx,%ecx rorl $7,%edx pshufd $238,%xmm2,%xmm6 xorl %eax,%esi movdqa %xmm5,%xmm10 paddd %xmm5,%xmm8 movl %ecx,%edi addl 32(%rsp),%ebx punpcklqdq %xmm3,%xmm6 xorl %ebp,%edx roll $5,%ecx addl %esi,%ebx psrldq $4,%xmm10 andl %edx,%edi xorl %ebp,%edx pxor %xmm2,%xmm6 addl %ecx,%ebx rorl $7,%ecx pxor %xmm4,%xmm10 xorl %ebp,%edi movl %ebx,%esi addl 36(%rsp),%eax pxor %xmm10,%xmm6 xorl %edx,%ecx roll $5,%ebx movdqa %xmm8,16(%rsp) addl %edi,%eax andl %ecx,%esi movdqa %xmm6,%xmm9 xorl %edx,%ecx addl %ebx,%eax rorl $7,%ebx movdqa %xmm6,%xmm10 xorl %edx,%esi pslldq $12,%xmm9 paddd %xmm6,%xmm6 movl %eax,%edi addl 40(%rsp),%ebp psrld $31,%xmm10 xorl %ecx,%ebx roll $5,%eax addl %esi,%ebp movdqa %xmm9,%xmm8 andl %ebx,%edi xorl %ecx,%ebx psrld $30,%xmm9 addl %eax,%ebp rorl $7,%eax por %xmm10,%xmm6 xorl %ecx,%edi movl %ebp,%esi addl 44(%rsp),%edx pslld $2,%xmm8 pxor %xmm9,%xmm6 xorl %ebx,%eax movdqa -32(%r11),%xmm9 roll $5,%ebp addl %edi,%edx andl %eax,%esi pxor %xmm8,%xmm6 xorl %ebx,%eax addl %ebp,%edx rorl $7,%ebp pshufd $238,%xmm3,%xmm7 xorl %ebx,%esi movdqa %xmm6,%xmm8 paddd %xmm6,%xmm9 movl %edx,%edi addl 
48(%rsp),%ecx punpcklqdq %xmm4,%xmm7 xorl %eax,%ebp roll $5,%edx addl %esi,%ecx psrldq $4,%xmm8 andl %ebp,%edi xorl %eax,%ebp pxor %xmm3,%xmm7 addl %edx,%ecx rorl $7,%edx pxor %xmm5,%xmm8 xorl %eax,%edi movl %ecx,%esi addl 52(%rsp),%ebx pxor %xmm8,%xmm7 xorl %ebp,%edx roll $5,%ecx movdqa %xmm9,32(%rsp) addl %edi,%ebx andl %edx,%esi movdqa %xmm7,%xmm10 xorl %ebp,%edx addl %ecx,%ebx rorl $7,%ecx movdqa %xmm7,%xmm8 xorl %ebp,%esi pslldq $12,%xmm10 paddd %xmm7,%xmm7 movl %ebx,%edi addl 56(%rsp),%eax psrld $31,%xmm8 xorl %edx,%ecx roll $5,%ebx addl %esi,%eax movdqa %xmm10,%xmm9 andl %ecx,%edi xorl %edx,%ecx psrld $30,%xmm10 addl %ebx,%eax rorl $7,%ebx por %xmm8,%xmm7 xorl %edx,%edi movl %eax,%esi addl 60(%rsp),%ebp pslld $2,%xmm9 pxor %xmm10,%xmm7 xorl %ecx,%ebx movdqa -32(%r11),%xmm10 roll $5,%eax addl %edi,%ebp andl %ebx,%esi pxor %xmm9,%xmm7 pshufd $238,%xmm6,%xmm9 xorl %ecx,%ebx addl %eax,%ebp rorl $7,%eax pxor %xmm4,%xmm0 xorl %ecx,%esi movl %ebp,%edi addl 0(%rsp),%edx punpcklqdq %xmm7,%xmm9 xorl %ebx,%eax roll $5,%ebp pxor %xmm1,%xmm0 addl %esi,%edx andl %eax,%edi movdqa %xmm10,%xmm8 xorl %ebx,%eax paddd %xmm7,%xmm10 addl %ebp,%edx pxor %xmm9,%xmm0 rorl $7,%ebp xorl %ebx,%edi movl %edx,%esi addl 4(%rsp),%ecx movdqa %xmm0,%xmm9 xorl %eax,%ebp roll $5,%edx movdqa %xmm10,48(%rsp) addl %edi,%ecx andl %ebp,%esi xorl %eax,%ebp pslld $2,%xmm0 addl %edx,%ecx rorl $7,%edx psrld $30,%xmm9 xorl %eax,%esi movl %ecx,%edi addl 8(%rsp),%ebx por %xmm9,%xmm0 xorl %ebp,%edx roll $5,%ecx pshufd $238,%xmm7,%xmm10 addl %esi,%ebx andl %edx,%edi xorl %ebp,%edx addl %ecx,%ebx addl 12(%rsp),%eax xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx addl %edi,%eax xorl %edx,%esi rorl $7,%ecx addl %ebx,%eax pxor %xmm5,%xmm1 addl 16(%rsp),%ebp xorl %ecx,%esi punpcklqdq %xmm0,%xmm10 movl %eax,%edi roll $5,%eax pxor %xmm2,%xmm1 addl %esi,%ebp xorl %ecx,%edi movdqa %xmm8,%xmm9 rorl $7,%ebx paddd %xmm0,%xmm8 addl %eax,%ebp pxor %xmm10,%xmm1 addl 20(%rsp),%edx xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm1,%xmm10 addl %edi,%edx xorl %ebx,%esi movdqa %xmm8,0(%rsp) rorl $7,%eax addl %ebp,%edx addl 24(%rsp),%ecx pslld $2,%xmm1 xorl %eax,%esi movl %edx,%edi psrld $30,%xmm10 roll $5,%edx addl %esi,%ecx xorl %eax,%edi rorl $7,%ebp por %xmm10,%xmm1 addl %edx,%ecx addl 28(%rsp),%ebx pshufd $238,%xmm0,%xmm8 xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx addl %edi,%ebx xorl %ebp,%esi rorl $7,%edx addl %ecx,%ebx pxor %xmm6,%xmm2 addl 32(%rsp),%eax xorl %edx,%esi punpcklqdq %xmm1,%xmm8 movl %ebx,%edi roll $5,%ebx pxor %xmm3,%xmm2 addl %esi,%eax xorl %edx,%edi movdqa 0(%r11),%xmm10 rorl $7,%ecx paddd %xmm1,%xmm9 addl %ebx,%eax pxor %xmm8,%xmm2 addl 36(%rsp),%ebp xorl %ecx,%edi movl %eax,%esi roll $5,%eax movdqa %xmm2,%xmm8 addl %edi,%ebp xorl %ecx,%esi movdqa %xmm9,16(%rsp) rorl $7,%ebx addl %eax,%ebp addl 40(%rsp),%edx pslld $2,%xmm2 xorl %ebx,%esi movl %ebp,%edi psrld $30,%xmm8 roll $5,%ebp addl %esi,%edx xorl %ebx,%edi rorl $7,%eax por %xmm8,%xmm2 addl %ebp,%edx addl 44(%rsp),%ecx pshufd $238,%xmm1,%xmm9 xorl %eax,%edi movl %edx,%esi roll $5,%edx addl %edi,%ecx xorl %eax,%esi rorl $7,%ebp addl %edx,%ecx pxor %xmm7,%xmm3 addl 48(%rsp),%ebx xorl %ebp,%esi punpcklqdq %xmm2,%xmm9 movl %ecx,%edi roll $5,%ecx pxor %xmm4,%xmm3 addl %esi,%ebx xorl %ebp,%edi movdqa %xmm10,%xmm8 rorl $7,%edx paddd %xmm2,%xmm10 addl %ecx,%ebx pxor %xmm9,%xmm3 addl 52(%rsp),%eax xorl %edx,%edi movl %ebx,%esi roll $5,%ebx movdqa %xmm3,%xmm9 addl %edi,%eax xorl %edx,%esi movdqa %xmm10,32(%rsp) rorl $7,%ecx addl %ebx,%eax addl 56(%rsp),%ebp pslld $2,%xmm3 xorl 
%ecx,%esi movl %eax,%edi psrld $30,%xmm9 roll $5,%eax addl %esi,%ebp xorl %ecx,%edi rorl $7,%ebx por %xmm9,%xmm3 addl %eax,%ebp addl 60(%rsp),%edx pshufd $238,%xmm2,%xmm10 xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp addl %edi,%edx xorl %ebx,%esi rorl $7,%eax addl %ebp,%edx pxor %xmm0,%xmm4 addl 0(%rsp),%ecx xorl %eax,%esi punpcklqdq %xmm3,%xmm10 movl %edx,%edi roll $5,%edx pxor %xmm5,%xmm4 addl %esi,%ecx xorl %eax,%edi movdqa %xmm8,%xmm9 rorl $7,%ebp paddd %xmm3,%xmm8 addl %edx,%ecx pxor %xmm10,%xmm4 addl 4(%rsp),%ebx xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx movdqa %xmm4,%xmm10 addl %edi,%ebx xorl %ebp,%esi movdqa %xmm8,48(%rsp) rorl $7,%edx addl %ecx,%ebx addl 8(%rsp),%eax pslld $2,%xmm4 xorl %edx,%esi movl %ebx,%edi psrld $30,%xmm10 roll $5,%ebx addl %esi,%eax xorl %edx,%edi rorl $7,%ecx por %xmm10,%xmm4 addl %ebx,%eax addl 12(%rsp),%ebp pshufd $238,%xmm3,%xmm8 xorl %ecx,%edi movl %eax,%esi roll $5,%eax addl %edi,%ebp xorl %ecx,%esi rorl $7,%ebx addl %eax,%ebp pxor %xmm1,%xmm5 addl 16(%rsp),%edx xorl %ebx,%esi punpcklqdq %xmm4,%xmm8 movl %ebp,%edi roll $5,%ebp pxor %xmm6,%xmm5 addl %esi,%edx xorl %ebx,%edi movdqa %xmm9,%xmm10 rorl $7,%eax paddd %xmm4,%xmm9 addl %ebp,%edx pxor %xmm8,%xmm5 addl 20(%rsp),%ecx xorl %eax,%edi movl %edx,%esi roll $5,%edx movdqa %xmm5,%xmm8 addl %edi,%ecx xorl %eax,%esi movdqa %xmm9,0(%rsp) rorl $7,%ebp addl %edx,%ecx addl 24(%rsp),%ebx pslld $2,%xmm5 xorl %ebp,%esi movl %ecx,%edi psrld $30,%xmm8 roll $5,%ecx addl %esi,%ebx xorl %ebp,%edi rorl $7,%edx por %xmm8,%xmm5 addl %ecx,%ebx addl 28(%rsp),%eax pshufd $238,%xmm4,%xmm9 rorl $7,%ecx movl %ebx,%esi xorl %edx,%edi roll $5,%ebx addl %edi,%eax xorl %ecx,%esi xorl %edx,%ecx addl %ebx,%eax pxor %xmm2,%xmm6 addl 32(%rsp),%ebp andl %ecx,%esi xorl %edx,%ecx rorl $7,%ebx punpcklqdq %xmm5,%xmm9 movl %eax,%edi xorl %ecx,%esi pxor %xmm7,%xmm6 roll $5,%eax addl %esi,%ebp movdqa %xmm10,%xmm8 xorl %ebx,%edi paddd %xmm5,%xmm10 xorl %ecx,%ebx pxor %xmm9,%xmm6 addl %eax,%ebp addl 36(%rsp),%edx andl %ebx,%edi xorl %ecx,%ebx rorl $7,%eax movdqa %xmm6,%xmm9 movl %ebp,%esi xorl %ebx,%edi movdqa %xmm10,16(%rsp) roll $5,%ebp addl %edi,%edx xorl %eax,%esi pslld $2,%xmm6 xorl %ebx,%eax addl %ebp,%edx psrld $30,%xmm9 addl 40(%rsp),%ecx andl %eax,%esi xorl %ebx,%eax por %xmm9,%xmm6 rorl $7,%ebp movl %edx,%edi xorl %eax,%esi roll $5,%edx pshufd $238,%xmm5,%xmm10 addl %esi,%ecx xorl %ebp,%edi xorl %eax,%ebp addl %edx,%ecx addl 44(%rsp),%ebx andl %ebp,%edi xorl %eax,%ebp rorl $7,%edx movl %ecx,%esi xorl %ebp,%edi roll $5,%ecx addl %edi,%ebx xorl %edx,%esi xorl %ebp,%edx addl %ecx,%ebx pxor %xmm3,%xmm7 addl 48(%rsp),%eax andl %edx,%esi xorl %ebp,%edx rorl $7,%ecx punpcklqdq %xmm6,%xmm10 movl %ebx,%edi xorl %edx,%esi pxor %xmm0,%xmm7 roll $5,%ebx addl %esi,%eax movdqa 32(%r11),%xmm9 xorl %ecx,%edi paddd %xmm6,%xmm8 xorl %edx,%ecx pxor %xmm10,%xmm7 addl %ebx,%eax addl 52(%rsp),%ebp andl %ecx,%edi xorl %edx,%ecx rorl $7,%ebx movdqa %xmm7,%xmm10 movl %eax,%esi xorl %ecx,%edi movdqa %xmm8,32(%rsp) roll $5,%eax addl %edi,%ebp xorl %ebx,%esi pslld $2,%xmm7 xorl %ecx,%ebx addl %eax,%ebp psrld $30,%xmm10 addl 56(%rsp),%edx andl %ebx,%esi xorl %ecx,%ebx por %xmm10,%xmm7 rorl $7,%eax movl %ebp,%edi xorl %ebx,%esi roll $5,%ebp pshufd $238,%xmm6,%xmm8 addl %esi,%edx xorl %eax,%edi xorl %ebx,%eax addl %ebp,%edx addl 60(%rsp),%ecx andl %eax,%edi xorl %ebx,%eax rorl $7,%ebp movl %edx,%esi xorl %eax,%edi roll $5,%edx addl %edi,%ecx xorl %ebp,%esi xorl %eax,%ebp addl %edx,%ecx pxor %xmm4,%xmm0 addl 0(%rsp),%ebx andl %ebp,%esi xorl %eax,%ebp rorl $7,%edx 
punpcklqdq %xmm7,%xmm8 movl %ecx,%edi xorl %ebp,%esi pxor %xmm1,%xmm0 roll $5,%ecx addl %esi,%ebx movdqa %xmm9,%xmm10 xorl %edx,%edi paddd %xmm7,%xmm9 xorl %ebp,%edx pxor %xmm8,%xmm0 addl %ecx,%ebx addl 4(%rsp),%eax andl %edx,%edi xorl %ebp,%edx rorl $7,%ecx movdqa %xmm0,%xmm8 movl %ebx,%esi xorl %edx,%edi movdqa %xmm9,48(%rsp) roll $5,%ebx addl %edi,%eax xorl %ecx,%esi pslld $2,%xmm0 xorl %edx,%ecx addl %ebx,%eax psrld $30,%xmm8 addl 8(%rsp),%ebp andl %ecx,%esi xorl %edx,%ecx por %xmm8,%xmm0 rorl $7,%ebx movl %eax,%edi xorl %ecx,%esi roll $5,%eax pshufd $238,%xmm7,%xmm9 addl %esi,%ebp xorl %ebx,%edi xorl %ecx,%ebx addl %eax,%ebp addl 12(%rsp),%edx andl %ebx,%edi xorl %ecx,%ebx rorl $7,%eax movl %ebp,%esi xorl %ebx,%edi roll $5,%ebp addl %edi,%edx xorl %eax,%esi xorl %ebx,%eax addl %ebp,%edx pxor %xmm5,%xmm1 addl 16(%rsp),%ecx andl %eax,%esi xorl %ebx,%eax rorl $7,%ebp punpcklqdq %xmm0,%xmm9 movl %edx,%edi xorl %eax,%esi pxor %xmm2,%xmm1 roll $5,%edx addl %esi,%ecx movdqa %xmm10,%xmm8 xorl %ebp,%edi paddd %xmm0,%xmm10 xorl %eax,%ebp pxor %xmm9,%xmm1 addl %edx,%ecx addl 20(%rsp),%ebx andl %ebp,%edi xorl %eax,%ebp rorl $7,%edx movdqa %xmm1,%xmm9 movl %ecx,%esi xorl %ebp,%edi movdqa %xmm10,0(%rsp) roll $5,%ecx addl %edi,%ebx xorl %edx,%esi pslld $2,%xmm1 xorl %ebp,%edx addl %ecx,%ebx psrld $30,%xmm9 addl 24(%rsp),%eax andl %edx,%esi xorl %ebp,%edx por %xmm9,%xmm1 rorl $7,%ecx movl %ebx,%edi xorl %edx,%esi roll $5,%ebx pshufd $238,%xmm0,%xmm10 addl %esi,%eax xorl %ecx,%edi xorl %edx,%ecx addl %ebx,%eax addl 28(%rsp),%ebp andl %ecx,%edi xorl %edx,%ecx rorl $7,%ebx movl %eax,%esi xorl %ecx,%edi roll $5,%eax addl %edi,%ebp xorl %ebx,%esi xorl %ecx,%ebx addl %eax,%ebp pxor %xmm6,%xmm2 addl 32(%rsp),%edx andl %ebx,%esi xorl %ecx,%ebx rorl $7,%eax punpcklqdq %xmm1,%xmm10 movl %ebp,%edi xorl %ebx,%esi pxor %xmm3,%xmm2 roll $5,%ebp addl %esi,%edx movdqa %xmm8,%xmm9 xorl %eax,%edi paddd %xmm1,%xmm8 xorl %ebx,%eax pxor %xmm10,%xmm2 addl %ebp,%edx addl 36(%rsp),%ecx andl %eax,%edi xorl %ebx,%eax rorl $7,%ebp movdqa %xmm2,%xmm10 movl %edx,%esi xorl %eax,%edi movdqa %xmm8,16(%rsp) roll $5,%edx addl %edi,%ecx xorl %ebp,%esi pslld $2,%xmm2 xorl %eax,%ebp addl %edx,%ecx psrld $30,%xmm10 addl 40(%rsp),%ebx andl %ebp,%esi xorl %eax,%ebp por %xmm10,%xmm2 rorl $7,%edx movl %ecx,%edi xorl %ebp,%esi roll $5,%ecx pshufd $238,%xmm1,%xmm8 addl %esi,%ebx xorl %edx,%edi xorl %ebp,%edx addl %ecx,%ebx addl 44(%rsp),%eax andl %edx,%edi xorl %ebp,%edx rorl $7,%ecx movl %ebx,%esi xorl %edx,%edi roll $5,%ebx addl %edi,%eax xorl %edx,%esi addl %ebx,%eax pxor %xmm7,%xmm3 addl 48(%rsp),%ebp xorl %ecx,%esi punpcklqdq %xmm2,%xmm8 movl %eax,%edi roll $5,%eax pxor %xmm4,%xmm3 addl %esi,%ebp xorl %ecx,%edi movdqa %xmm9,%xmm10 rorl $7,%ebx paddd %xmm2,%xmm9 addl %eax,%ebp pxor %xmm8,%xmm3 addl 52(%rsp),%edx xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm3,%xmm8 addl %edi,%edx xorl %ebx,%esi movdqa %xmm9,32(%rsp) rorl $7,%eax addl %ebp,%edx addl 56(%rsp),%ecx pslld $2,%xmm3 xorl %eax,%esi movl %edx,%edi psrld $30,%xmm8 roll $5,%edx addl %esi,%ecx xorl %eax,%edi rorl $7,%ebp por %xmm8,%xmm3 addl %edx,%ecx addl 60(%rsp),%ebx xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx addl %edi,%ebx xorl %ebp,%esi rorl $7,%edx addl %ecx,%ebx addl 0(%rsp),%eax xorl %edx,%esi movl %ebx,%edi roll $5,%ebx paddd %xmm3,%xmm10 addl %esi,%eax xorl %edx,%edi movdqa %xmm10,48(%rsp) rorl $7,%ecx addl %ebx,%eax addl 4(%rsp),%ebp xorl %ecx,%edi movl %eax,%esi roll $5,%eax addl %edi,%ebp xorl %ecx,%esi rorl $7,%ebx addl %eax,%ebp addl 8(%rsp),%edx xorl %ebx,%esi 
movl %ebp,%edi roll $5,%ebp addl %esi,%edx xorl %ebx,%edi rorl $7,%eax addl %ebp,%edx addl 12(%rsp),%ecx xorl %eax,%edi movl %edx,%esi roll $5,%edx addl %edi,%ecx xorl %eax,%esi rorl $7,%ebp addl %edx,%ecx cmpq %r10,%r9 je .Ldone_ssse3 movdqa 64(%r11),%xmm6 movdqa -64(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 movdqu 48(%r9),%xmm3 .byte 102,15,56,0,198 addq $64,%r9 addl 16(%rsp),%ebx xorl %ebp,%esi movl %ecx,%edi .byte 102,15,56,0,206 roll $5,%ecx addl %esi,%ebx xorl %ebp,%edi rorl $7,%edx paddd %xmm9,%xmm0 addl %ecx,%ebx addl 20(%rsp),%eax xorl %edx,%edi movl %ebx,%esi movdqa %xmm0,0(%rsp) roll $5,%ebx addl %edi,%eax xorl %edx,%esi rorl $7,%ecx psubd %xmm9,%xmm0 addl %ebx,%eax addl 24(%rsp),%ebp xorl %ecx,%esi movl %eax,%edi roll $5,%eax addl %esi,%ebp xorl %ecx,%edi rorl $7,%ebx addl %eax,%ebp addl 28(%rsp),%edx xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp addl %edi,%edx xorl %ebx,%esi rorl $7,%eax addl %ebp,%edx addl 32(%rsp),%ecx xorl %eax,%esi movl %edx,%edi .byte 102,15,56,0,214 roll $5,%edx addl %esi,%ecx xorl %eax,%edi rorl $7,%ebp paddd %xmm9,%xmm1 addl %edx,%ecx addl 36(%rsp),%ebx xorl %ebp,%edi movl %ecx,%esi movdqa %xmm1,16(%rsp) roll $5,%ecx addl %edi,%ebx xorl %ebp,%esi rorl $7,%edx psubd %xmm9,%xmm1 addl %ecx,%ebx addl 40(%rsp),%eax xorl %edx,%esi movl %ebx,%edi roll $5,%ebx addl %esi,%eax xorl %edx,%edi rorl $7,%ecx addl %ebx,%eax addl 44(%rsp),%ebp xorl %ecx,%edi movl %eax,%esi roll $5,%eax addl %edi,%ebp xorl %ecx,%esi rorl $7,%ebx addl %eax,%ebp addl 48(%rsp),%edx xorl %ebx,%esi movl %ebp,%edi .byte 102,15,56,0,222 roll $5,%ebp addl %esi,%edx xorl %ebx,%edi rorl $7,%eax paddd %xmm9,%xmm2 addl %ebp,%edx addl 52(%rsp),%ecx xorl %eax,%edi movl %edx,%esi movdqa %xmm2,32(%rsp) roll $5,%edx addl %edi,%ecx xorl %eax,%esi rorl $7,%ebp psubd %xmm9,%xmm2 addl %edx,%ecx addl 56(%rsp),%ebx xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx addl %esi,%ebx xorl %ebp,%edi rorl $7,%edx addl %ecx,%ebx addl 60(%rsp),%eax xorl %edx,%edi movl %ebx,%esi roll $5,%ebx addl %edi,%eax rorl $7,%ecx addl %ebx,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx addl 12(%r8),%edx movl %eax,0(%r8) addl 16(%r8),%ebp movl %esi,4(%r8) movl %esi,%ebx movl %ecx,8(%r8) movl %ecx,%edi movl %edx,12(%r8) xorl %edx,%edi movl %ebp,16(%r8) andl %edi,%esi jmp .Loop_ssse3 .align 16 .Ldone_ssse3: addl 16(%rsp),%ebx xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx addl %esi,%ebx xorl %ebp,%edi rorl $7,%edx addl %ecx,%ebx addl 20(%rsp),%eax xorl %edx,%edi movl %ebx,%esi roll $5,%ebx addl %edi,%eax xorl %edx,%esi rorl $7,%ecx addl %ebx,%eax addl 24(%rsp),%ebp xorl %ecx,%esi movl %eax,%edi roll $5,%eax addl %esi,%ebp xorl %ecx,%edi rorl $7,%ebx addl %eax,%ebp addl 28(%rsp),%edx xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp addl %edi,%edx xorl %ebx,%esi rorl $7,%eax addl %ebp,%edx addl 32(%rsp),%ecx xorl %eax,%esi movl %edx,%edi roll $5,%edx addl %esi,%ecx xorl %eax,%edi rorl $7,%ebp addl %edx,%ecx addl 36(%rsp),%ebx xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx addl %edi,%ebx xorl %ebp,%esi rorl $7,%edx addl %ecx,%ebx addl 40(%rsp),%eax xorl %edx,%esi movl %ebx,%edi roll $5,%ebx addl %esi,%eax xorl %edx,%edi rorl $7,%ecx addl %ebx,%eax addl 44(%rsp),%ebp xorl %ecx,%edi movl %eax,%esi roll $5,%eax addl %edi,%ebp xorl %ecx,%esi rorl $7,%ebx addl %eax,%ebp addl 48(%rsp),%edx xorl %ebx,%esi movl %ebp,%edi roll $5,%ebp addl %esi,%edx xorl %ebx,%edi rorl $7,%eax addl %ebp,%edx addl 52(%rsp),%ecx xorl %eax,%edi movl %edx,%esi roll $5,%edx addl %edi,%ecx xorl %eax,%esi rorl $7,%ebp addl %edx,%ecx addl 
56(%rsp),%ebx xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx addl %esi,%ebx xorl %ebp,%edi rorl $7,%edx addl %ecx,%ebx addl 60(%rsp),%eax xorl %edx,%edi movl %ebx,%esi roll $5,%ebx addl %edi,%eax rorl $7,%ecx addl %ebx,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx movl %eax,0(%r8) addl 12(%r8),%edx movl %esi,4(%r8) addl 16(%r8),%ebp movl %ecx,8(%r8) movl %edx,12(%r8) movl %ebp,16(%r8) leaq (%r14),%rsi movq -40(%rsi),%r14 movq -32(%rsi),%r13 movq -24(%rsi),%r12 movq -16(%rsi),%rbp movq -8(%rsi),%rbx leaq (%rsi),%rsp .Lepilogue_ssse3: .byte 0xf3,0xc3 .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.type sha1_block_data_order_avx,@function +.align 16 +sha1_block_data_order_avx: +_avx_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + leaq -64(%rsp),%rsp + vzeroupper + movq %rax,%r14 + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + vmovdqa 64(%r11),%xmm6 + vmovdqa -64(%r11),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm11,%xmm0,%xmm4 + vpaddd %xmm11,%xmm1,%xmm5 + vpaddd %xmm11,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + jmp .Loop_avx +.align 16 +.Loop_avx: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm11,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm10 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm4,%xmm4 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vpxor %xmm10,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm11,%xmm9 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm10 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm5,%xmm5 + addl 
%esi,%edx + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm10,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa -32(%r11),%xmm11 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%edi + addl 32(%rsp),%ebx + vpaddd %xmm5,%xmm11,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm10 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm6,%xmm6 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm10,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + addl %edi,%edx + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm11,%xmm9 + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm10 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm10,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm11,%xmm9 + addl %esi,%edx + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr 
$8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm11,%xmm9 + vmovdqa 0(%r11),%xmm11 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + vpaddd %xmm3,%xmm11,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm11,%xmm9 + 
shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm11,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm11,%xmm9 + vmovdqa 32(%r11),%xmm11 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vpsrld $30,%xmm7,%xmm8 + vmovdqa %xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm11,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 
12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm11,%xmm9 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + vpaddd %xmm1,%xmm11,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm11,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl 
%ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_avx + vmovdqa 64(%r11),%xmm6 + vmovdqa -64(%r11),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm11,%xmm0,%xmm4 + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,0(%rsp) + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + vpaddd %xmm11,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm5,16(%rsp) + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm11,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm6,32(%rsp) + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_avx + +.align 16 +.Ldone_avx: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl 
$7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroupper + + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size sha1_block_data_order_avx,.-sha1_block_data_order_avx +.type sha1_block_data_order_avx2,@function +.align 16 +sha1_block_data_order_avx2: +_avx2_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + vzeroupper + movq %rax,%r14 + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + leaq -640(%rsp),%rsp + shlq $6,%r10 + leaq 64(%r9),%r13 + andq $-128,%rsp + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + cmpq %r10,%r13 + cmovaeq %r9,%r13 + movl 4(%r8),%ebp + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl 16(%r8),%esi + vmovdqu 64(%r11),%ymm6 + + vmovdqu (%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + leaq 64(%r9),%r9 + vinserti128 $1,(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vpshufb %ymm6,%ymm0,%ymm0 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vpshufb %ymm6,%ymm1,%ymm1 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + vpshufb %ymm6,%ymm2,%ymm2 + vmovdqu -64(%r11),%ymm11 + vpshufb %ymm6,%ymm3,%ymm3 + + vpaddd %ymm11,%ymm0,%ymm4 + vpaddd %ymm11,%ymm1,%ymm5 + vmovdqu %ymm4,0(%rsp) + vpaddd %ymm11,%ymm2,%ymm6 + vmovdqu %ymm5,32(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vpalignr $8,%ymm0,%ymm1,%ymm4 + vpsrldq $4,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $31,%ymm4,%ymm8 + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + vpxor %ymm10,%ymm4,%ymm4 + vpaddd %ymm11,%ymm4,%ymm9 + vmovdqu %ymm9,128(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm5 + vpsrldq $4,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r11),%ymm11 + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm10,%ymm5,%ymm5 + vpaddd %ymm11,%ymm5,%ymm9 + vmovdqu %ymm9,160(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm6 + vpsrldq $4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $31,%ymm6,%ymm8 + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + 
vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + vpxor %ymm10,%ymm6,%ymm6 + vpaddd %ymm11,%ymm6,%ymm9 + vmovdqu %ymm9,192(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm7 + vpsrldq $4,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm7,%ymm8 + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + vpxor %ymm10,%ymm7,%ymm7 + vpaddd %ymm11,%ymm7,%ymm9 + vmovdqu %ymm9,224(%rsp) + leaq 128(%rsp),%r13 + jmp .Loop_avx2 +.align 32 +.Loop_avx2: + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + jmp .Lalign32_1 +.align 32 +.Lalign32_1: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + vpxor %ymm1,%ymm0,%ymm0 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpxor %ymm8,%ymm0,%ymm0 + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vpor %ymm8,%ymm0,%ymm0 + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + vpaddd %ymm11,%ymm0,%ymm9 + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + vmovdqu %ymm9,256(%rsp) + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + vpxor %ymm2,%ymm1,%ymm1 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpxor %ymm8,%ymm1,%ymm1 + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vpor %ymm8,%ymm1,%ymm1 + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + vpaddd %ymm11,%ymm1,%ymm9 + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vmovdqu %ymm9,288(%rsp) + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + vpxor %ymm3,%ymm2,%ymm2 + vmovdqu 0(%r11),%ymm11 + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpxor %ymm8,%ymm2,%ymm2 + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vpor %ymm8,%ymm2,%ymm2 + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + vpaddd %ymm11,%ymm2,%ymm9 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vmovdqu %ymm9,320(%rsp) + addl %r12d,%esi + xorl 
%edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + vpxor %ymm4,%ymm3,%ymm3 + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpxor %ymm8,%ymm3,%ymm3 + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + vpor %ymm8,%ymm3,%ymm3 + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + vpaddd %ymm11,%ymm3,%ymm9 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vmovdqu %ymm9,352(%rsp) + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpalignr $8,%ymm2,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpxor %ymm5,%ymm4,%ymm4 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpxor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + vpsrld $30,%ymm4,%ymm8 + vpslld $2,%ymm4,%ymm4 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpor %ymm8,%ymm4,%ymm4 + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpaddd %ymm11,%ymm4,%ymm9 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + vmovdqu %ymm9,384(%rsp) + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpalignr $8,%ymm3,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm6,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpxor %ymm8,%ymm5,%ymm5 + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + vpsrld $30,%ymm5,%ymm8 + vpslld $2,%ymm5,%ymm5 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vpor %ymm8,%ymm5,%ymm5 + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + vmovdqu %ymm9,416(%rsp) + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm7,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + vpxor %ymm8,%ymm6,%ymm6 + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + vpsrld 
$30,%ymm6,%ymm8 + vpslld $2,%ymm6,%ymm6 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpor %ymm8,%ymm6,%ymm6 + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + vmovdqu %ymm9,448(%rsp) + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm5,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm0,%ymm7,%ymm7 + vmovdqu 32(%r11),%ymm11 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpxor %ymm8,%ymm7,%ymm7 + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + vpsrld $30,%ymm7,%ymm8 + vpslld $2,%ymm7,%ymm7 + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpor %ymm8,%ymm7,%ymm7 + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + vmovdqu %ymm9,480(%rsp) + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + jmp .Lalign32_2 +.align 32 +.Lalign32_2: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -64(%r13),%ebp + xorl %esi,%ecx + vpxor %ymm1,%ymm0,%ymm0 + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + vpxor %ymm8,%ymm0,%ymm0 + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + vpor %ymm8,%ymm0,%ymm0 + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpaddd %ymm11,%ymm0,%ymm9 + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + vmovdqu %ymm9,512(%rsp) + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -28(%r13),%ebx + xorl %eax,%edx + vpxor %ymm2,%ymm1,%ymm1 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpxor %ymm8,%ymm1,%ymm1 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + vpor %ymm8,%ymm1,%ymm1 + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpaddd %ymm11,%ymm1,%ymm9 + addl %r12d,%ebp + andl %edi,%ebx + 
addl -20(%r13),%eax + xorl %edx,%ebx + vmovdqu %ymm9,544(%rsp) + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl 8(%r13),%ecx + xorl %ebp,%esi + vpxor %ymm3,%ymm2,%ymm2 + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + vpxor %ymm8,%ymm2,%ymm2 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + vpor %ymm8,%ymm2,%ymm2 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpaddd %ymm11,%ymm2,%ymm9 + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + vmovdqu %ymm9,576(%rsp) + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl 44(%r13),%edx + xorl %ebx,%eax + vpxor %ymm4,%ymm3,%ymm3 + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm3,%ymm3 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl %r12d,%edx + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + vpor %ymm8,%ymm3,%ymm3 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpaddd %ymm11,%ymm3,%ymm9 + addl %r12d,%ecx + andl %edi,%edx + addl 68(%r13),%ebx + xorl %eax,%edx + vmovdqu %ymm9,608(%rsp) + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl 
%r12d,%ebx + xorl %eax,%ecx + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -96(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -60(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%r9),%r13 + leaq 128(%r9),%rdi + cmpq %r10,%r13 + cmovaeq %r9,%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + je .Ldone_avx2 + vmovdqu 64(%r11),%ymm6 + cmpq %r10,%rdi + ja .Last_avx2 + + vmovdqu -64(%rdi),%xmm0 + vmovdqu -48(%rdi),%xmm1 + vmovdqu -32(%rdi),%xmm2 + vmovdqu -16(%rdi),%xmm3 + vinserti128 $1,0(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + jmp .Last_avx2 + +.align 32 +.Last_avx2: + leaq 128+16(%rsp),%r13 + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + subq $-128,%r9 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + addl 
%esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vmovdqu -64(%r11),%ymm11 + vpshufb %ymm6,%ymm0,%ymm0 + addl 
68(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpshufb %ymm6,%ymm1,%ymm1 + vpaddd %ymm11,%ymm0,%ymm8 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vmovdqu %ymm8,0(%rsp) + vpshufb %ymm6,%ymm2,%ymm2 + vpaddd %ymm11,%ymm1,%ymm9 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + vmovdqu %ymm9,32(%rsp) + vpshufb %ymm6,%ymm3,%ymm3 + vpaddd %ymm11,%ymm2,%ymm6 + addl -64(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + jmp .Lalign32_3 +.align 32 +.Lalign32_3: + vmovdqu %ymm6,64(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + addl -28(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + 
xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vmovdqu %ymm7,96(%rsp) + addl 8(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm0,%ymm1,%ymm4 + addl 44(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + vpsrldq $4,%ymm3,%ymm8 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + xorl %ebp,%esi + addl %r12d,%edx + vpxor %ymm8,%ymm4,%ymm4 + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + vpsrld $31,%ymm4,%ymm8 + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + andl %edi,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + addl 68(%r13),%ebx + xorl %eax,%edx + vpxor %ymm10,%ymm4,%ymm4 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpaddd %ymm11,%ymm4,%ymm9 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vmovdqu %ymm9,128(%rsp) + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm1,%ymm2,%ymm5 + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrldq $4,%ymm4,%ymm8 + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor 
%ymm8,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r11),%ymm11 + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + xorl %eax,%edx + addl %r12d,%ecx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + vpxor %ymm10,%ymm5,%ymm5 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vmovdqu %ymm9,160(%rsp) + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm2,%ymm3,%ymm6 + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpsrldq $4,%ymm5,%ymm8 + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm8,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + vpsrld $31,%ymm6,%ymm8 + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + xorl %ebp,%esi + addl %r12d,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + xorl %ebx,%esi + addl -96(%r13),%ecx + vpxor %ymm10,%ymm6,%ymm6 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vmovdqu %ymm9,192(%rsp) + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpalignr $8,%ymm3,%ymm4,%ymm7 + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpsrldq $4,%ymm6,%ymm8 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm8,%ymm7,%ymm7 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + vpsrld $31,%ymm7,%ymm8 + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + xorl %ebx,%eax + addl %r12d,%esi + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + xorl %ecx,%eax + addl -60(%r13),%edx + vpxor %ymm10,%ymm7,%ymm7 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vmovdqu %ymm9,224(%rsp) + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + 
leal (%rsi,%rbp,1),%esi
+ rorxl $27,%eax,%r12d
+ rorxl $2,%eax,%ebp
+ xorl %ebx,%eax
+ addl %r12d,%esi
+ xorl %ecx,%eax
+ addl -20(%r13),%edx
+ leal (%rdx,%rax,1),%edx
+ rorxl $27,%esi,%r12d
+ addl %r12d,%edx
+ leaq 128(%rsp),%r13
+
+
+ addl 0(%r8),%edx
+ addl 4(%r8),%esi
+ addl 8(%r8),%ebp
+ movl %edx,0(%r8)
+ addl 12(%r8),%ebx
+ movl %esi,4(%r8)
+ movl %edx,%eax
+ addl 16(%r8),%ecx
+ movl %ebp,%r12d
+ movl %ebp,8(%r8)
+ movl %ebx,%edx
+
+ movl %ebx,12(%r8)
+ movl %esi,%ebp
+ movl %ecx,16(%r8)
+
+ movl %ecx,%esi
+ movl %r12d,%ecx
+
+
+ cmpq %r10,%r9
+ jbe .Loop_avx2
+
+.Ldone_avx2:
+ vzeroupper
+ leaq (%r14),%rsi
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
+.Lepilogue_avx2:
+ .byte 0xf3,0xc3
+.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
.align 64
K_XX_XX:
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
Index: head/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S
===================================================================
--- head/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S (revision 299480)
+++ head/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S (revision 299481)
@@ -1,3259 +1,7904 @@
- # $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from sha256-mb-x86_64.pl.
.text .globl sha256_multi_block .type sha256_multi_block,@function .align 32 sha256_multi_block: movq OPENSSL_ia32cap_P+4(%rip),%rcx btq $61,%rcx jc _shaext_shortcut + testl $268435456,%ecx + jnz _avx_shortcut movq %rsp,%rax pushq %rbx pushq %rbp subq $288,%rsp andq $-256,%rsp movq %rax,272(%rsp) .Lbody: leaq K256+128(%rip),%rbp leaq 256(%rsp),%rbx leaq 128(%rdi),%rdi .Loop_grande: movl %edx,280(%rsp) xorl %edx,%edx movq 0(%rsi),%r8 movl 8(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,0(%rbx) cmovleq %rbp,%r8 movq 16(%rsi),%r9 movl 24(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,4(%rbx) cmovleq %rbp,%r9 movq 32(%rsi),%r10 movl 40(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,8(%rbx) cmovleq %rbp,%r10 movq 48(%rsi),%r11 movl 56(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,12(%rbx) cmovleq %rbp,%r11 testl %edx,%edx jz .Ldone movdqu 0-128(%rdi),%xmm8 leaq 128(%rsp),%rax movdqu 32-128(%rdi),%xmm9 movdqu 64-128(%rdi),%xmm10 movdqu 96-128(%rdi),%xmm11 movdqu 128-128(%rdi),%xmm12 movdqu 160-128(%rdi),%xmm13 movdqu 192-128(%rdi),%xmm14 movdqu 224-128(%rdi),%xmm15 movdqu .Lpbswap(%rip),%xmm6 jmp .Loop .align 32 .Loop: movdqa %xmm10,%xmm4 pxor %xmm9,%xmm4 movd 0(%r8),%xmm5 movd 0(%r9),%xmm0 movd 0(%r10),%xmm1 movd 0(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm12,%xmm7 .byte 102,15,56,0,238 movdqa %xmm12,%xmm2 psrld $6,%xmm7 movdqa %xmm12,%xmm1 pslld $7,%xmm2 movdqa %xmm5,0-128(%rax) paddd %xmm15,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -128(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm12,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm12,%xmm3 pslld $26-21,%xmm2 pandn %xmm14,%xmm0 pand %xmm13,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm8,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm8,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm9,%xmm3 movdqa %xmm8,%xmm7 pslld $10,%xmm2 pxor %xmm8,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm9,%xmm15 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm15 paddd %xmm5,%xmm11 pxor %xmm2,%xmm7 paddd %xmm5,%xmm15 paddd %xmm7,%xmm15 movd 4(%r8),%xmm5 movd 4(%r9),%xmm0 movd 4(%r10),%xmm1 movd 4(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm11,%xmm7 movdqa %xmm11,%xmm2 .byte 102,15,56,0,238 psrld $6,%xmm7 movdqa %xmm11,%xmm1 pslld $7,%xmm2 movdqa %xmm5,16-128(%rax) paddd %xmm14,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -96(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm11,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm11,%xmm4 pslld $26-21,%xmm2 pandn %xmm13,%xmm0 pand %xmm12,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm15,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm15,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm4,%xmm0 movdqa %xmm8,%xmm4 movdqa %xmm15,%xmm7 pslld $10,%xmm2 pxor %xmm15,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm8,%xmm14 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm14 paddd %xmm5,%xmm10 pxor %xmm2,%xmm7 paddd %xmm5,%xmm14 paddd %xmm7,%xmm14 movd 8(%r8),%xmm5 movd 8(%r9),%xmm0 movd 8(%r10),%xmm1 movd 8(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm10,%xmm7 .byte 102,15,56,0,238 movdqa %xmm10,%xmm2 psrld $6,%xmm7 movdqa %xmm10,%xmm1 pslld $7,%xmm2 movdqa %xmm5,32-128(%rax) paddd 
%xmm13,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -64(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm10,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm10,%xmm3 pslld $26-21,%xmm2 pandn %xmm12,%xmm0 pand %xmm11,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm14,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm14,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm15,%xmm3 movdqa %xmm14,%xmm7 pslld $10,%xmm2 pxor %xmm14,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm15,%xmm13 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm13 paddd %xmm5,%xmm9 pxor %xmm2,%xmm7 paddd %xmm5,%xmm13 paddd %xmm7,%xmm13 movd 12(%r8),%xmm5 movd 12(%r9),%xmm0 movd 12(%r10),%xmm1 movd 12(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm9,%xmm7 movdqa %xmm9,%xmm2 .byte 102,15,56,0,238 psrld $6,%xmm7 movdqa %xmm9,%xmm1 pslld $7,%xmm2 movdqa %xmm5,48-128(%rax) paddd %xmm12,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -32(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm9,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm9,%xmm4 pslld $26-21,%xmm2 pandn %xmm11,%xmm0 pand %xmm10,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm13,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm13,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm4,%xmm0 movdqa %xmm14,%xmm4 movdqa %xmm13,%xmm7 pslld $10,%xmm2 pxor %xmm13,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm14,%xmm12 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm12 paddd %xmm5,%xmm8 pxor %xmm2,%xmm7 paddd %xmm5,%xmm12 paddd %xmm7,%xmm12 movd 16(%r8),%xmm5 movd 16(%r9),%xmm0 movd 16(%r10),%xmm1 movd 16(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm8,%xmm7 .byte 102,15,56,0,238 movdqa %xmm8,%xmm2 psrld $6,%xmm7 movdqa %xmm8,%xmm1 pslld $7,%xmm2 movdqa %xmm5,64-128(%rax) paddd %xmm11,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 0(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm8,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm8,%xmm3 pslld $26-21,%xmm2 pandn %xmm10,%xmm0 pand %xmm9,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm12,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm12,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm13,%xmm3 movdqa %xmm12,%xmm7 pslld $10,%xmm2 pxor %xmm12,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm13,%xmm11 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm11 paddd %xmm5,%xmm15 pxor %xmm2,%xmm7 paddd %xmm5,%xmm11 paddd %xmm7,%xmm11 movd 20(%r8),%xmm5 movd 20(%r9),%xmm0 movd 20(%r10),%xmm1 movd 20(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm15,%xmm7 movdqa %xmm15,%xmm2 .byte 102,15,56,0,238 psrld $6,%xmm7 movdqa %xmm15,%xmm1 pslld $7,%xmm2 movdqa %xmm5,80-128(%rax) paddd %xmm10,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 32(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm15,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm15,%xmm4 pslld $26-21,%xmm2 pandn %xmm9,%xmm0 pand %xmm8,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm11,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm11,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm4,%xmm0 movdqa %xmm12,%xmm4 movdqa %xmm11,%xmm7 pslld $10,%xmm2 pxor %xmm11,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld 
$22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm12,%xmm10 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm10 paddd %xmm5,%xmm14 pxor %xmm2,%xmm7 paddd %xmm5,%xmm10 paddd %xmm7,%xmm10 movd 24(%r8),%xmm5 movd 24(%r9),%xmm0 movd 24(%r10),%xmm1 movd 24(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm14,%xmm7 .byte 102,15,56,0,238 movdqa %xmm14,%xmm2 psrld $6,%xmm7 movdqa %xmm14,%xmm1 pslld $7,%xmm2 movdqa %xmm5,96-128(%rax) paddd %xmm9,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 64(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm14,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm14,%xmm3 pslld $26-21,%xmm2 pandn %xmm8,%xmm0 pand %xmm15,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm10,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm10,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm11,%xmm3 movdqa %xmm10,%xmm7 pslld $10,%xmm2 pxor %xmm10,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm11,%xmm9 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm9 paddd %xmm5,%xmm13 pxor %xmm2,%xmm7 paddd %xmm5,%xmm9 paddd %xmm7,%xmm9 movd 28(%r8),%xmm5 movd 28(%r9),%xmm0 movd 28(%r10),%xmm1 movd 28(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm13,%xmm7 movdqa %xmm13,%xmm2 .byte 102,15,56,0,238 psrld $6,%xmm7 movdqa %xmm13,%xmm1 pslld $7,%xmm2 movdqa %xmm5,112-128(%rax) paddd %xmm8,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 96(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm13,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm13,%xmm4 pslld $26-21,%xmm2 pandn %xmm15,%xmm0 pand %xmm14,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm9,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm9,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm4,%xmm0 movdqa %xmm10,%xmm4 movdqa %xmm9,%xmm7 pslld $10,%xmm2 pxor %xmm9,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm10,%xmm8 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm8 paddd %xmm5,%xmm12 pxor %xmm2,%xmm7 paddd %xmm5,%xmm8 paddd %xmm7,%xmm8 leaq 256(%rbp),%rbp movd 32(%r8),%xmm5 movd 32(%r9),%xmm0 movd 32(%r10),%xmm1 movd 32(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm12,%xmm7 .byte 102,15,56,0,238 movdqa %xmm12,%xmm2 psrld $6,%xmm7 movdqa %xmm12,%xmm1 pslld $7,%xmm2 movdqa %xmm5,128-128(%rax) paddd %xmm15,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -128(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm12,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm12,%xmm3 pslld $26-21,%xmm2 pandn %xmm14,%xmm0 pand %xmm13,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm8,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm8,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm9,%xmm3 movdqa %xmm8,%xmm7 pslld $10,%xmm2 pxor %xmm8,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm9,%xmm15 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm15 paddd %xmm5,%xmm11 pxor %xmm2,%xmm7 paddd %xmm5,%xmm15 paddd %xmm7,%xmm15 movd 36(%r8),%xmm5 movd 36(%r9),%xmm0 movd 36(%r10),%xmm1 movd 36(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm11,%xmm7 movdqa %xmm11,%xmm2 .byte 102,15,56,0,238 psrld $6,%xmm7 movdqa %xmm11,%xmm1 pslld $7,%xmm2 movdqa %xmm5,144-128(%rax) paddd %xmm14,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld 
$21-7,%xmm2 paddd -96(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm11,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm11,%xmm4 pslld $26-21,%xmm2 pandn %xmm13,%xmm0 pand %xmm12,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm15,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm15,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm4,%xmm0 movdqa %xmm8,%xmm4 movdqa %xmm15,%xmm7 pslld $10,%xmm2 pxor %xmm15,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm8,%xmm14 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm14 paddd %xmm5,%xmm10 pxor %xmm2,%xmm7 paddd %xmm5,%xmm14 paddd %xmm7,%xmm14 movd 40(%r8),%xmm5 movd 40(%r9),%xmm0 movd 40(%r10),%xmm1 movd 40(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm10,%xmm7 .byte 102,15,56,0,238 movdqa %xmm10,%xmm2 psrld $6,%xmm7 movdqa %xmm10,%xmm1 pslld $7,%xmm2 movdqa %xmm5,160-128(%rax) paddd %xmm13,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -64(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm10,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm10,%xmm3 pslld $26-21,%xmm2 pandn %xmm12,%xmm0 pand %xmm11,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm14,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm14,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm15,%xmm3 movdqa %xmm14,%xmm7 pslld $10,%xmm2 pxor %xmm14,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm15,%xmm13 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm13 paddd %xmm5,%xmm9 pxor %xmm2,%xmm7 paddd %xmm5,%xmm13 paddd %xmm7,%xmm13 movd 44(%r8),%xmm5 movd 44(%r9),%xmm0 movd 44(%r10),%xmm1 movd 44(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm9,%xmm7 movdqa %xmm9,%xmm2 .byte 102,15,56,0,238 psrld $6,%xmm7 movdqa %xmm9,%xmm1 pslld $7,%xmm2 movdqa %xmm5,176-128(%rax) paddd %xmm12,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -32(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm9,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm9,%xmm4 pslld $26-21,%xmm2 pandn %xmm11,%xmm0 pand %xmm10,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm13,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm13,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm4,%xmm0 movdqa %xmm14,%xmm4 movdqa %xmm13,%xmm7 pslld $10,%xmm2 pxor %xmm13,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm14,%xmm12 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm12 paddd %xmm5,%xmm8 pxor %xmm2,%xmm7 paddd %xmm5,%xmm12 paddd %xmm7,%xmm12 movd 48(%r8),%xmm5 movd 48(%r9),%xmm0 movd 48(%r10),%xmm1 movd 48(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm8,%xmm7 .byte 102,15,56,0,238 movdqa %xmm8,%xmm2 psrld $6,%xmm7 movdqa %xmm8,%xmm1 pslld $7,%xmm2 movdqa %xmm5,192-128(%rax) paddd %xmm11,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 0(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm8,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm8,%xmm3 pslld $26-21,%xmm2 pandn %xmm10,%xmm0 pand %xmm9,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm12,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm12,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm13,%xmm3 movdqa %xmm12,%xmm7 pslld $10,%xmm2 pxor %xmm12,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa 
%xmm13,%xmm11 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm11 paddd %xmm5,%xmm15 pxor %xmm2,%xmm7 paddd %xmm5,%xmm11 paddd %xmm7,%xmm11 movd 52(%r8),%xmm5 movd 52(%r9),%xmm0 movd 52(%r10),%xmm1 movd 52(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm15,%xmm7 movdqa %xmm15,%xmm2 .byte 102,15,56,0,238 psrld $6,%xmm7 movdqa %xmm15,%xmm1 pslld $7,%xmm2 movdqa %xmm5,208-128(%rax) paddd %xmm10,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 32(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm15,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm15,%xmm4 pslld $26-21,%xmm2 pandn %xmm9,%xmm0 pand %xmm8,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm11,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm11,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm4,%xmm0 movdqa %xmm12,%xmm4 movdqa %xmm11,%xmm7 pslld $10,%xmm2 pxor %xmm11,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm12,%xmm10 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm10 paddd %xmm5,%xmm14 pxor %xmm2,%xmm7 paddd %xmm5,%xmm10 paddd %xmm7,%xmm10 movd 56(%r8),%xmm5 movd 56(%r9),%xmm0 movd 56(%r10),%xmm1 movd 56(%r11),%xmm2 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm14,%xmm7 .byte 102,15,56,0,238 movdqa %xmm14,%xmm2 psrld $6,%xmm7 movdqa %xmm14,%xmm1 pslld $7,%xmm2 movdqa %xmm5,224-128(%rax) paddd %xmm9,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 64(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm14,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm14,%xmm3 pslld $26-21,%xmm2 pandn %xmm8,%xmm0 pand %xmm15,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm10,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm10,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm11,%xmm3 movdqa %xmm10,%xmm7 pslld $10,%xmm2 pxor %xmm10,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm11,%xmm9 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm9 paddd %xmm5,%xmm13 pxor %xmm2,%xmm7 paddd %xmm5,%xmm9 paddd %xmm7,%xmm9 movd 60(%r8),%xmm5 leaq 64(%r8),%r8 movd 60(%r9),%xmm0 leaq 64(%r9),%r9 movd 60(%r10),%xmm1 leaq 64(%r10),%r10 movd 60(%r11),%xmm2 leaq 64(%r11),%r11 punpckldq %xmm1,%xmm5 punpckldq %xmm2,%xmm0 punpckldq %xmm0,%xmm5 movdqa %xmm13,%xmm7 movdqa %xmm13,%xmm2 .byte 102,15,56,0,238 psrld $6,%xmm7 movdqa %xmm13,%xmm1 pslld $7,%xmm2 movdqa %xmm5,240-128(%rax) paddd %xmm8,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 96(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm13,%xmm0 prefetcht0 63(%r8) pxor %xmm2,%xmm7 movdqa %xmm13,%xmm4 pslld $26-21,%xmm2 pandn %xmm15,%xmm0 pand %xmm14,%xmm4 pxor %xmm1,%xmm7 prefetcht0 63(%r9) movdqa %xmm9,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm9,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm4,%xmm0 movdqa %xmm10,%xmm4 movdqa %xmm9,%xmm7 pslld $10,%xmm2 pxor %xmm9,%xmm4 prefetcht0 63(%r10) psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 prefetcht0 63(%r11) psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm10,%xmm8 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm8 paddd %xmm5,%xmm12 pxor %xmm2,%xmm7 paddd %xmm5,%xmm8 paddd %xmm7,%xmm8 leaq 256(%rbp),%rbp movdqu 0-128(%rax),%xmm5 movl $3,%ecx jmp .Loop_16_xx .align 32 .Loop_16_xx: movdqa 16-128(%rax),%xmm6 paddd 144-128(%rax),%xmm5 movdqa %xmm6,%xmm7 movdqa %xmm6,%xmm1 psrld $3,%xmm7 movdqa %xmm6,%xmm2 psrld $7,%xmm1 movdqa 
224-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm3 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm3,%xmm1 psrld $17,%xmm3 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 psrld $19-17,%xmm3 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm3,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm5 movdqa %xmm12,%xmm7 movdqa %xmm12,%xmm2 psrld $6,%xmm7 movdqa %xmm12,%xmm1 pslld $7,%xmm2 movdqa %xmm5,0-128(%rax) paddd %xmm15,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -128(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm12,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm12,%xmm3 pslld $26-21,%xmm2 pandn %xmm14,%xmm0 pand %xmm13,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm8,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm8,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm9,%xmm3 movdqa %xmm8,%xmm7 pslld $10,%xmm2 pxor %xmm8,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm9,%xmm15 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm15 paddd %xmm5,%xmm11 pxor %xmm2,%xmm7 paddd %xmm5,%xmm15 paddd %xmm7,%xmm15 movdqa 32-128(%rax),%xmm5 paddd 160-128(%rax),%xmm6 movdqa %xmm5,%xmm7 movdqa %xmm5,%xmm1 psrld $3,%xmm7 movdqa %xmm5,%xmm2 psrld $7,%xmm1 movdqa 240-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm4 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm4,%xmm1 psrld $17,%xmm4 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 psrld $19-17,%xmm4 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm4,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm6 movdqa %xmm11,%xmm7 movdqa %xmm11,%xmm2 psrld $6,%xmm7 movdqa %xmm11,%xmm1 pslld $7,%xmm2 movdqa %xmm6,16-128(%rax) paddd %xmm14,%xmm6 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -96(%rbp),%xmm6 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm11,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm11,%xmm4 pslld $26-21,%xmm2 pandn %xmm13,%xmm0 pand %xmm12,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm15,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm15,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 movdqa %xmm8,%xmm4 movdqa %xmm15,%xmm7 pslld $10,%xmm2 pxor %xmm15,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm6 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm8,%xmm14 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm14 paddd %xmm6,%xmm10 pxor %xmm2,%xmm7 paddd %xmm6,%xmm14 paddd %xmm7,%xmm14 movdqa 48-128(%rax),%xmm6 paddd 176-128(%rax),%xmm5 movdqa %xmm6,%xmm7 movdqa %xmm6,%xmm1 psrld $3,%xmm7 movdqa %xmm6,%xmm2 psrld $7,%xmm1 movdqa 0-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm3 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm3,%xmm1 psrld $17,%xmm3 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 psrld $19-17,%xmm3 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm3,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm5 movdqa %xmm10,%xmm7 movdqa %xmm10,%xmm2 psrld $6,%xmm7 movdqa %xmm10,%xmm1 pslld $7,%xmm2 movdqa %xmm5,32-128(%rax) paddd %xmm13,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -64(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm10,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm10,%xmm3 pslld $26-21,%xmm2 pandn %xmm12,%xmm0 pand %xmm11,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm14,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm14,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor 
%xmm3,%xmm0 movdqa %xmm15,%xmm3 movdqa %xmm14,%xmm7 pslld $10,%xmm2 pxor %xmm14,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm15,%xmm13 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm13 paddd %xmm5,%xmm9 pxor %xmm2,%xmm7 paddd %xmm5,%xmm13 paddd %xmm7,%xmm13 movdqa 64-128(%rax),%xmm5 paddd 192-128(%rax),%xmm6 movdqa %xmm5,%xmm7 movdqa %xmm5,%xmm1 psrld $3,%xmm7 movdqa %xmm5,%xmm2 psrld $7,%xmm1 movdqa 16-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm4 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm4,%xmm1 psrld $17,%xmm4 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 psrld $19-17,%xmm4 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm4,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm6 movdqa %xmm9,%xmm7 movdqa %xmm9,%xmm2 psrld $6,%xmm7 movdqa %xmm9,%xmm1 pslld $7,%xmm2 movdqa %xmm6,48-128(%rax) paddd %xmm12,%xmm6 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -32(%rbp),%xmm6 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm9,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm9,%xmm4 pslld $26-21,%xmm2 pandn %xmm11,%xmm0 pand %xmm10,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm13,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm13,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 movdqa %xmm14,%xmm4 movdqa %xmm13,%xmm7 pslld $10,%xmm2 pxor %xmm13,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm6 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm14,%xmm12 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm12 paddd %xmm6,%xmm8 pxor %xmm2,%xmm7 paddd %xmm6,%xmm12 paddd %xmm7,%xmm12 movdqa 80-128(%rax),%xmm6 paddd 208-128(%rax),%xmm5 movdqa %xmm6,%xmm7 movdqa %xmm6,%xmm1 psrld $3,%xmm7 movdqa %xmm6,%xmm2 psrld $7,%xmm1 movdqa 32-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm3 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm3,%xmm1 psrld $17,%xmm3 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 psrld $19-17,%xmm3 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm3,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm5 movdqa %xmm8,%xmm7 movdqa %xmm8,%xmm2 psrld $6,%xmm7 movdqa %xmm8,%xmm1 pslld $7,%xmm2 movdqa %xmm5,64-128(%rax) paddd %xmm11,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 0(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm8,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm8,%xmm3 pslld $26-21,%xmm2 pandn %xmm10,%xmm0 pand %xmm9,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm12,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm12,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm13,%xmm3 movdqa %xmm12,%xmm7 pslld $10,%xmm2 pxor %xmm12,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm13,%xmm11 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm11 paddd %xmm5,%xmm15 pxor %xmm2,%xmm7 paddd %xmm5,%xmm11 paddd %xmm7,%xmm11 movdqa 96-128(%rax),%xmm5 paddd 224-128(%rax),%xmm6 movdqa %xmm5,%xmm7 movdqa %xmm5,%xmm1 psrld $3,%xmm7 movdqa %xmm5,%xmm2 psrld $7,%xmm1 movdqa 48-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm4 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm4,%xmm1 psrld $17,%xmm4 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 psrld $19-17,%xmm4 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm4,%xmm0 
pxor %xmm1,%xmm0 paddd %xmm0,%xmm6 movdqa %xmm15,%xmm7 movdqa %xmm15,%xmm2 psrld $6,%xmm7 movdqa %xmm15,%xmm1 pslld $7,%xmm2 movdqa %xmm6,80-128(%rax) paddd %xmm10,%xmm6 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 32(%rbp),%xmm6 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm15,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm15,%xmm4 pslld $26-21,%xmm2 pandn %xmm9,%xmm0 pand %xmm8,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm11,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm11,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 movdqa %xmm12,%xmm4 movdqa %xmm11,%xmm7 pslld $10,%xmm2 pxor %xmm11,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm6 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm12,%xmm10 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm10 paddd %xmm6,%xmm14 pxor %xmm2,%xmm7 paddd %xmm6,%xmm10 paddd %xmm7,%xmm10 movdqa 112-128(%rax),%xmm6 paddd 240-128(%rax),%xmm5 movdqa %xmm6,%xmm7 movdqa %xmm6,%xmm1 psrld $3,%xmm7 movdqa %xmm6,%xmm2 psrld $7,%xmm1 movdqa 64-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm3 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm3,%xmm1 psrld $17,%xmm3 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 psrld $19-17,%xmm3 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm3,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm5 movdqa %xmm14,%xmm7 movdqa %xmm14,%xmm2 psrld $6,%xmm7 movdqa %xmm14,%xmm1 pslld $7,%xmm2 movdqa %xmm5,96-128(%rax) paddd %xmm9,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 64(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm14,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm14,%xmm3 pslld $26-21,%xmm2 pandn %xmm8,%xmm0 pand %xmm15,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm10,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm10,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm11,%xmm3 movdqa %xmm10,%xmm7 pslld $10,%xmm2 pxor %xmm10,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm11,%xmm9 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm9 paddd %xmm5,%xmm13 pxor %xmm2,%xmm7 paddd %xmm5,%xmm9 paddd %xmm7,%xmm9 movdqa 128-128(%rax),%xmm5 paddd 0-128(%rax),%xmm6 movdqa %xmm5,%xmm7 movdqa %xmm5,%xmm1 psrld $3,%xmm7 movdqa %xmm5,%xmm2 psrld $7,%xmm1 movdqa 80-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm4 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm4,%xmm1 psrld $17,%xmm4 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 psrld $19-17,%xmm4 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm4,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm6 movdqa %xmm13,%xmm7 movdqa %xmm13,%xmm2 psrld $6,%xmm7 movdqa %xmm13,%xmm1 pslld $7,%xmm2 movdqa %xmm6,112-128(%rax) paddd %xmm8,%xmm6 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 96(%rbp),%xmm6 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm13,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm13,%xmm4 pslld $26-21,%xmm2 pandn %xmm15,%xmm0 pand %xmm14,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm9,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm9,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 movdqa %xmm10,%xmm4 movdqa %xmm9,%xmm7 pslld $10,%xmm2 pxor %xmm9,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm6 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm10,%xmm8 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm8 paddd %xmm6,%xmm12 pxor %xmm2,%xmm7 paddd 
%xmm6,%xmm8 paddd %xmm7,%xmm8 leaq 256(%rbp),%rbp movdqa 144-128(%rax),%xmm6 paddd 16-128(%rax),%xmm5 movdqa %xmm6,%xmm7 movdqa %xmm6,%xmm1 psrld $3,%xmm7 movdqa %xmm6,%xmm2 psrld $7,%xmm1 movdqa 96-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm3 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm3,%xmm1 psrld $17,%xmm3 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 psrld $19-17,%xmm3 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm3,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm5 movdqa %xmm12,%xmm7 movdqa %xmm12,%xmm2 psrld $6,%xmm7 movdqa %xmm12,%xmm1 pslld $7,%xmm2 movdqa %xmm5,128-128(%rax) paddd %xmm15,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -128(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm12,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm12,%xmm3 pslld $26-21,%xmm2 pandn %xmm14,%xmm0 pand %xmm13,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm8,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm8,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm9,%xmm3 movdqa %xmm8,%xmm7 pslld $10,%xmm2 pxor %xmm8,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm9,%xmm15 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm15 paddd %xmm5,%xmm11 pxor %xmm2,%xmm7 paddd %xmm5,%xmm15 paddd %xmm7,%xmm15 movdqa 160-128(%rax),%xmm5 paddd 32-128(%rax),%xmm6 movdqa %xmm5,%xmm7 movdqa %xmm5,%xmm1 psrld $3,%xmm7 movdqa %xmm5,%xmm2 psrld $7,%xmm1 movdqa 112-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm4 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm4,%xmm1 psrld $17,%xmm4 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 psrld $19-17,%xmm4 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm4,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm6 movdqa %xmm11,%xmm7 movdqa %xmm11,%xmm2 psrld $6,%xmm7 movdqa %xmm11,%xmm1 pslld $7,%xmm2 movdqa %xmm6,144-128(%rax) paddd %xmm14,%xmm6 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -96(%rbp),%xmm6 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm11,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm11,%xmm4 pslld $26-21,%xmm2 pandn %xmm13,%xmm0 pand %xmm12,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm15,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm15,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 movdqa %xmm8,%xmm4 movdqa %xmm15,%xmm7 pslld $10,%xmm2 pxor %xmm15,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm6 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm8,%xmm14 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm14 paddd %xmm6,%xmm10 pxor %xmm2,%xmm7 paddd %xmm6,%xmm14 paddd %xmm7,%xmm14 movdqa 176-128(%rax),%xmm6 paddd 48-128(%rax),%xmm5 movdqa %xmm6,%xmm7 movdqa %xmm6,%xmm1 psrld $3,%xmm7 movdqa %xmm6,%xmm2 psrld $7,%xmm1 movdqa 128-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm3 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm3,%xmm1 psrld $17,%xmm3 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 psrld $19-17,%xmm3 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm3,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm5 movdqa %xmm10,%xmm7 movdqa %xmm10,%xmm2 psrld $6,%xmm7 movdqa %xmm10,%xmm1 pslld $7,%xmm2 movdqa %xmm5,160-128(%rax) paddd %xmm13,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -64(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm10,%xmm0 pxor 
%xmm2,%xmm7 movdqa %xmm10,%xmm3 pslld $26-21,%xmm2 pandn %xmm12,%xmm0 pand %xmm11,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm14,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm14,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm15,%xmm3 movdqa %xmm14,%xmm7 pslld $10,%xmm2 pxor %xmm14,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm15,%xmm13 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm13 paddd %xmm5,%xmm9 pxor %xmm2,%xmm7 paddd %xmm5,%xmm13 paddd %xmm7,%xmm13 movdqa 192-128(%rax),%xmm5 paddd 64-128(%rax),%xmm6 movdqa %xmm5,%xmm7 movdqa %xmm5,%xmm1 psrld $3,%xmm7 movdqa %xmm5,%xmm2 psrld $7,%xmm1 movdqa 144-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm4 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm4,%xmm1 psrld $17,%xmm4 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 psrld $19-17,%xmm4 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm4,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm6 movdqa %xmm9,%xmm7 movdqa %xmm9,%xmm2 psrld $6,%xmm7 movdqa %xmm9,%xmm1 pslld $7,%xmm2 movdqa %xmm6,176-128(%rax) paddd %xmm12,%xmm6 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd -32(%rbp),%xmm6 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm9,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm9,%xmm4 pslld $26-21,%xmm2 pandn %xmm11,%xmm0 pand %xmm10,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm13,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm13,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 movdqa %xmm14,%xmm4 movdqa %xmm13,%xmm7 pslld $10,%xmm2 pxor %xmm13,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm6 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm14,%xmm12 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm12 paddd %xmm6,%xmm8 pxor %xmm2,%xmm7 paddd %xmm6,%xmm12 paddd %xmm7,%xmm12 movdqa 208-128(%rax),%xmm6 paddd 80-128(%rax),%xmm5 movdqa %xmm6,%xmm7 movdqa %xmm6,%xmm1 psrld $3,%xmm7 movdqa %xmm6,%xmm2 psrld $7,%xmm1 movdqa 160-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm3 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm3,%xmm1 psrld $17,%xmm3 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 psrld $19-17,%xmm3 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm3,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm5 movdqa %xmm8,%xmm7 movdqa %xmm8,%xmm2 psrld $6,%xmm7 movdqa %xmm8,%xmm1 pslld $7,%xmm2 movdqa %xmm5,192-128(%rax) paddd %xmm11,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 0(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm8,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm8,%xmm3 pslld $26-21,%xmm2 pandn %xmm10,%xmm0 pand %xmm9,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm12,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm12,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm13,%xmm3 movdqa %xmm12,%xmm7 pslld $10,%xmm2 pxor %xmm12,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm13,%xmm11 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm11 paddd %xmm5,%xmm15 pxor %xmm2,%xmm7 paddd %xmm5,%xmm11 paddd %xmm7,%xmm11 movdqa 224-128(%rax),%xmm5 paddd 96-128(%rax),%xmm6 movdqa %xmm5,%xmm7 movdqa %xmm5,%xmm1 psrld $3,%xmm7 movdqa %xmm5,%xmm2 psrld $7,%xmm1 movdqa 176-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm4 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 
pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm4,%xmm1 psrld $17,%xmm4 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 psrld $19-17,%xmm4 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm4,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm6 movdqa %xmm15,%xmm7 movdqa %xmm15,%xmm2 psrld $6,%xmm7 movdqa %xmm15,%xmm1 pslld $7,%xmm2 movdqa %xmm6,208-128(%rax) paddd %xmm10,%xmm6 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 32(%rbp),%xmm6 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm15,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm15,%xmm4 pslld $26-21,%xmm2 pandn %xmm9,%xmm0 pand %xmm8,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm11,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm11,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 movdqa %xmm12,%xmm4 movdqa %xmm11,%xmm7 pslld $10,%xmm2 pxor %xmm11,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm6 pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm12,%xmm10 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm10 paddd %xmm6,%xmm14 pxor %xmm2,%xmm7 paddd %xmm6,%xmm10 paddd %xmm7,%xmm10 movdqa 240-128(%rax),%xmm6 paddd 112-128(%rax),%xmm5 movdqa %xmm6,%xmm7 movdqa %xmm6,%xmm1 psrld $3,%xmm7 movdqa %xmm6,%xmm2 psrld $7,%xmm1 movdqa 192-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm3 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm3,%xmm1 psrld $17,%xmm3 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 psrld $19-17,%xmm3 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm3,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm5 movdqa %xmm14,%xmm7 movdqa %xmm14,%xmm2 psrld $6,%xmm7 movdqa %xmm14,%xmm1 pslld $7,%xmm2 movdqa %xmm5,224-128(%rax) paddd %xmm9,%xmm5 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 64(%rbp),%xmm5 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm14,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm14,%xmm3 pslld $26-21,%xmm2 pandn %xmm8,%xmm0 pand %xmm15,%xmm3 pxor %xmm1,%xmm7 movdqa %xmm10,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm10,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm5 pxor %xmm3,%xmm0 movdqa %xmm11,%xmm3 movdqa %xmm10,%xmm7 pslld $10,%xmm2 pxor %xmm10,%xmm3 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm5 pslld $19-10,%xmm2 pand %xmm3,%xmm4 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm11,%xmm9 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm4,%xmm9 paddd %xmm5,%xmm13 pxor %xmm2,%xmm7 paddd %xmm5,%xmm9 paddd %xmm7,%xmm9 movdqa 0-128(%rax),%xmm5 paddd 128-128(%rax),%xmm6 movdqa %xmm5,%xmm7 movdqa %xmm5,%xmm1 psrld $3,%xmm7 movdqa %xmm5,%xmm2 psrld $7,%xmm1 movdqa 208-128(%rax),%xmm0 pslld $14,%xmm2 pxor %xmm1,%xmm7 psrld $18-7,%xmm1 movdqa %xmm0,%xmm4 pxor %xmm2,%xmm7 pslld $25-14,%xmm2 pxor %xmm1,%xmm7 psrld $10,%xmm0 movdqa %xmm4,%xmm1 psrld $17,%xmm4 pxor %xmm2,%xmm7 pslld $13,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 psrld $19-17,%xmm4 pxor %xmm1,%xmm0 pslld $15-13,%xmm1 pxor %xmm4,%xmm0 pxor %xmm1,%xmm0 paddd %xmm0,%xmm6 movdqa %xmm13,%xmm7 movdqa %xmm13,%xmm2 psrld $6,%xmm7 movdqa %xmm13,%xmm1 pslld $7,%xmm2 movdqa %xmm6,240-128(%rax) paddd %xmm8,%xmm6 psrld $11,%xmm1 pxor %xmm2,%xmm7 pslld $21-7,%xmm2 paddd 96(%rbp),%xmm6 pxor %xmm1,%xmm7 psrld $25-11,%xmm1 movdqa %xmm13,%xmm0 pxor %xmm2,%xmm7 movdqa %xmm13,%xmm4 pslld $26-21,%xmm2 pandn %xmm15,%xmm0 pand %xmm14,%xmm4 pxor %xmm1,%xmm7 movdqa %xmm9,%xmm1 pxor %xmm2,%xmm7 movdqa %xmm9,%xmm2 psrld $2,%xmm1 paddd %xmm7,%xmm6 pxor %xmm4,%xmm0 movdqa %xmm10,%xmm4 movdqa %xmm9,%xmm7 pslld $10,%xmm2 pxor %xmm9,%xmm4 psrld $13,%xmm7 pxor %xmm2,%xmm1 paddd %xmm0,%xmm6 
pslld $19-10,%xmm2 pand %xmm4,%xmm3 pxor %xmm7,%xmm1 psrld $22-13,%xmm7 pxor %xmm2,%xmm1 movdqa %xmm10,%xmm8 pslld $30-19,%xmm2 pxor %xmm1,%xmm7 pxor %xmm3,%xmm8 paddd %xmm6,%xmm12 pxor %xmm2,%xmm7 paddd %xmm6,%xmm8 paddd %xmm7,%xmm8 leaq 256(%rbp),%rbp decl %ecx jnz .Loop_16_xx movl $1,%ecx leaq K256+128(%rip),%rbp movdqa (%rbx),%xmm7 cmpl 0(%rbx),%ecx pxor %xmm0,%xmm0 cmovgeq %rbp,%r8 cmpl 4(%rbx),%ecx movdqa %xmm7,%xmm6 cmovgeq %rbp,%r9 cmpl 8(%rbx),%ecx pcmpgtd %xmm0,%xmm6 cmovgeq %rbp,%r10 cmpl 12(%rbx),%ecx paddd %xmm6,%xmm7 cmovgeq %rbp,%r11 movdqu 0-128(%rdi),%xmm0 pand %xmm6,%xmm8 movdqu 32-128(%rdi),%xmm1 pand %xmm6,%xmm9 movdqu 64-128(%rdi),%xmm2 pand %xmm6,%xmm10 movdqu 96-128(%rdi),%xmm5 pand %xmm6,%xmm11 paddd %xmm0,%xmm8 movdqu 128-128(%rdi),%xmm0 pand %xmm6,%xmm12 paddd %xmm1,%xmm9 movdqu 160-128(%rdi),%xmm1 pand %xmm6,%xmm13 paddd %xmm2,%xmm10 movdqu 192-128(%rdi),%xmm2 pand %xmm6,%xmm14 paddd %xmm5,%xmm11 movdqu 224-128(%rdi),%xmm5 pand %xmm6,%xmm15 paddd %xmm0,%xmm12 paddd %xmm1,%xmm13 movdqu %xmm8,0-128(%rdi) paddd %xmm2,%xmm14 movdqu %xmm9,32-128(%rdi) paddd %xmm5,%xmm15 movdqu %xmm10,64-128(%rdi) movdqu %xmm11,96-128(%rdi) movdqu %xmm12,128-128(%rdi) movdqu %xmm13,160-128(%rdi) movdqu %xmm14,192-128(%rdi) movdqu %xmm15,224-128(%rdi) movdqa %xmm7,(%rbx) movdqa .Lpbswap(%rip),%xmm6 decl %edx jnz .Loop movl 280(%rsp),%edx leaq 16(%rdi),%rdi leaq 64(%rsi),%rsi decl %edx jnz .Loop_grande .Ldone: movq 272(%rsp),%rax movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Lepilogue: .byte 0xf3,0xc3 .size sha256_multi_block,.-sha256_multi_block .type sha256_multi_block_shaext,@function .align 32 sha256_multi_block_shaext: _shaext_shortcut: movq %rsp,%rax pushq %rbx pushq %rbp subq $288,%rsp shll $1,%edx andq $-256,%rsp leaq 128(%rdi),%rdi movq %rax,272(%rsp) .Lbody_shaext: leaq 256(%rsp),%rbx leaq K256_shaext+128(%rip),%rbp .Loop_grande_shaext: movl %edx,280(%rsp) xorl %edx,%edx movq 0(%rsi),%r8 movl 8(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,0(%rbx) cmovleq %rsp,%r8 movq 16(%rsi),%r9 movl 24(%rsi),%ecx cmpl %edx,%ecx cmovgl %ecx,%edx testl %ecx,%ecx movl %ecx,4(%rbx) cmovleq %rsp,%r9 testl %edx,%edx jz .Ldone_shaext movq 0-128(%rdi),%xmm12 movq 32-128(%rdi),%xmm4 movq 64-128(%rdi),%xmm13 movq 96-128(%rdi),%xmm5 movq 128-128(%rdi),%xmm8 movq 160-128(%rdi),%xmm9 movq 192-128(%rdi),%xmm10 movq 224-128(%rdi),%xmm11 punpckldq %xmm4,%xmm12 punpckldq %xmm5,%xmm13 punpckldq %xmm9,%xmm8 punpckldq %xmm11,%xmm10 movdqa K256_shaext-16(%rip),%xmm3 movdqa %xmm12,%xmm14 movdqa %xmm13,%xmm15 punpcklqdq %xmm8,%xmm12 punpcklqdq %xmm10,%xmm13 punpckhqdq %xmm8,%xmm14 punpckhqdq %xmm10,%xmm15 pshufd $27,%xmm12,%xmm12 pshufd $27,%xmm13,%xmm13 pshufd $27,%xmm14,%xmm14 pshufd $27,%xmm15,%xmm15 jmp .Loop_shaext .align 32 .Loop_shaext: movdqu 0(%r8),%xmm4 movdqu 0(%r9),%xmm8 movdqu 16(%r8),%xmm5 movdqu 16(%r9),%xmm9 movdqu 32(%r8),%xmm6 .byte 102,15,56,0,227 movdqu 32(%r9),%xmm10 .byte 102,68,15,56,0,195 movdqu 48(%r8),%xmm7 leaq 64(%r8),%r8 movdqu 48(%r9),%xmm11 leaq 64(%r9),%r9 movdqa 0-128(%rbp),%xmm0 .byte 102,15,56,0,235 paddd %xmm4,%xmm0 pxor %xmm12,%xmm4 movdqa %xmm0,%xmm1 movdqa 0-128(%rbp),%xmm2 .byte 102,68,15,56,0,203 paddd %xmm8,%xmm2 movdqa %xmm13,80(%rsp) .byte 69,15,56,203,236 pxor %xmm14,%xmm8 movdqa %xmm2,%xmm0 movdqa %xmm15,112(%rsp) .byte 69,15,56,203,254 pshufd $0x0e,%xmm1,%xmm0 pxor %xmm12,%xmm4 movdqa %xmm12,64(%rsp) .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 pxor %xmm14,%xmm8 movdqa %xmm14,96(%rsp) movdqa 16-128(%rbp),%xmm1 paddd %xmm5,%xmm1 
.byte 102,15,56,0,243 .byte 69,15,56,203,247 movdqa %xmm1,%xmm0 movdqa 16-128(%rbp),%xmm2 paddd %xmm9,%xmm2 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 prefetcht0 127(%r8) .byte 102,15,56,0,251 .byte 102,68,15,56,0,211 prefetcht0 127(%r9) .byte 69,15,56,203,254 pshufd $0x0e,%xmm1,%xmm0 .byte 102,68,15,56,0,219 .byte 15,56,204,229 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 32-128(%rbp),%xmm1 paddd %xmm6,%xmm1 .byte 69,15,56,203,247 movdqa %xmm1,%xmm0 movdqa 32-128(%rbp),%xmm2 paddd %xmm10,%xmm2 .byte 69,15,56,203,236 .byte 69,15,56,204,193 movdqa %xmm2,%xmm0 movdqa %xmm7,%xmm3 .byte 69,15,56,203,254 pshufd $0x0e,%xmm1,%xmm0 .byte 102,15,58,15,222,4 paddd %xmm3,%xmm4 movdqa %xmm11,%xmm3 .byte 102,65,15,58,15,218,4 .byte 15,56,204,238 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 48-128(%rbp),%xmm1 paddd %xmm7,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,202 movdqa %xmm1,%xmm0 movdqa 48-128(%rbp),%xmm2 paddd %xmm3,%xmm8 paddd %xmm11,%xmm2 .byte 15,56,205,231 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm4,%xmm3 .byte 102,15,58,15,223,4 .byte 69,15,56,203,254 .byte 69,15,56,205,195 pshufd $0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm5 movdqa %xmm8,%xmm3 .byte 102,65,15,58,15,219,4 .byte 15,56,204,247 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 64-128(%rbp),%xmm1 paddd %xmm4,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,211 movdqa %xmm1,%xmm0 movdqa 64-128(%rbp),%xmm2 paddd %xmm3,%xmm9 paddd %xmm8,%xmm2 .byte 15,56,205,236 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm5,%xmm3 .byte 102,15,58,15,220,4 .byte 69,15,56,203,254 .byte 69,15,56,205,200 pshufd $0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm6 movdqa %xmm9,%xmm3 .byte 102,65,15,58,15,216,4 .byte 15,56,204,252 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 80-128(%rbp),%xmm1 paddd %xmm5,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,216 movdqa %xmm1,%xmm0 movdqa 80-128(%rbp),%xmm2 paddd %xmm3,%xmm10 paddd %xmm9,%xmm2 .byte 15,56,205,245 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm6,%xmm3 .byte 102,15,58,15,221,4 .byte 69,15,56,203,254 .byte 69,15,56,205,209 pshufd $0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm7 movdqa %xmm10,%xmm3 .byte 102,65,15,58,15,217,4 .byte 15,56,204,229 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 96-128(%rbp),%xmm1 paddd %xmm6,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,193 movdqa %xmm1,%xmm0 movdqa 96-128(%rbp),%xmm2 paddd %xmm3,%xmm11 paddd %xmm10,%xmm2 .byte 15,56,205,254 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm7,%xmm3 .byte 102,15,58,15,222,4 .byte 69,15,56,203,254 .byte 69,15,56,205,218 pshufd $0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm4 movdqa %xmm11,%xmm3 .byte 102,65,15,58,15,218,4 .byte 15,56,204,238 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 112-128(%rbp),%xmm1 paddd %xmm7,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,202 movdqa %xmm1,%xmm0 movdqa 112-128(%rbp),%xmm2 paddd %xmm3,%xmm8 paddd %xmm11,%xmm2 .byte 15,56,205,231 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm4,%xmm3 .byte 102,15,58,15,223,4 .byte 69,15,56,203,254 .byte 69,15,56,205,195 pshufd $0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm5 movdqa %xmm8,%xmm3 .byte 102,65,15,58,15,219,4 .byte 15,56,204,247 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 128-128(%rbp),%xmm1 paddd %xmm4,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,211 movdqa %xmm1,%xmm0 movdqa 128-128(%rbp),%xmm2 paddd %xmm3,%xmm9 paddd %xmm8,%xmm2 .byte 15,56,205,236 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm5,%xmm3 .byte 102,15,58,15,220,4 .byte 69,15,56,203,254 .byte 69,15,56,205,200 pshufd 
$0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm6 movdqa %xmm9,%xmm3 .byte 102,65,15,58,15,216,4 .byte 15,56,204,252 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 144-128(%rbp),%xmm1 paddd %xmm5,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,216 movdqa %xmm1,%xmm0 movdqa 144-128(%rbp),%xmm2 paddd %xmm3,%xmm10 paddd %xmm9,%xmm2 .byte 15,56,205,245 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm6,%xmm3 .byte 102,15,58,15,221,4 .byte 69,15,56,203,254 .byte 69,15,56,205,209 pshufd $0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm7 movdqa %xmm10,%xmm3 .byte 102,65,15,58,15,217,4 .byte 15,56,204,229 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 160-128(%rbp),%xmm1 paddd %xmm6,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,193 movdqa %xmm1,%xmm0 movdqa 160-128(%rbp),%xmm2 paddd %xmm3,%xmm11 paddd %xmm10,%xmm2 .byte 15,56,205,254 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm7,%xmm3 .byte 102,15,58,15,222,4 .byte 69,15,56,203,254 .byte 69,15,56,205,218 pshufd $0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm4 movdqa %xmm11,%xmm3 .byte 102,65,15,58,15,218,4 .byte 15,56,204,238 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 176-128(%rbp),%xmm1 paddd %xmm7,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,202 movdqa %xmm1,%xmm0 movdqa 176-128(%rbp),%xmm2 paddd %xmm3,%xmm8 paddd %xmm11,%xmm2 .byte 15,56,205,231 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm4,%xmm3 .byte 102,15,58,15,223,4 .byte 69,15,56,203,254 .byte 69,15,56,205,195 pshufd $0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm5 movdqa %xmm8,%xmm3 .byte 102,65,15,58,15,219,4 .byte 15,56,204,247 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 192-128(%rbp),%xmm1 paddd %xmm4,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,211 movdqa %xmm1,%xmm0 movdqa 192-128(%rbp),%xmm2 paddd %xmm3,%xmm9 paddd %xmm8,%xmm2 .byte 15,56,205,236 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm5,%xmm3 .byte 102,15,58,15,220,4 .byte 69,15,56,203,254 .byte 69,15,56,205,200 pshufd $0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm6 movdqa %xmm9,%xmm3 .byte 102,65,15,58,15,216,4 .byte 15,56,204,252 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 208-128(%rbp),%xmm1 paddd %xmm5,%xmm1 .byte 69,15,56,203,247 .byte 69,15,56,204,216 movdqa %xmm1,%xmm0 movdqa 208-128(%rbp),%xmm2 paddd %xmm3,%xmm10 paddd %xmm9,%xmm2 .byte 15,56,205,245 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movdqa %xmm6,%xmm3 .byte 102,15,58,15,221,4 .byte 69,15,56,203,254 .byte 69,15,56,205,209 pshufd $0x0e,%xmm1,%xmm0 paddd %xmm3,%xmm7 movdqa %xmm10,%xmm3 .byte 102,65,15,58,15,217,4 nop .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 224-128(%rbp),%xmm1 paddd %xmm6,%xmm1 .byte 69,15,56,203,247 movdqa %xmm1,%xmm0 movdqa 224-128(%rbp),%xmm2 paddd %xmm3,%xmm11 paddd %xmm10,%xmm2 .byte 15,56,205,254 nop .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 movl $1,%ecx pxor %xmm6,%xmm6 .byte 69,15,56,203,254 .byte 69,15,56,205,218 pshufd $0x0e,%xmm1,%xmm0 movdqa 240-128(%rbp),%xmm1 paddd %xmm7,%xmm1 movq (%rbx),%xmm7 nop .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 movdqa 240-128(%rbp),%xmm2 paddd %xmm11,%xmm2 .byte 69,15,56,203,247 movdqa %xmm1,%xmm0 cmpl 0(%rbx),%ecx cmovgeq %rsp,%r8 cmpl 4(%rbx),%ecx cmovgeq %rsp,%r9 pshufd $0x00,%xmm7,%xmm9 .byte 69,15,56,203,236 movdqa %xmm2,%xmm0 pshufd $0x55,%xmm7,%xmm10 movdqa %xmm7,%xmm11 .byte 69,15,56,203,254 pshufd $0x0e,%xmm1,%xmm0 pcmpgtd %xmm6,%xmm9 pcmpgtd %xmm6,%xmm10 .byte 69,15,56,203,229 pshufd $0x0e,%xmm2,%xmm0 pcmpgtd %xmm6,%xmm11 movdqa K256_shaext-16(%rip),%xmm3 .byte 69,15,56,203,247 pand %xmm9,%xmm13 pand %xmm10,%xmm15 pand %xmm9,%xmm12 pand %xmm10,%xmm14 
paddd %xmm7,%xmm11 paddd 80(%rsp),%xmm13 paddd 112(%rsp),%xmm15 paddd 64(%rsp),%xmm12 paddd 96(%rsp),%xmm14 movq %xmm11,(%rbx) decl %edx jnz .Loop_shaext movl 280(%rsp),%edx pshufd $27,%xmm12,%xmm12 pshufd $27,%xmm13,%xmm13 pshufd $27,%xmm14,%xmm14 pshufd $27,%xmm15,%xmm15 movdqa %xmm12,%xmm5 movdqa %xmm13,%xmm6 punpckldq %xmm14,%xmm12 punpckhdq %xmm14,%xmm5 punpckldq %xmm15,%xmm13 punpckhdq %xmm15,%xmm6 movq %xmm12,0-128(%rdi) psrldq $8,%xmm12 movq %xmm5,128-128(%rdi) psrldq $8,%xmm5 movq %xmm12,32-128(%rdi) movq %xmm5,160-128(%rdi) movq %xmm13,64-128(%rdi) psrldq $8,%xmm13 movq %xmm6,192-128(%rdi) psrldq $8,%xmm6 movq %xmm13,96-128(%rdi) movq %xmm6,224-128(%rdi) leaq 8(%rdi),%rdi leaq 32(%rsi),%rsi decl %edx jnz .Loop_grande_shaext .Ldone_shaext: movq -16(%rax),%rbp movq -8(%rax),%rbx leaq (%rax),%rsp .Lepilogue_shaext: .byte 0xf3,0xc3 .size sha256_multi_block_shaext,.-sha256_multi_block_shaext +.type sha256_multi_block_avx,@function +.align 32 +sha256_multi_block_avx: +_avx_shortcut: + shrq $32,%rcx + cmpl $2,%edx + jb .Lavx + testl $32,%ecx + jnz _avx2_shortcut + jmp .Lavx +.align 32 +.Lavx: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.Lbody_avx: + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +.Loop_grande_avx: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone_avx + + vmovdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + vmovdqu 32-128(%rdi),%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + vmovdqu 96-128(%rdi),%xmm11 + vmovdqu 128-128(%rdi),%xmm12 + vmovdqu 160-128(%rdi),%xmm13 + vmovdqu 192-128(%rdi),%xmm14 + vmovdqu 224-128(%rdi),%xmm15 + vmovdqu .Lpbswap(%rip),%xmm6 + jmp .Loop_avx + +.align 32 +.Loop_avx: + vpxor %xmm9,%xmm10,%xmm4 + vmovd 0(%r8),%xmm5 + vmovd 0(%r9),%xmm0 + vpinsrd $1,0(%r10),%xmm5,%xmm5 + vpinsrd $1,0(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,0-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovd 4(%r8),%xmm5 + vmovd 4(%r9),%xmm0 + vpinsrd $1,4(%r10),%xmm5,%xmm5 + vpinsrd $1,4(%r11),%xmm0,%xmm0 
+ vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm5,16-128(%rax) + vpaddd %xmm14,%xmm5,%xmm5 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm5,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovd 8(%r8),%xmm5 + vmovd 8(%r9),%xmm0 + vpinsrd $1,8(%r10),%xmm5,%xmm5 + vpinsrd $1,8(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,32-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovd 12(%r8),%xmm5 + vmovd 12(%r9),%xmm0 + vpinsrd $1,12(%r10),%xmm5,%xmm5 + vpinsrd $1,12(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm5,48-128(%rax) + vpaddd %xmm12,%xmm5,%xmm5 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm5,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovd 16(%r8),%xmm5 + vmovd 16(%r9),%xmm0 + vpinsrd $1,16(%r10),%xmm5,%xmm5 + vpinsrd $1,16(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld 
$6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,64-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovd 20(%r8),%xmm5 + vmovd 20(%r9),%xmm0 + vpinsrd $1,20(%r10),%xmm5,%xmm5 + vpinsrd $1,20(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm5,80-128(%rax) + vpaddd %xmm10,%xmm5,%xmm5 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm5,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovd 24(%r8),%xmm5 + vmovd 24(%r9),%xmm0 + vpinsrd $1,24(%r10),%xmm5,%xmm5 + vpinsrd $1,24(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,96-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovd 28(%r8),%xmm5 + vmovd 28(%r9),%xmm0 + vpinsrd $1,28(%r10),%xmm5,%xmm5 + vpinsrd $1,28(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm5,112-128(%rax) + vpaddd 
%xmm8,%xmm5,%xmm5 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm5,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovd 32(%r8),%xmm5 + vmovd 32(%r9),%xmm0 + vpinsrd $1,32(%r10),%xmm5,%xmm5 + vpinsrd $1,32(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,128-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovd 36(%r8),%xmm5 + vmovd 36(%r9),%xmm0 + vpinsrd $1,36(%r10),%xmm5,%xmm5 + vpinsrd $1,36(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm5,144-128(%rax) + vpaddd %xmm14,%xmm5,%xmm5 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm5,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovd 40(%r8),%xmm5 + vmovd 40(%r9),%xmm0 + vpinsrd $1,40(%r10),%xmm5,%xmm5 + vpinsrd $1,40(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,160-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor 
%xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovd 44(%r8),%xmm5 + vmovd 44(%r9),%xmm0 + vpinsrd $1,44(%r10),%xmm5,%xmm5 + vpinsrd $1,44(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm5,176-128(%rax) + vpaddd %xmm12,%xmm5,%xmm5 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm5,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovd 48(%r8),%xmm5 + vmovd 48(%r9),%xmm0 + vpinsrd $1,48(%r10),%xmm5,%xmm5 + vpinsrd $1,48(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,192-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovd 52(%r8),%xmm5 + vmovd 52(%r9),%xmm0 + vpinsrd $1,52(%r10),%xmm5,%xmm5 + vpinsrd $1,52(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm5,208-128(%rax) + vpaddd %xmm10,%xmm5,%xmm5 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm5,%xmm5 + 
vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm5,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovd 56(%r8),%xmm5 + vmovd 56(%r9),%xmm0 + vpinsrd $1,56(%r10),%xmm5,%xmm5 + vpinsrd $1,56(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,224-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + vmovd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + vpinsrd $1,60(%r10),%xmm5,%xmm5 + leaq 64(%r10),%r10 + vpinsrd $1,60(%r11),%xmm0,%xmm0 + leaq 64(%r11),%r11 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm5,240-128(%rax) + vpaddd %xmm8,%xmm5,%xmm5 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + prefetcht0 63(%r8) + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + prefetcht0 63(%r9) + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + prefetcht0 63(%r10) + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + prefetcht0 63(%r11) + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm5,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovdqu 0-128(%rax),%xmm5 + movl $3,%ecx + jmp .Loop_16_xx_avx +.align 32 +.Loop_16_xx_avx: + vmovdqu 16-128(%rax),%xmm6 + vpaddd 144-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor 
%xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 224-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,0-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovdqu 32-128(%rax),%xmm5 + vpaddd 160-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 240-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm6,16-128(%rax) + vpaddd %xmm14,%xmm6,%xmm6 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm6,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovdqu 48-128(%rax),%xmm6 + vpaddd 176-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 0-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + 
vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,32-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovdqu 64-128(%rax),%xmm5 + vpaddd 192-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 16-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm6,48-128(%rax) + vpaddd %xmm12,%xmm6,%xmm6 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm6,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovdqu 80-128(%rax),%xmm6 + vpaddd 208-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 32-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,64-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld 
$7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovdqu 96-128(%rax),%xmm5 + vpaddd 224-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 48-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm6,80-128(%rax) + vpaddd %xmm10,%xmm6,%xmm6 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm6,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovdqu 112-128(%rax),%xmm6 + vpaddd 240-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 64-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,96-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd 
%xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovdqu 128-128(%rax),%xmm5 + vpaddd 0-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 80-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm6,112-128(%rax) + vpaddd %xmm8,%xmm6,%xmm6 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovdqu 144-128(%rax),%xmm6 + vpaddd 16-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 96-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,128-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovdqu 160-128(%rax),%xmm5 + vpaddd 
32-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 112-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm6,144-128(%rax) + vpaddd %xmm14,%xmm6,%xmm6 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm6,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovdqu 176-128(%rax),%xmm6 + vpaddd 48-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 128-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,160-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovdqu 192-128(%rax),%xmm5 + vpaddd 64-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 144-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld 
$15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm6,176-128(%rax) + vpaddd %xmm12,%xmm6,%xmm6 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm6,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovdqu 208-128(%rax),%xmm6 + vpaddd 80-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 160-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,192-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovdqu 224-128(%rax),%xmm5 + vpaddd 96-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 176-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm6,208-128(%rax) + vpaddd %xmm10,%xmm6,%xmm6 + + vpsrld $11,%xmm15,%xmm1 + 
vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm6,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovdqu 240-128(%rax),%xmm6 + vpaddd 112-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 192-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,224-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovdqu 0-128(%rax),%xmm5 + vpaddd 128-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 208-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm6,240-128(%rax) + vpaddd %xmm8,%xmm6,%xmm6 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor 
%xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + decl %ecx + jnz .Loop_16_xx_avx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqa (%rbx),%xmm7 + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa %xmm7,%xmm6 + vpcmpgtd %xmm0,%xmm6,%xmm6 + vpaddd %xmm6,%xmm7,%xmm7 + + vmovdqu 0-128(%rdi),%xmm0 + vpand %xmm6,%xmm8,%xmm8 + vmovdqu 32-128(%rdi),%xmm1 + vpand %xmm6,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm2 + vpand %xmm6,%xmm10,%xmm10 + vmovdqu 96-128(%rdi),%xmm5 + vpand %xmm6,%xmm11,%xmm11 + vpaddd %xmm0,%xmm8,%xmm8 + vmovdqu 128-128(%rdi),%xmm0 + vpand %xmm6,%xmm12,%xmm12 + vpaddd %xmm1,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm1 + vpand %xmm6,%xmm13,%xmm13 + vpaddd %xmm2,%xmm10,%xmm10 + vmovdqu 192-128(%rdi),%xmm2 + vpand %xmm6,%xmm14,%xmm14 + vpaddd %xmm5,%xmm11,%xmm11 + vmovdqu 224-128(%rdi),%xmm5 + vpand %xmm6,%xmm15,%xmm15 + vpaddd %xmm0,%xmm12,%xmm12 + vpaddd %xmm1,%xmm13,%xmm13 + vmovdqu %xmm8,0-128(%rdi) + vpaddd %xmm2,%xmm14,%xmm14 + vmovdqu %xmm9,32-128(%rdi) + vpaddd %xmm5,%xmm15,%xmm15 + vmovdqu %xmm10,64-128(%rdi) + vmovdqu %xmm11,96-128(%rdi) + vmovdqu %xmm12,128-128(%rdi) + vmovdqu %xmm13,160-128(%rdi) + vmovdqu %xmm14,192-128(%rdi) + vmovdqu %xmm15,224-128(%rdi) + + vmovdqu %xmm7,(%rbx) + vmovdqu .Lpbswap(%rip),%xmm6 + decl %edx + jnz .Loop_avx + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande_avx + +.Ldone_avx: + movq 272(%rsp),%rax + vzeroupper + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size sha256_multi_block_avx,.-sha256_multi_block_avx +.type sha256_multi_block_avx2,@function +.align 32 +sha256_multi_block_avx2: +_avx2_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576,%rsp + andq $-256,%rsp + movq %rax,544(%rsp) +.Lbody_avx2: + leaq K256+128(%rip),%rbp + leaq 128(%rdi),%rdi + +.Loop_grande_avx2: + movl %edx,552(%rsp) + xorl %edx,%edx + leaq 512(%rsp),%rbx + movq 0(%rsi),%r12 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r12 + movq 16(%rsi),%r13 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r13 + movq 32(%rsi),%r14 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r14 + movq 48(%rsi),%r15 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r15 + movq 64(%rsi),%r8 + movl 72(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,16(%rbx) + cmovleq %rbp,%r8 + movq 80(%rsi),%r9 + movl 88(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,20(%rbx) + cmovleq %rbp,%r9 + movq 96(%rsi),%r10 + movl 104(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,24(%rbx) + cmovleq %rbp,%r10 + movq 112(%rsi),%r11 + movl 
120(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,28(%rbx) + cmovleq %rbp,%r11 + vmovdqu 0-128(%rdi),%ymm8 + leaq 128(%rsp),%rax + vmovdqu 32-128(%rdi),%ymm9 + leaq 256+128(%rsp),%rbx + vmovdqu 64-128(%rdi),%ymm10 + vmovdqu 96-128(%rdi),%ymm11 + vmovdqu 128-128(%rdi),%ymm12 + vmovdqu 160-128(%rdi),%ymm13 + vmovdqu 192-128(%rdi),%ymm14 + vmovdqu 224-128(%rdi),%ymm15 + vmovdqu .Lpbswap(%rip),%ymm6 + jmp .Loop_avx2 + +.align 32 +.Loop_avx2: + vpxor %ymm9,%ymm10,%ymm4 + vmovd 0(%r12),%xmm5 + vmovd 0(%r8),%xmm0 + vmovd 0(%r13),%xmm1 + vmovd 0(%r9),%xmm2 + vpinsrd $1,0(%r14),%xmm5,%xmm5 + vpinsrd $1,0(%r10),%xmm0,%xmm0 + vpinsrd $1,0(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,0(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,0-128(%rax) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovd 4(%r12),%xmm5 + vmovd 4(%r8),%xmm0 + vmovd 4(%r13),%xmm1 + vmovd 4(%r9),%xmm2 + vpinsrd $1,4(%r14),%xmm5,%xmm5 + vpinsrd $1,4(%r10),%xmm0,%xmm0 + vpinsrd $1,4(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,4(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm5,32-128(%rax) + vpaddd %ymm14,%ymm5,%ymm5 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm5,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovd 8(%r12),%xmm5 + vmovd 8(%r8),%xmm0 + vmovd 8(%r13),%xmm1 + vmovd 8(%r9),%xmm2 + vpinsrd $1,8(%r14),%xmm5,%xmm5 + vpinsrd $1,8(%r10),%xmm0,%xmm0 + vpinsrd $1,8(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,8(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld 
$26,%ymm10,%ymm2 + vmovdqu %ymm5,64-128(%rax) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovd 12(%r12),%xmm5 + vmovd 12(%r8),%xmm0 + vmovd 12(%r13),%xmm1 + vmovd 12(%r9),%xmm2 + vpinsrd $1,12(%r14),%xmm5,%xmm5 + vpinsrd $1,12(%r10),%xmm0,%xmm0 + vpinsrd $1,12(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,12(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm5,96-128(%rax) + vpaddd %ymm12,%ymm5,%ymm5 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm5,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovd 16(%r12),%xmm5 + vmovd 16(%r8),%xmm0 + vmovd 16(%r13),%xmm1 + vmovd 16(%r9),%xmm2 + vpinsrd $1,16(%r14),%xmm5,%xmm5 + vpinsrd $1,16(%r10),%xmm0,%xmm0 + vpinsrd $1,16(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,16(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,128-128(%rax) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor 
%ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovd 20(%r12),%xmm5 + vmovd 20(%r8),%xmm0 + vmovd 20(%r13),%xmm1 + vmovd 20(%r9),%xmm2 + vpinsrd $1,20(%r14),%xmm5,%xmm5 + vpinsrd $1,20(%r10),%xmm0,%xmm0 + vpinsrd $1,20(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,20(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm5,160-128(%rax) + vpaddd %ymm10,%ymm5,%ymm5 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm5,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovd 24(%r12),%xmm5 + vmovd 24(%r8),%xmm0 + vmovd 24(%r13),%xmm1 + vmovd 24(%r9),%xmm2 + vpinsrd $1,24(%r14),%xmm5,%xmm5 + vpinsrd $1,24(%r10),%xmm0,%xmm0 + vpinsrd $1,24(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,24(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,192-128(%rax) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovd 28(%r12),%xmm5 + vmovd 28(%r8),%xmm0 + vmovd 28(%r13),%xmm1 + vmovd 28(%r9),%xmm2 + vpinsrd $1,28(%r14),%xmm5,%xmm5 + vpinsrd $1,28(%r10),%xmm0,%xmm0 + vpinsrd $1,28(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,28(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm5,224-128(%rax) + vpaddd %ymm8,%ymm5,%ymm5 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor 
%ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm5,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovd 32(%r12),%xmm5 + vmovd 32(%r8),%xmm0 + vmovd 32(%r13),%xmm1 + vmovd 32(%r9),%xmm2 + vpinsrd $1,32(%r14),%xmm5,%xmm5 + vpinsrd $1,32(%r10),%xmm0,%xmm0 + vpinsrd $1,32(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,32(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,256-256-128(%rbx) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovd 36(%r12),%xmm5 + vmovd 36(%r8),%xmm0 + vmovd 36(%r13),%xmm1 + vmovd 36(%r9),%xmm2 + vpinsrd $1,36(%r14),%xmm5,%xmm5 + vpinsrd $1,36(%r10),%xmm0,%xmm0 + vpinsrd $1,36(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,36(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm5,288-256-128(%rbx) + vpaddd %ymm14,%ymm5,%ymm5 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm5,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovd 40(%r12),%xmm5 + vmovd 40(%r8),%xmm0 + vmovd 40(%r13),%xmm1 + vmovd 40(%r9),%xmm2 + vpinsrd $1,40(%r14),%xmm5,%xmm5 + vpinsrd $1,40(%r10),%xmm0,%xmm0 + vpinsrd $1,40(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,40(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 
$1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,320-256-128(%rbx) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovd 44(%r12),%xmm5 + vmovd 44(%r8),%xmm0 + vmovd 44(%r13),%xmm1 + vmovd 44(%r9),%xmm2 + vpinsrd $1,44(%r14),%xmm5,%xmm5 + vpinsrd $1,44(%r10),%xmm0,%xmm0 + vpinsrd $1,44(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,44(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm5,352-256-128(%rbx) + vpaddd %ymm12,%ymm5,%ymm5 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm5,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovd 48(%r12),%xmm5 + vmovd 48(%r8),%xmm0 + vmovd 48(%r13),%xmm1 + vmovd 48(%r9),%xmm2 + vpinsrd $1,48(%r14),%xmm5,%xmm5 + vpinsrd $1,48(%r10),%xmm0,%xmm0 + vpinsrd $1,48(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,48(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,384-256-128(%rbx) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld 
$10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovd 52(%r12),%xmm5 + vmovd 52(%r8),%xmm0 + vmovd 52(%r13),%xmm1 + vmovd 52(%r9),%xmm2 + vpinsrd $1,52(%r14),%xmm5,%xmm5 + vpinsrd $1,52(%r10),%xmm0,%xmm0 + vpinsrd $1,52(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,52(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm5,416-256-128(%rbx) + vpaddd %ymm10,%ymm5,%ymm5 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm5,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovd 56(%r12),%xmm5 + vmovd 56(%r8),%xmm0 + vmovd 56(%r13),%xmm1 + vmovd 56(%r9),%xmm2 + vpinsrd $1,56(%r14),%xmm5,%xmm5 + vpinsrd $1,56(%r10),%xmm0,%xmm0 + vpinsrd $1,56(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,56(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,448-256-128(%rbx) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovd 60(%r12),%xmm5 + leaq 64(%r12),%r12 + vmovd 60(%r8),%xmm0 + leaq 64(%r8),%r8 + vmovd 60(%r13),%xmm1 + leaq 64(%r13),%r13 + vmovd 60(%r9),%xmm2 + leaq 64(%r9),%r9 + vpinsrd $1,60(%r14),%xmm5,%xmm5 + leaq 64(%r14),%r14 + vpinsrd $1,60(%r10),%xmm0,%xmm0 + leaq 64(%r10),%r10 + vpinsrd $1,60(%r15),%xmm1,%xmm1 + leaq 64(%r15),%r15 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,60(%r11),%xmm2,%xmm2 + leaq 64(%r11),%r11 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm5,480-256-128(%rbx) + vpaddd %ymm8,%ymm5,%ymm5 + + vpsrld $11,%ymm13,%ymm1 + vpxor 
%ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r12) + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + prefetcht0 63(%r13) + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r14) + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + prefetcht0 63(%r15) + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm9,%ymm1 + prefetcht0 63(%r8) + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + prefetcht0 63(%r9) + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r10) + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm5,%ymm12,%ymm12 + prefetcht0 63(%r11) + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovdqu 0-128(%rax),%ymm5 + movl $3,%ecx + jmp .Loop_16_xx_avx2 +.align 32 +.Loop_16_xx_avx2: + vmovdqu 32-128(%rax),%ymm6 + vpaddd 288-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 448-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,0-128(%rax) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovdqu 64-128(%rax),%ymm5 + vpaddd 320-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 480-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm6,32-128(%rax) + vpaddd %ymm14,%ymm6,%ymm6 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm6,%ymm6 + vpxor 
%ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm6,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovdqu 96-128(%rax),%ymm6 + vpaddd 352-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 0-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,64-128(%rax) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovdqu 128-128(%rax),%ymm5 + vpaddd 384-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 32-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm6,96-128(%rax) + vpaddd %ymm12,%ymm6,%ymm6 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + 
vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm6,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovdqu 160-128(%rax),%ymm6 + vpaddd 416-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 64-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,128-128(%rax) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovdqu 192-128(%rax),%ymm5 + vpaddd 448-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 96-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm6,160-128(%rax) + vpaddd %ymm10,%ymm6,%ymm6 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm6,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor 
%ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovdqu 224-128(%rax),%ymm6 + vpaddd 480-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 128-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,192-128(%rax) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovdqu 256-256-128(%rbx),%ymm5 + vpaddd 0-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 160-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm6,224-128(%rax) + vpaddd %ymm8,%ymm6,%ymm6 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm6,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovdqu 288-256-128(%rbx),%ymm6 + vpaddd 32-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 
192-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,256-256-128(%rbx) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovdqu 320-256-128(%rbx),%ymm5 + vpaddd 64-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 224-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm6,288-256-128(%rbx) + vpaddd %ymm14,%ymm6,%ymm6 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm6,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovdqu 352-256-128(%rbx),%ymm6 + vpaddd 96-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 256-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + 
vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,320-256-128(%rbx) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovdqu 384-256-128(%rbx),%ymm5 + vpaddd 128-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 288-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm6,352-256-128(%rbx) + vpaddd %ymm12,%ymm6,%ymm6 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm6,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovdqu 416-256-128(%rbx),%ymm6 + vpaddd 160-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 320-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,384-256-128(%rbx) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld 
$7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovdqu 448-256-128(%rbx),%ymm5 + vpaddd 192-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 352-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm6,416-256-128(%rbx) + vpaddd %ymm10,%ymm6,%ymm6 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm6,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovdqu 480-256-128(%rbx),%ymm6 + vpaddd 224-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 384-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,448-256-128(%rbx) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld 
$19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovdqu 0-128(%rax),%ymm5 + vpaddd 256-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 416-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm6,480-256-128(%rbx) + vpaddd %ymm8,%ymm6,%ymm6 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm6,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + decl %ecx + jnz .Loop_16_xx_avx2 + + movl $1,%ecx + leaq 512(%rsp),%rbx + leaq K256+128(%rip),%rbp + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r12 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r13 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r14 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r15 + cmpl 16(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 20(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 24(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 28(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqa (%rbx),%ymm7 + vpxor %ymm0,%ymm0,%ymm0 + vmovdqa %ymm7,%ymm6 + vpcmpgtd %ymm0,%ymm6,%ymm6 + vpaddd %ymm6,%ymm7,%ymm7 + + vmovdqu 0-128(%rdi),%ymm0 + vpand %ymm6,%ymm8,%ymm8 + vmovdqu 32-128(%rdi),%ymm1 + vpand %ymm6,%ymm9,%ymm9 + vmovdqu 64-128(%rdi),%ymm2 + vpand %ymm6,%ymm10,%ymm10 + vmovdqu 96-128(%rdi),%ymm5 + vpand %ymm6,%ymm11,%ymm11 + vpaddd %ymm0,%ymm8,%ymm8 + vmovdqu 128-128(%rdi),%ymm0 + vpand %ymm6,%ymm12,%ymm12 + vpaddd %ymm1,%ymm9,%ymm9 + vmovdqu 160-128(%rdi),%ymm1 + vpand %ymm6,%ymm13,%ymm13 + vpaddd %ymm2,%ymm10,%ymm10 + vmovdqu 192-128(%rdi),%ymm2 + vpand %ymm6,%ymm14,%ymm14 + vpaddd %ymm5,%ymm11,%ymm11 + vmovdqu 224-128(%rdi),%ymm5 + vpand %ymm6,%ymm15,%ymm15 + vpaddd %ymm0,%ymm12,%ymm12 + vpaddd %ymm1,%ymm13,%ymm13 + vmovdqu %ymm8,0-128(%rdi) + vpaddd %ymm2,%ymm14,%ymm14 + vmovdqu %ymm9,32-128(%rdi) + vpaddd %ymm5,%ymm15,%ymm15 + vmovdqu %ymm10,64-128(%rdi) + vmovdqu %ymm11,96-128(%rdi) + vmovdqu %ymm12,128-128(%rdi) + vmovdqu %ymm13,160-128(%rdi) + vmovdqu %ymm14,192-128(%rdi) + vmovdqu %ymm15,224-128(%rdi) + + vmovdqu %ymm7,(%rbx) + leaq 256+128(%rsp),%rbx + vmovdqu .Lpbswap(%rip),%ymm6 + decl %edx + jnz .Loop_avx2 + + + 
+
+
+
+
+
+.Ldone_avx2:
+ movq 544(%rsp),%rax
+ vzeroupper
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lepilogue_avx2:
+ .byte 0xf3,0xc3
+.size sha256_multi_block_avx2,.-sha256_multi_block_avx2
 .align 256
 K256:
 .long 1116352408,1116352408,1116352408,1116352408
 .long 1116352408,1116352408,1116352408,1116352408
 .long 1899447441,1899447441,1899447441,1899447441
 .long 1899447441,1899447441,1899447441,1899447441
 .long 3049323471,3049323471,3049323471,3049323471
 .long 3049323471,3049323471,3049323471,3049323471
 .long 3921009573,3921009573,3921009573,3921009573
 .long 3921009573,3921009573,3921009573,3921009573
 .long 961987163,961987163,961987163,961987163
 .long 961987163,961987163,961987163,961987163
 .long 1508970993,1508970993,1508970993,1508970993
 .long 1508970993,1508970993,1508970993,1508970993
 .long 2453635748,2453635748,2453635748,2453635748
 .long 2453635748,2453635748,2453635748,2453635748
 .long 2870763221,2870763221,2870763221,2870763221
 .long 2870763221,2870763221,2870763221,2870763221
 .long 3624381080,3624381080,3624381080,3624381080
 .long 3624381080,3624381080,3624381080,3624381080
 .long 310598401,310598401,310598401,310598401
 .long 310598401,310598401,310598401,310598401
 .long 607225278,607225278,607225278,607225278
 .long 607225278,607225278,607225278,607225278
 .long 1426881987,1426881987,1426881987,1426881987
 .long 1426881987,1426881987,1426881987,1426881987
 .long 1925078388,1925078388,1925078388,1925078388
 .long 1925078388,1925078388,1925078388,1925078388
 .long 2162078206,2162078206,2162078206,2162078206
 .long 2162078206,2162078206,2162078206,2162078206
 .long 2614888103,2614888103,2614888103,2614888103
 .long 2614888103,2614888103,2614888103,2614888103
 .long 3248222580,3248222580,3248222580,3248222580
 .long 3248222580,3248222580,3248222580,3248222580
 .long 3835390401,3835390401,3835390401,3835390401
 .long 3835390401,3835390401,3835390401,3835390401
 .long 4022224774,4022224774,4022224774,4022224774
 .long 4022224774,4022224774,4022224774,4022224774
 .long 264347078,264347078,264347078,264347078
 .long 264347078,264347078,264347078,264347078
 .long 604807628,604807628,604807628,604807628
 .long 604807628,604807628,604807628,604807628
 .long 770255983,770255983,770255983,770255983
 .long 770255983,770255983,770255983,770255983
 .long 1249150122,1249150122,1249150122,1249150122
 .long 1249150122,1249150122,1249150122,1249150122
 .long 1555081692,1555081692,1555081692,1555081692
 .long 1555081692,1555081692,1555081692,1555081692
 .long 1996064986,1996064986,1996064986,1996064986
 .long 1996064986,1996064986,1996064986,1996064986
 .long 2554220882,2554220882,2554220882,2554220882
 .long 2554220882,2554220882,2554220882,2554220882
 .long 2821834349,2821834349,2821834349,2821834349
 .long 2821834349,2821834349,2821834349,2821834349
 .long 2952996808,2952996808,2952996808,2952996808
 .long 2952996808,2952996808,2952996808,2952996808
 .long 3210313671,3210313671,3210313671,3210313671
 .long 3210313671,3210313671,3210313671,3210313671
 .long 3336571891,3336571891,3336571891,3336571891
 .long 3336571891,3336571891,3336571891,3336571891
 .long 3584528711,3584528711,3584528711,3584528711
 .long 3584528711,3584528711,3584528711,3584528711
 .long 113926993,113926993,113926993,113926993
 .long 113926993,113926993,113926993,113926993
 .long 338241895,338241895,338241895,338241895
 .long 338241895,338241895,338241895,338241895
 .long 666307205,666307205,666307205,666307205
 .long 666307205,666307205,666307205,666307205
 .long 773529912,773529912,773529912,773529912
 .long 773529912,773529912,773529912,773529912
 .long 1294757372,1294757372,1294757372,1294757372
 .long 1294757372,1294757372,1294757372,1294757372
 .long 1396182291,1396182291,1396182291,1396182291
 .long 1396182291,1396182291,1396182291,1396182291
 .long 1695183700,1695183700,1695183700,1695183700
 .long 1695183700,1695183700,1695183700,1695183700
 .long 1986661051,1986661051,1986661051,1986661051
 .long 1986661051,1986661051,1986661051,1986661051
 .long 2177026350,2177026350,2177026350,2177026350
 .long 2177026350,2177026350,2177026350,2177026350
 .long 2456956037,2456956037,2456956037,2456956037
 .long 2456956037,2456956037,2456956037,2456956037
 .long 2730485921,2730485921,2730485921,2730485921
 .long 2730485921,2730485921,2730485921,2730485921
 .long 2820302411,2820302411,2820302411,2820302411
 .long 2820302411,2820302411,2820302411,2820302411
 .long 3259730800,3259730800,3259730800,3259730800
 .long 3259730800,3259730800,3259730800,3259730800
 .long 3345764771,3345764771,3345764771,3345764771
 .long 3345764771,3345764771,3345764771,3345764771
 .long 3516065817,3516065817,3516065817,3516065817
 .long 3516065817,3516065817,3516065817,3516065817
 .long 3600352804,3600352804,3600352804,3600352804
 .long 3600352804,3600352804,3600352804,3600352804
 .long 4094571909,4094571909,4094571909,4094571909
 .long 4094571909,4094571909,4094571909,4094571909
 .long 275423344,275423344,275423344,275423344
 .long 275423344,275423344,275423344,275423344
 .long 430227734,430227734,430227734,430227734
 .long 430227734,430227734,430227734,430227734
 .long 506948616,506948616,506948616,506948616
 .long 506948616,506948616,506948616,506948616
 .long 659060556,659060556,659060556,659060556
 .long 659060556,659060556,659060556,659060556
 .long 883997877,883997877,883997877,883997877
 .long 883997877,883997877,883997877,883997877
 .long 958139571,958139571,958139571,958139571
 .long 958139571,958139571,958139571,958139571
 .long 1322822218,1322822218,1322822218,1322822218
 .long 1322822218,1322822218,1322822218,1322822218
 .long 1537002063,1537002063,1537002063,1537002063
 .long 1537002063,1537002063,1537002063,1537002063
 .long 1747873779,1747873779,1747873779,1747873779
 .long 1747873779,1747873779,1747873779,1747873779
 .long 1955562222,1955562222,1955562222,1955562222
 .long 1955562222,1955562222,1955562222,1955562222
 .long 2024104815,2024104815,2024104815,2024104815
 .long 2024104815,2024104815,2024104815,2024104815
 .long 2227730452,2227730452,2227730452,2227730452
 .long 2227730452,2227730452,2227730452,2227730452
 .long 2361852424,2361852424,2361852424,2361852424
 .long 2361852424,2361852424,2361852424,2361852424
 .long 2428436474,2428436474,2428436474,2428436474
 .long 2428436474,2428436474,2428436474,2428436474
 .long 2756734187,2756734187,2756734187,2756734187
 .long 2756734187,2756734187,2756734187,2756734187
 .long 3204031479,3204031479,3204031479,3204031479
 .long 3204031479,3204031479,3204031479,3204031479
 .long 3329325298,3329325298,3329325298,3329325298
 .long 3329325298,3329325298,3329325298,3329325298
 .Lpbswap:
 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
 K256_shaext:
 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
 .long
0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .byte 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 Index: head/secure/lib/libcrypto/amd64/sha256-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/sha256-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/sha256-x86_64.S (revision 299481) @@ -1,3050 +1,5360 @@ # $FreeBSD$ + # Do not modify. This file is auto-generated from sha512-x86_64.pl. .text .globl sha256_block_data_order .type sha256_block_data_order,@function .align 16 sha256_block_data_order: leaq OPENSSL_ia32cap_P(%rip),%r11 movl 0(%r11),%r9d movl 4(%r11),%r10d movl 8(%r11),%r11d testl $536870912,%r11d jnz _shaext_shortcut + andl $296,%r11d + cmpl $296,%r11d + je .Lavx2_shortcut + andl $1073741824,%r9d + andl $268435968,%r10d + orl %r9d,%r10d + cmpl $1342177792,%r10d + je .Lavx_shortcut testl $512,%r10d jnz .Lssse3_shortcut pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 movq %rsp,%r11 shlq $4,%rdx subq $64+32,%rsp leaq (%rsi,%rdx,4),%rdx andq $-64,%rsp movq %rdi,64+0(%rsp) movq %rsi,64+8(%rsp) movq %rdx,64+16(%rsp) movq %r11,64+24(%rsp) .Lprologue: movl 0(%rdi),%eax movl 4(%rdi),%ebx movl 8(%rdi),%ecx movl 12(%rdi),%edx movl 16(%rdi),%r8d movl 20(%rdi),%r9d movl 24(%rdi),%r10d movl 28(%rdi),%r11d jmp .Lloop .align 16 .Lloop: movl %ebx,%edi leaq K256(%rip),%rbp xorl %ecx,%edi movl 0(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d xorl %r8d,%r13d rorl $9,%r14d xorl %r10d,%r15d movl %r12d,0(%rsp) xorl %eax,%r14d andl %r8d,%r15d rorl $5,%r13d addl %r11d,%r12d xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d addl %r15d,%r12d movl %eax,%r15d addl (%rbp),%r12d xorl %eax,%r14d xorl %ebx,%r15d rorl $6,%r13d movl %ebx,%r11d andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d leaq 4(%rbp),%rbp addl %r14d,%r11d movl 4(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d movl %r8d,%edi xorl %edx,%r13d rorl $9,%r14d xorl %r9d,%edi movl %r12d,4(%rsp) xorl %r11d,%r14d andl %edx,%edi rorl $5,%r13d addl %r10d,%r12d xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d addl %edi,%r12d movl %r11d,%edi addl (%rbp),%r12d xorl %r11d,%r14d xorl %eax,%edi rorl $6,%r13d movl %eax,%r10d andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d leaq 4(%rbp),%rbp addl %r14d,%r10d movl 8(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d xorl %ecx,%r13d rorl $9,%r14d xorl %r8d,%r15d movl %r12d,8(%rsp) xorl %r10d,%r14d andl %ecx,%r15d rorl $5,%r13d addl %r9d,%r12d xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d addl %r15d,%r12d movl %r10d,%r15d addl (%rbp),%r12d xorl %r10d,%r14d xorl %r11d,%r15d rorl $6,%r13d movl %r11d,%r9d andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d leaq 4(%rbp),%rbp addl %r14d,%r9d movl 
12(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d movl %ecx,%edi xorl %ebx,%r13d rorl $9,%r14d xorl %edx,%edi movl %r12d,12(%rsp) xorl %r9d,%r14d andl %ebx,%edi rorl $5,%r13d addl %r8d,%r12d xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d addl %edi,%r12d movl %r9d,%edi addl (%rbp),%r12d xorl %r9d,%r14d xorl %r10d,%edi rorl $6,%r13d movl %r10d,%r8d andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d leaq 20(%rbp),%rbp addl %r14d,%r8d movl 16(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d xorl %eax,%r13d rorl $9,%r14d xorl %ecx,%r15d movl %r12d,16(%rsp) xorl %r8d,%r14d andl %eax,%r15d rorl $5,%r13d addl %edx,%r12d xorl %ecx,%r15d rorl $11,%r14d xorl %eax,%r13d addl %r15d,%r12d movl %r8d,%r15d addl (%rbp),%r12d xorl %r8d,%r14d xorl %r9d,%r15d rorl $6,%r13d movl %r9d,%edx andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx leaq 4(%rbp),%rbp addl %r14d,%edx movl 20(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d movl %eax,%edi xorl %r11d,%r13d rorl $9,%r14d xorl %ebx,%edi movl %r12d,20(%rsp) xorl %edx,%r14d andl %r11d,%edi rorl $5,%r13d addl %ecx,%r12d xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d addl %edi,%r12d movl %edx,%edi addl (%rbp),%r12d xorl %edx,%r14d xorl %r8d,%edi rorl $6,%r13d movl %r8d,%ecx andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx leaq 4(%rbp),%rbp addl %r14d,%ecx movl 24(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d xorl %r10d,%r13d rorl $9,%r14d xorl %eax,%r15d movl %r12d,24(%rsp) xorl %ecx,%r14d andl %r10d,%r15d rorl $5,%r13d addl %ebx,%r12d xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d addl %r15d,%r12d movl %ecx,%r15d addl (%rbp),%r12d xorl %ecx,%r14d xorl %edx,%r15d rorl $6,%r13d movl %edx,%ebx andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx leaq 4(%rbp),%rbp addl %r14d,%ebx movl 28(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d movl %r10d,%edi xorl %r9d,%r13d rorl $9,%r14d xorl %r11d,%edi movl %r12d,28(%rsp) xorl %ebx,%r14d andl %r9d,%edi rorl $5,%r13d addl %eax,%r12d xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d addl %edi,%r12d movl %ebx,%edi addl (%rbp),%r12d xorl %ebx,%r14d xorl %ecx,%edi rorl $6,%r13d movl %ecx,%eax andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax leaq 20(%rbp),%rbp addl %r14d,%eax movl 32(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d xorl %r8d,%r13d rorl $9,%r14d xorl %r10d,%r15d movl %r12d,32(%rsp) xorl %eax,%r14d andl %r8d,%r15d rorl $5,%r13d addl %r11d,%r12d xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d addl %r15d,%r12d movl %eax,%r15d addl (%rbp),%r12d xorl %eax,%r14d xorl %ebx,%r15d rorl $6,%r13d movl %ebx,%r11d andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d leaq 4(%rbp),%rbp addl %r14d,%r11d movl 36(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d movl %r8d,%edi xorl %edx,%r13d rorl $9,%r14d xorl %r9d,%edi movl %r12d,36(%rsp) xorl %r11d,%r14d andl %edx,%edi rorl $5,%r13d addl %r10d,%r12d xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d addl %edi,%r12d movl %r11d,%edi addl (%rbp),%r12d xorl %r11d,%r14d xorl %eax,%edi rorl $6,%r13d movl %eax,%r10d andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d leaq 
4(%rbp),%rbp addl %r14d,%r10d movl 40(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d xorl %ecx,%r13d rorl $9,%r14d xorl %r8d,%r15d movl %r12d,40(%rsp) xorl %r10d,%r14d andl %ecx,%r15d rorl $5,%r13d addl %r9d,%r12d xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d addl %r15d,%r12d movl %r10d,%r15d addl (%rbp),%r12d xorl %r10d,%r14d xorl %r11d,%r15d rorl $6,%r13d movl %r11d,%r9d andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d leaq 4(%rbp),%rbp addl %r14d,%r9d movl 44(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d movl %ecx,%edi xorl %ebx,%r13d rorl $9,%r14d xorl %edx,%edi movl %r12d,44(%rsp) xorl %r9d,%r14d andl %ebx,%edi rorl $5,%r13d addl %r8d,%r12d xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d addl %edi,%r12d movl %r9d,%edi addl (%rbp),%r12d xorl %r9d,%r14d xorl %r10d,%edi rorl $6,%r13d movl %r10d,%r8d andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d leaq 20(%rbp),%rbp addl %r14d,%r8d movl 48(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d xorl %eax,%r13d rorl $9,%r14d xorl %ecx,%r15d movl %r12d,48(%rsp) xorl %r8d,%r14d andl %eax,%r15d rorl $5,%r13d addl %edx,%r12d xorl %ecx,%r15d rorl $11,%r14d xorl %eax,%r13d addl %r15d,%r12d movl %r8d,%r15d addl (%rbp),%r12d xorl %r8d,%r14d xorl %r9d,%r15d rorl $6,%r13d movl %r9d,%edx andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx leaq 4(%rbp),%rbp addl %r14d,%edx movl 52(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d movl %eax,%edi xorl %r11d,%r13d rorl $9,%r14d xorl %ebx,%edi movl %r12d,52(%rsp) xorl %edx,%r14d andl %r11d,%edi rorl $5,%r13d addl %ecx,%r12d xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d addl %edi,%r12d movl %edx,%edi addl (%rbp),%r12d xorl %edx,%r14d xorl %r8d,%edi rorl $6,%r13d movl %r8d,%ecx andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx leaq 4(%rbp),%rbp addl %r14d,%ecx movl 56(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d xorl %r10d,%r13d rorl $9,%r14d xorl %eax,%r15d movl %r12d,56(%rsp) xorl %ecx,%r14d andl %r10d,%r15d rorl $5,%r13d addl %ebx,%r12d xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d addl %r15d,%r12d movl %ecx,%r15d addl (%rbp),%r12d xorl %ecx,%r14d xorl %edx,%r15d rorl $6,%r13d movl %edx,%ebx andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx leaq 4(%rbp),%rbp addl %r14d,%ebx movl 60(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d movl %r10d,%edi xorl %r9d,%r13d rorl $9,%r14d xorl %r11d,%edi movl %r12d,60(%rsp) xorl %ebx,%r14d andl %r9d,%edi rorl $5,%r13d addl %eax,%r12d xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d addl %edi,%r12d movl %ebx,%edi addl (%rbp),%r12d xorl %ebx,%r14d xorl %ecx,%edi rorl $6,%r13d movl %ecx,%eax andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax leaq 20(%rbp),%rbp jmp .Lrounds_16_xx .align 16 .Lrounds_16_xx: movl 4(%rsp),%r13d movl 56(%rsp),%r15d movl %r13d,%r12d rorl $11,%r13d addl %r14d,%eax movl %r15d,%r14d rorl $2,%r15d xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d xorl %r13d,%r12d xorl %r14d,%r15d addl 36(%rsp),%r12d addl 0(%rsp),%r12d movl %r8d,%r13d addl %r15d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d xorl %r8d,%r13d rorl $9,%r14d xorl %r10d,%r15d movl 
%r12d,0(%rsp) xorl %eax,%r14d andl %r8d,%r15d rorl $5,%r13d addl %r11d,%r12d xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d addl %r15d,%r12d movl %eax,%r15d addl (%rbp),%r12d xorl %eax,%r14d xorl %ebx,%r15d rorl $6,%r13d movl %ebx,%r11d andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d leaq 4(%rbp),%rbp movl 8(%rsp),%r13d movl 60(%rsp),%edi movl %r13d,%r12d rorl $11,%r13d addl %r14d,%r11d movl %edi,%r14d rorl $2,%edi xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%edi shrl $10,%r14d rorl $17,%edi xorl %r13d,%r12d xorl %r14d,%edi addl 40(%rsp),%r12d addl 4(%rsp),%r12d movl %edx,%r13d addl %edi,%r12d movl %r11d,%r14d rorl $14,%r13d movl %r8d,%edi xorl %edx,%r13d rorl $9,%r14d xorl %r9d,%edi movl %r12d,4(%rsp) xorl %r11d,%r14d andl %edx,%edi rorl $5,%r13d addl %r10d,%r12d xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d addl %edi,%r12d movl %r11d,%edi addl (%rbp),%r12d xorl %r11d,%r14d xorl %eax,%edi rorl $6,%r13d movl %eax,%r10d andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d leaq 4(%rbp),%rbp movl 12(%rsp),%r13d movl 0(%rsp),%r15d movl %r13d,%r12d rorl $11,%r13d addl %r14d,%r10d movl %r15d,%r14d rorl $2,%r15d xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d xorl %r13d,%r12d xorl %r14d,%r15d addl 44(%rsp),%r12d addl 8(%rsp),%r12d movl %ecx,%r13d addl %r15d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d xorl %ecx,%r13d rorl $9,%r14d xorl %r8d,%r15d movl %r12d,8(%rsp) xorl %r10d,%r14d andl %ecx,%r15d rorl $5,%r13d addl %r9d,%r12d xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d addl %r15d,%r12d movl %r10d,%r15d addl (%rbp),%r12d xorl %r10d,%r14d xorl %r11d,%r15d rorl $6,%r13d movl %r11d,%r9d andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d leaq 4(%rbp),%rbp movl 16(%rsp),%r13d movl 4(%rsp),%edi movl %r13d,%r12d rorl $11,%r13d addl %r14d,%r9d movl %edi,%r14d rorl $2,%edi xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%edi shrl $10,%r14d rorl $17,%edi xorl %r13d,%r12d xorl %r14d,%edi addl 48(%rsp),%r12d addl 12(%rsp),%r12d movl %ebx,%r13d addl %edi,%r12d movl %r9d,%r14d rorl $14,%r13d movl %ecx,%edi xorl %ebx,%r13d rorl $9,%r14d xorl %edx,%edi movl %r12d,12(%rsp) xorl %r9d,%r14d andl %ebx,%edi rorl $5,%r13d addl %r8d,%r12d xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d addl %edi,%r12d movl %r9d,%edi addl (%rbp),%r12d xorl %r9d,%r14d xorl %r10d,%edi rorl $6,%r13d movl %r10d,%r8d andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d leaq 20(%rbp),%rbp movl 20(%rsp),%r13d movl 8(%rsp),%r15d movl %r13d,%r12d rorl $11,%r13d addl %r14d,%r8d movl %r15d,%r14d rorl $2,%r15d xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d xorl %r13d,%r12d xorl %r14d,%r15d addl 52(%rsp),%r12d addl 16(%rsp),%r12d movl %eax,%r13d addl %r15d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d xorl %eax,%r13d rorl $9,%r14d xorl %ecx,%r15d movl %r12d,16(%rsp) xorl %r8d,%r14d andl %eax,%r15d rorl $5,%r13d addl %edx,%r12d xorl %ecx,%r15d rorl $11,%r14d xorl %eax,%r13d addl %r15d,%r12d movl %r8d,%r15d addl (%rbp),%r12d xorl %r8d,%r14d xorl %r9d,%r15d rorl $6,%r13d movl %r9d,%edx andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx leaq 4(%rbp),%rbp movl 24(%rsp),%r13d movl 12(%rsp),%edi movl %r13d,%r12d rorl $11,%r13d addl %r14d,%edx movl %edi,%r14d rorl $2,%edi xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d 
xorl %r14d,%edi shrl $10,%r14d rorl $17,%edi xorl %r13d,%r12d xorl %r14d,%edi addl 56(%rsp),%r12d addl 20(%rsp),%r12d movl %r11d,%r13d addl %edi,%r12d movl %edx,%r14d rorl $14,%r13d movl %eax,%edi xorl %r11d,%r13d rorl $9,%r14d xorl %ebx,%edi movl %r12d,20(%rsp) xorl %edx,%r14d andl %r11d,%edi rorl $5,%r13d addl %ecx,%r12d xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d addl %edi,%r12d movl %edx,%edi addl (%rbp),%r12d xorl %edx,%r14d xorl %r8d,%edi rorl $6,%r13d movl %r8d,%ecx andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx leaq 4(%rbp),%rbp movl 28(%rsp),%r13d movl 16(%rsp),%r15d movl %r13d,%r12d rorl $11,%r13d addl %r14d,%ecx movl %r15d,%r14d rorl $2,%r15d xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d xorl %r13d,%r12d xorl %r14d,%r15d addl 60(%rsp),%r12d addl 24(%rsp),%r12d movl %r10d,%r13d addl %r15d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d xorl %r10d,%r13d rorl $9,%r14d xorl %eax,%r15d movl %r12d,24(%rsp) xorl %ecx,%r14d andl %r10d,%r15d rorl $5,%r13d addl %ebx,%r12d xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d addl %r15d,%r12d movl %ecx,%r15d addl (%rbp),%r12d xorl %ecx,%r14d xorl %edx,%r15d rorl $6,%r13d movl %edx,%ebx andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx leaq 4(%rbp),%rbp movl 32(%rsp),%r13d movl 20(%rsp),%edi movl %r13d,%r12d rorl $11,%r13d addl %r14d,%ebx movl %edi,%r14d rorl $2,%edi xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%edi shrl $10,%r14d rorl $17,%edi xorl %r13d,%r12d xorl %r14d,%edi addl 0(%rsp),%r12d addl 28(%rsp),%r12d movl %r9d,%r13d addl %edi,%r12d movl %ebx,%r14d rorl $14,%r13d movl %r10d,%edi xorl %r9d,%r13d rorl $9,%r14d xorl %r11d,%edi movl %r12d,28(%rsp) xorl %ebx,%r14d andl %r9d,%edi rorl $5,%r13d addl %eax,%r12d xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d addl %edi,%r12d movl %ebx,%edi addl (%rbp),%r12d xorl %ebx,%r14d xorl %ecx,%edi rorl $6,%r13d movl %ecx,%eax andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax leaq 20(%rbp),%rbp movl 36(%rsp),%r13d movl 24(%rsp),%r15d movl %r13d,%r12d rorl $11,%r13d addl %r14d,%eax movl %r15d,%r14d rorl $2,%r15d xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d xorl %r13d,%r12d xorl %r14d,%r15d addl 4(%rsp),%r12d addl 32(%rsp),%r12d movl %r8d,%r13d addl %r15d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d xorl %r8d,%r13d rorl $9,%r14d xorl %r10d,%r15d movl %r12d,32(%rsp) xorl %eax,%r14d andl %r8d,%r15d rorl $5,%r13d addl %r11d,%r12d xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d addl %r15d,%r12d movl %eax,%r15d addl (%rbp),%r12d xorl %eax,%r14d xorl %ebx,%r15d rorl $6,%r13d movl %ebx,%r11d andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d leaq 4(%rbp),%rbp movl 40(%rsp),%r13d movl 28(%rsp),%edi movl %r13d,%r12d rorl $11,%r13d addl %r14d,%r11d movl %edi,%r14d rorl $2,%edi xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%edi shrl $10,%r14d rorl $17,%edi xorl %r13d,%r12d xorl %r14d,%edi addl 8(%rsp),%r12d addl 36(%rsp),%r12d movl %edx,%r13d addl %edi,%r12d movl %r11d,%r14d rorl $14,%r13d movl %r8d,%edi xorl %edx,%r13d rorl $9,%r14d xorl %r9d,%edi movl %r12d,36(%rsp) xorl %r11d,%r14d andl %edx,%edi rorl $5,%r13d addl %r10d,%r12d xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d addl %edi,%r12d movl %r11d,%edi addl (%rbp),%r12d xorl %r11d,%r14d xorl %eax,%edi rorl $6,%r13d movl %eax,%r10d andl %edi,%r15d rorl 
$2,%r14d addl %r13d,%r12d xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d leaq 4(%rbp),%rbp movl 44(%rsp),%r13d movl 32(%rsp),%r15d movl %r13d,%r12d rorl $11,%r13d addl %r14d,%r10d movl %r15d,%r14d rorl $2,%r15d xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d xorl %r13d,%r12d xorl %r14d,%r15d addl 12(%rsp),%r12d addl 40(%rsp),%r12d movl %ecx,%r13d addl %r15d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d xorl %ecx,%r13d rorl $9,%r14d xorl %r8d,%r15d movl %r12d,40(%rsp) xorl %r10d,%r14d andl %ecx,%r15d rorl $5,%r13d addl %r9d,%r12d xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d addl %r15d,%r12d movl %r10d,%r15d addl (%rbp),%r12d xorl %r10d,%r14d xorl %r11d,%r15d rorl $6,%r13d movl %r11d,%r9d andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d leaq 4(%rbp),%rbp movl 48(%rsp),%r13d movl 36(%rsp),%edi movl %r13d,%r12d rorl $11,%r13d addl %r14d,%r9d movl %edi,%r14d rorl $2,%edi xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%edi shrl $10,%r14d rorl $17,%edi xorl %r13d,%r12d xorl %r14d,%edi addl 16(%rsp),%r12d addl 44(%rsp),%r12d movl %ebx,%r13d addl %edi,%r12d movl %r9d,%r14d rorl $14,%r13d movl %ecx,%edi xorl %ebx,%r13d rorl $9,%r14d xorl %edx,%edi movl %r12d,44(%rsp) xorl %r9d,%r14d andl %ebx,%edi rorl $5,%r13d addl %r8d,%r12d xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d addl %edi,%r12d movl %r9d,%edi addl (%rbp),%r12d xorl %r9d,%r14d xorl %r10d,%edi rorl $6,%r13d movl %r10d,%r8d andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d leaq 20(%rbp),%rbp movl 52(%rsp),%r13d movl 40(%rsp),%r15d movl %r13d,%r12d rorl $11,%r13d addl %r14d,%r8d movl %r15d,%r14d rorl $2,%r15d xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d xorl %r13d,%r12d xorl %r14d,%r15d addl 20(%rsp),%r12d addl 48(%rsp),%r12d movl %eax,%r13d addl %r15d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d xorl %eax,%r13d rorl $9,%r14d xorl %ecx,%r15d movl %r12d,48(%rsp) xorl %r8d,%r14d andl %eax,%r15d rorl $5,%r13d addl %edx,%r12d xorl %ecx,%r15d rorl $11,%r14d xorl %eax,%r13d addl %r15d,%r12d movl %r8d,%r15d addl (%rbp),%r12d xorl %r8d,%r14d xorl %r9d,%r15d rorl $6,%r13d movl %r9d,%edx andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx leaq 4(%rbp),%rbp movl 56(%rsp),%r13d movl 44(%rsp),%edi movl %r13d,%r12d rorl $11,%r13d addl %r14d,%edx movl %edi,%r14d rorl $2,%edi xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%edi shrl $10,%r14d rorl $17,%edi xorl %r13d,%r12d xorl %r14d,%edi addl 24(%rsp),%r12d addl 52(%rsp),%r12d movl %r11d,%r13d addl %edi,%r12d movl %edx,%r14d rorl $14,%r13d movl %eax,%edi xorl %r11d,%r13d rorl $9,%r14d xorl %ebx,%edi movl %r12d,52(%rsp) xorl %edx,%r14d andl %r11d,%edi rorl $5,%r13d addl %ecx,%r12d xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d addl %edi,%r12d movl %edx,%edi addl (%rbp),%r12d xorl %edx,%r14d xorl %r8d,%edi rorl $6,%r13d movl %r8d,%ecx andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx leaq 4(%rbp),%rbp movl 60(%rsp),%r13d movl 48(%rsp),%r15d movl %r13d,%r12d rorl $11,%r13d addl %r14d,%ecx movl %r15d,%r14d rorl $2,%r15d xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d xorl %r13d,%r12d xorl %r14d,%r15d addl 28(%rsp),%r12d addl 56(%rsp),%r12d movl %r10d,%r13d addl %r15d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d xorl %r10d,%r13d rorl $9,%r14d xorl %eax,%r15d 
movl %r12d,56(%rsp) xorl %ecx,%r14d andl %r10d,%r15d rorl $5,%r13d addl %ebx,%r12d xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d addl %r15d,%r12d movl %ecx,%r15d addl (%rbp),%r12d xorl %ecx,%r14d xorl %edx,%r15d rorl $6,%r13d movl %edx,%ebx andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx leaq 4(%rbp),%rbp movl 0(%rsp),%r13d movl 52(%rsp),%edi movl %r13d,%r12d rorl $11,%r13d addl %r14d,%ebx movl %edi,%r14d rorl $2,%edi xorl %r12d,%r13d shrl $3,%r12d rorl $7,%r13d xorl %r14d,%edi shrl $10,%r14d rorl $17,%edi xorl %r13d,%r12d xorl %r14d,%edi addl 32(%rsp),%r12d addl 60(%rsp),%r12d movl %r9d,%r13d addl %edi,%r12d movl %ebx,%r14d rorl $14,%r13d movl %r10d,%edi xorl %r9d,%r13d rorl $9,%r14d xorl %r11d,%edi movl %r12d,60(%rsp) xorl %ebx,%r14d andl %r9d,%edi rorl $5,%r13d addl %eax,%r12d xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d addl %edi,%r12d movl %ebx,%edi addl (%rbp),%r12d xorl %ebx,%r14d xorl %ecx,%edi rorl $6,%r13d movl %ecx,%eax andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax leaq 20(%rbp),%rbp cmpb $0,3(%rbp) jnz .Lrounds_16_xx movq 64+0(%rsp),%rdi addl %r14d,%eax leaq 64(%rsi),%rsi addl 0(%rdi),%eax addl 4(%rdi),%ebx addl 8(%rdi),%ecx addl 12(%rdi),%edx addl 16(%rdi),%r8d addl 20(%rdi),%r9d addl 24(%rdi),%r10d addl 28(%rdi),%r11d cmpq 64+16(%rsp),%rsi movl %eax,0(%rdi) movl %ebx,4(%rdi) movl %ecx,8(%rdi) movl %edx,12(%rdi) movl %r8d,16(%rdi) movl %r9d,20(%rdi) movl %r10d,24(%rdi) movl %r11d,28(%rdi) jb .Lloop movq 64+24(%rsp),%rsi movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Lepilogue: .byte 0xf3,0xc3 .size sha256_block_data_order,.-sha256_block_data_order .align 64 .type K256,@object K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f .long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff 
.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .type sha256_block_data_order_shaext,@function .align 64 sha256_block_data_order_shaext: _shaext_shortcut: leaq K256+128(%rip),%rcx movdqu (%rdi),%xmm1 movdqu 16(%rdi),%xmm2 movdqa 512-128(%rcx),%xmm7 pshufd $0x1b,%xmm1,%xmm0 pshufd $0xb1,%xmm1,%xmm1 pshufd $0x1b,%xmm2,%xmm2 movdqa %xmm7,%xmm8 .byte 102,15,58,15,202,8 punpcklqdq %xmm0,%xmm2 jmp .Loop_shaext .align 16 .Loop_shaext: movdqu (%rsi),%xmm3 movdqu 16(%rsi),%xmm4 movdqu 32(%rsi),%xmm5 .byte 102,15,56,0,223 movdqu 48(%rsi),%xmm6 movdqa 0-128(%rcx),%xmm0 paddd %xmm3,%xmm0 .byte 102,15,56,0,231 movdqa %xmm2,%xmm10 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 nop movdqa %xmm1,%xmm9 .byte 15,56,203,202 movdqa 32-128(%rcx),%xmm0 paddd %xmm4,%xmm0 .byte 102,15,56,0,239 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 leaq 64(%rsi),%rsi .byte 15,56,204,220 .byte 15,56,203,202 movdqa 64-128(%rcx),%xmm0 paddd %xmm5,%xmm0 .byte 102,15,56,0,247 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm6,%xmm7 .byte 102,15,58,15,253,4 nop paddd %xmm7,%xmm3 .byte 15,56,204,229 .byte 15,56,203,202 movdqa 96-128(%rcx),%xmm0 paddd %xmm6,%xmm0 .byte 15,56,205,222 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm3,%xmm7 .byte 102,15,58,15,254,4 nop paddd %xmm7,%xmm4 .byte 15,56,204,238 .byte 15,56,203,202 movdqa 128-128(%rcx),%xmm0 paddd %xmm3,%xmm0 .byte 15,56,205,227 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm4,%xmm7 .byte 102,15,58,15,251,4 nop paddd %xmm7,%xmm5 .byte 15,56,204,243 .byte 15,56,203,202 movdqa 160-128(%rcx),%xmm0 paddd %xmm4,%xmm0 .byte 15,56,205,236 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm5,%xmm7 .byte 102,15,58,15,252,4 nop paddd %xmm7,%xmm6 .byte 15,56,204,220 .byte 15,56,203,202 movdqa 192-128(%rcx),%xmm0 paddd %xmm5,%xmm0 .byte 15,56,205,245 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm6,%xmm7 .byte 102,15,58,15,253,4 nop paddd %xmm7,%xmm3 .byte 15,56,204,229 .byte 15,56,203,202 movdqa 224-128(%rcx),%xmm0 paddd %xmm6,%xmm0 .byte 15,56,205,222 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm3,%xmm7 .byte 102,15,58,15,254,4 nop paddd %xmm7,%xmm4 .byte 15,56,204,238 .byte 15,56,203,202 movdqa 256-128(%rcx),%xmm0 paddd %xmm3,%xmm0 .byte 15,56,205,227 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm4,%xmm7 .byte 102,15,58,15,251,4 nop paddd %xmm7,%xmm5 .byte 15,56,204,243 .byte 15,56,203,202 movdqa 288-128(%rcx),%xmm0 paddd %xmm4,%xmm0 .byte 15,56,205,236 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm5,%xmm7 .byte 102,15,58,15,252,4 nop paddd %xmm7,%xmm6 .byte 15,56,204,220 .byte 15,56,203,202 movdqa 320-128(%rcx),%xmm0 paddd %xmm5,%xmm0 .byte 15,56,205,245 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm6,%xmm7 .byte 102,15,58,15,253,4 nop paddd %xmm7,%xmm3 .byte 15,56,204,229 .byte 15,56,203,202 movdqa 352-128(%rcx),%xmm0 paddd %xmm6,%xmm0 .byte 15,56,205,222 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm3,%xmm7 .byte 102,15,58,15,254,4 nop paddd %xmm7,%xmm4 .byte 15,56,204,238 .byte 15,56,203,202 movdqa 384-128(%rcx),%xmm0 paddd %xmm3,%xmm0 .byte 15,56,205,227 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm4,%xmm7 .byte 102,15,58,15,251,4 nop paddd 
%xmm7,%xmm5 .byte 15,56,204,243 .byte 15,56,203,202 movdqa 416-128(%rcx),%xmm0 paddd %xmm4,%xmm0 .byte 15,56,205,236 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm5,%xmm7 .byte 102,15,58,15,252,4 .byte 15,56,203,202 paddd %xmm7,%xmm6 movdqa 448-128(%rcx),%xmm0 paddd %xmm5,%xmm0 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 .byte 15,56,205,245 movdqa %xmm8,%xmm7 .byte 15,56,203,202 movdqa 480-128(%rcx),%xmm0 paddd %xmm6,%xmm0 nop .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 decq %rdx nop .byte 15,56,203,202 paddd %xmm10,%xmm2 paddd %xmm9,%xmm1 jnz .Loop_shaext pshufd $0xb1,%xmm2,%xmm2 pshufd $0x1b,%xmm1,%xmm7 pshufd $0xb1,%xmm1,%xmm1 punpckhqdq %xmm2,%xmm1 .byte 102,15,58,15,215,8 movdqu %xmm1,(%rdi) movdqu %xmm2,16(%rdi) .byte 0xf3,0xc3 .size sha256_block_data_order_shaext,.-sha256_block_data_order_shaext .type sha256_block_data_order_ssse3,@function .align 64 sha256_block_data_order_ssse3: .Lssse3_shortcut: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 movq %rsp,%r11 shlq $4,%rdx subq $96,%rsp leaq (%rsi,%rdx,4),%rdx andq $-64,%rsp movq %rdi,64+0(%rsp) movq %rsi,64+8(%rsp) movq %rdx,64+16(%rsp) movq %r11,64+24(%rsp) .Lprologue_ssse3: movl 0(%rdi),%eax movl 4(%rdi),%ebx movl 8(%rdi),%ecx movl 12(%rdi),%edx movl 16(%rdi),%r8d movl 20(%rdi),%r9d movl 24(%rdi),%r10d movl 28(%rdi),%r11d jmp .Lloop_ssse3 .align 16 .Lloop_ssse3: movdqa K256+512(%rip),%xmm7 movdqu 0(%rsi),%xmm0 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 .byte 102,15,56,0,199 movdqu 48(%rsi),%xmm3 leaq K256(%rip),%rbp .byte 102,15,56,0,207 movdqa 0(%rbp),%xmm4 movdqa 32(%rbp),%xmm5 .byte 102,15,56,0,215 paddd %xmm0,%xmm4 movdqa 64(%rbp),%xmm6 .byte 102,15,56,0,223 movdqa 96(%rbp),%xmm7 paddd %xmm1,%xmm5 paddd %xmm2,%xmm6 paddd %xmm3,%xmm7 movdqa %xmm4,0(%rsp) movl %eax,%r14d movdqa %xmm5,16(%rsp) movl %ebx,%edi movdqa %xmm6,32(%rsp) xorl %ecx,%edi movdqa %xmm7,48(%rsp) movl %r8d,%r13d jmp .Lssse3_00_47 .align 16 .Lssse3_00_47: subq $-128,%rbp rorl $14,%r13d movdqa %xmm1,%xmm4 movl %r14d,%eax movl %r9d,%r12d movdqa %xmm3,%xmm7 rorl $9,%r14d xorl %r8d,%r13d xorl %r10d,%r12d rorl $5,%r13d xorl %eax,%r14d .byte 102,15,58,15,224,4 andl %r8d,%r12d xorl %r8d,%r13d .byte 102,15,58,15,250,4 addl 0(%rsp),%r11d movl %eax,%r15d xorl %r10d,%r12d rorl $11,%r14d movdqa %xmm4,%xmm5 xorl %ebx,%r15d addl %r12d,%r11d movdqa %xmm4,%xmm6 rorl $6,%r13d andl %r15d,%edi psrld $3,%xmm4 xorl %eax,%r14d addl %r13d,%r11d xorl %ebx,%edi paddd %xmm7,%xmm0 rorl $2,%r14d addl %r11d,%edx psrld $7,%xmm6 addl %edi,%r11d movl %edx,%r13d pshufd $250,%xmm3,%xmm7 addl %r11d,%r14d rorl $14,%r13d pslld $14,%xmm5 movl %r14d,%r11d movl %r8d,%r12d pxor %xmm6,%xmm4 rorl $9,%r14d xorl %edx,%r13d xorl %r9d,%r12d rorl $5,%r13d psrld $11,%xmm6 xorl %r11d,%r14d pxor %xmm5,%xmm4 andl %edx,%r12d xorl %edx,%r13d pslld $11,%xmm5 addl 4(%rsp),%r10d movl %r11d,%edi pxor %xmm6,%xmm4 xorl %r9d,%r12d rorl $11,%r14d movdqa %xmm7,%xmm6 xorl %eax,%edi addl %r12d,%r10d pxor %xmm5,%xmm4 rorl $6,%r13d andl %edi,%r15d xorl %r11d,%r14d psrld $10,%xmm7 addl %r13d,%r10d xorl %eax,%r15d paddd %xmm4,%xmm0 rorl $2,%r14d addl %r10d,%ecx psrlq $17,%xmm6 addl %r15d,%r10d movl %ecx,%r13d addl %r10d,%r14d pxor %xmm6,%xmm7 rorl $14,%r13d movl %r14d,%r10d movl %edx,%r12d rorl $9,%r14d psrlq $2,%xmm6 xorl %ecx,%r13d xorl %r8d,%r12d pxor %xmm6,%xmm7 rorl $5,%r13d xorl %r10d,%r14d andl %ecx,%r12d pshufd $128,%xmm7,%xmm7 xorl %ecx,%r13d addl 8(%rsp),%r9d movl %r10d,%r15d psrldq $8,%xmm7 xorl %r8d,%r12d rorl $11,%r14d xorl %r11d,%r15d addl %r12d,%r9d rorl $6,%r13d paddd %xmm7,%xmm0 
andl %r15d,%edi xorl %r10d,%r14d addl %r13d,%r9d pshufd $80,%xmm0,%xmm7 xorl %r11d,%edi rorl $2,%r14d addl %r9d,%ebx movdqa %xmm7,%xmm6 addl %edi,%r9d movl %ebx,%r13d psrld $10,%xmm7 addl %r9d,%r14d rorl $14,%r13d psrlq $17,%xmm6 movl %r14d,%r9d movl %ecx,%r12d pxor %xmm6,%xmm7 rorl $9,%r14d xorl %ebx,%r13d xorl %edx,%r12d rorl $5,%r13d xorl %r9d,%r14d psrlq $2,%xmm6 andl %ebx,%r12d xorl %ebx,%r13d addl 12(%rsp),%r8d pxor %xmm6,%xmm7 movl %r9d,%edi xorl %edx,%r12d rorl $11,%r14d pshufd $8,%xmm7,%xmm7 xorl %r10d,%edi addl %r12d,%r8d movdqa 0(%rbp),%xmm6 rorl $6,%r13d andl %edi,%r15d pslldq $8,%xmm7 xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d paddd %xmm7,%xmm0 rorl $2,%r14d addl %r8d,%eax addl %r15d,%r8d paddd %xmm0,%xmm6 movl %eax,%r13d addl %r8d,%r14d movdqa %xmm6,0(%rsp) rorl $14,%r13d movdqa %xmm2,%xmm4 movl %r14d,%r8d movl %ebx,%r12d movdqa %xmm0,%xmm7 rorl $9,%r14d xorl %eax,%r13d xorl %ecx,%r12d rorl $5,%r13d xorl %r8d,%r14d .byte 102,15,58,15,225,4 andl %eax,%r12d xorl %eax,%r13d .byte 102,15,58,15,251,4 addl 16(%rsp),%edx movl %r8d,%r15d xorl %ecx,%r12d rorl $11,%r14d movdqa %xmm4,%xmm5 xorl %r9d,%r15d addl %r12d,%edx movdqa %xmm4,%xmm6 rorl $6,%r13d andl %r15d,%edi psrld $3,%xmm4 xorl %r8d,%r14d addl %r13d,%edx xorl %r9d,%edi paddd %xmm7,%xmm1 rorl $2,%r14d addl %edx,%r11d psrld $7,%xmm6 addl %edi,%edx movl %r11d,%r13d pshufd $250,%xmm0,%xmm7 addl %edx,%r14d rorl $14,%r13d pslld $14,%xmm5 movl %r14d,%edx movl %eax,%r12d pxor %xmm6,%xmm4 rorl $9,%r14d xorl %r11d,%r13d xorl %ebx,%r12d rorl $5,%r13d psrld $11,%xmm6 xorl %edx,%r14d pxor %xmm5,%xmm4 andl %r11d,%r12d xorl %r11d,%r13d pslld $11,%xmm5 addl 20(%rsp),%ecx movl %edx,%edi pxor %xmm6,%xmm4 xorl %ebx,%r12d rorl $11,%r14d movdqa %xmm7,%xmm6 xorl %r8d,%edi addl %r12d,%ecx pxor %xmm5,%xmm4 rorl $6,%r13d andl %edi,%r15d xorl %edx,%r14d psrld $10,%xmm7 addl %r13d,%ecx xorl %r8d,%r15d paddd %xmm4,%xmm1 rorl $2,%r14d addl %ecx,%r10d psrlq $17,%xmm6 addl %r15d,%ecx movl %r10d,%r13d addl %ecx,%r14d pxor %xmm6,%xmm7 rorl $14,%r13d movl %r14d,%ecx movl %r11d,%r12d rorl $9,%r14d psrlq $2,%xmm6 xorl %r10d,%r13d xorl %eax,%r12d pxor %xmm6,%xmm7 rorl $5,%r13d xorl %ecx,%r14d andl %r10d,%r12d pshufd $128,%xmm7,%xmm7 xorl %r10d,%r13d addl 24(%rsp),%ebx movl %ecx,%r15d psrldq $8,%xmm7 xorl %eax,%r12d rorl $11,%r14d xorl %edx,%r15d addl %r12d,%ebx rorl $6,%r13d paddd %xmm7,%xmm1 andl %r15d,%edi xorl %ecx,%r14d addl %r13d,%ebx pshufd $80,%xmm1,%xmm7 xorl %edx,%edi rorl $2,%r14d addl %ebx,%r9d movdqa %xmm7,%xmm6 addl %edi,%ebx movl %r9d,%r13d psrld $10,%xmm7 addl %ebx,%r14d rorl $14,%r13d psrlq $17,%xmm6 movl %r14d,%ebx movl %r10d,%r12d pxor %xmm6,%xmm7 rorl $9,%r14d xorl %r9d,%r13d xorl %r11d,%r12d rorl $5,%r13d xorl %ebx,%r14d psrlq $2,%xmm6 andl %r9d,%r12d xorl %r9d,%r13d addl 28(%rsp),%eax pxor %xmm6,%xmm7 movl %ebx,%edi xorl %r11d,%r12d rorl $11,%r14d pshufd $8,%xmm7,%xmm7 xorl %ecx,%edi addl %r12d,%eax movdqa 32(%rbp),%xmm6 rorl $6,%r13d andl %edi,%r15d pslldq $8,%xmm7 xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d paddd %xmm7,%xmm1 rorl $2,%r14d addl %eax,%r8d addl %r15d,%eax paddd %xmm1,%xmm6 movl %r8d,%r13d addl %eax,%r14d movdqa %xmm6,16(%rsp) rorl $14,%r13d movdqa %xmm3,%xmm4 movl %r14d,%eax movl %r9d,%r12d movdqa %xmm1,%xmm7 rorl $9,%r14d xorl %r8d,%r13d xorl %r10d,%r12d rorl $5,%r13d xorl %eax,%r14d .byte 102,15,58,15,226,4 andl %r8d,%r12d xorl %r8d,%r13d .byte 102,15,58,15,248,4 addl 32(%rsp),%r11d movl %eax,%r15d xorl %r10d,%r12d rorl $11,%r14d movdqa %xmm4,%xmm5 xorl %ebx,%r15d addl %r12d,%r11d movdqa %xmm4,%xmm6 rorl $6,%r13d 
andl %r15d,%edi psrld $3,%xmm4 xorl %eax,%r14d addl %r13d,%r11d xorl %ebx,%edi paddd %xmm7,%xmm2 rorl $2,%r14d addl %r11d,%edx psrld $7,%xmm6 addl %edi,%r11d movl %edx,%r13d pshufd $250,%xmm1,%xmm7 addl %r11d,%r14d rorl $14,%r13d pslld $14,%xmm5 movl %r14d,%r11d movl %r8d,%r12d pxor %xmm6,%xmm4 rorl $9,%r14d xorl %edx,%r13d xorl %r9d,%r12d rorl $5,%r13d psrld $11,%xmm6 xorl %r11d,%r14d pxor %xmm5,%xmm4 andl %edx,%r12d xorl %edx,%r13d pslld $11,%xmm5 addl 36(%rsp),%r10d movl %r11d,%edi pxor %xmm6,%xmm4 xorl %r9d,%r12d rorl $11,%r14d movdqa %xmm7,%xmm6 xorl %eax,%edi addl %r12d,%r10d pxor %xmm5,%xmm4 rorl $6,%r13d andl %edi,%r15d xorl %r11d,%r14d psrld $10,%xmm7 addl %r13d,%r10d xorl %eax,%r15d paddd %xmm4,%xmm2 rorl $2,%r14d addl %r10d,%ecx psrlq $17,%xmm6 addl %r15d,%r10d movl %ecx,%r13d addl %r10d,%r14d pxor %xmm6,%xmm7 rorl $14,%r13d movl %r14d,%r10d movl %edx,%r12d rorl $9,%r14d psrlq $2,%xmm6 xorl %ecx,%r13d xorl %r8d,%r12d pxor %xmm6,%xmm7 rorl $5,%r13d xorl %r10d,%r14d andl %ecx,%r12d pshufd $128,%xmm7,%xmm7 xorl %ecx,%r13d addl 40(%rsp),%r9d movl %r10d,%r15d psrldq $8,%xmm7 xorl %r8d,%r12d rorl $11,%r14d xorl %r11d,%r15d addl %r12d,%r9d rorl $6,%r13d paddd %xmm7,%xmm2 andl %r15d,%edi xorl %r10d,%r14d addl %r13d,%r9d pshufd $80,%xmm2,%xmm7 xorl %r11d,%edi rorl $2,%r14d addl %r9d,%ebx movdqa %xmm7,%xmm6 addl %edi,%r9d movl %ebx,%r13d psrld $10,%xmm7 addl %r9d,%r14d rorl $14,%r13d psrlq $17,%xmm6 movl %r14d,%r9d movl %ecx,%r12d pxor %xmm6,%xmm7 rorl $9,%r14d xorl %ebx,%r13d xorl %edx,%r12d rorl $5,%r13d xorl %r9d,%r14d psrlq $2,%xmm6 andl %ebx,%r12d xorl %ebx,%r13d addl 44(%rsp),%r8d pxor %xmm6,%xmm7 movl %r9d,%edi xorl %edx,%r12d rorl $11,%r14d pshufd $8,%xmm7,%xmm7 xorl %r10d,%edi addl %r12d,%r8d movdqa 64(%rbp),%xmm6 rorl $6,%r13d andl %edi,%r15d pslldq $8,%xmm7 xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d paddd %xmm7,%xmm2 rorl $2,%r14d addl %r8d,%eax addl %r15d,%r8d paddd %xmm2,%xmm6 movl %eax,%r13d addl %r8d,%r14d movdqa %xmm6,32(%rsp) rorl $14,%r13d movdqa %xmm0,%xmm4 movl %r14d,%r8d movl %ebx,%r12d movdqa %xmm2,%xmm7 rorl $9,%r14d xorl %eax,%r13d xorl %ecx,%r12d rorl $5,%r13d xorl %r8d,%r14d .byte 102,15,58,15,227,4 andl %eax,%r12d xorl %eax,%r13d .byte 102,15,58,15,249,4 addl 48(%rsp),%edx movl %r8d,%r15d xorl %ecx,%r12d rorl $11,%r14d movdqa %xmm4,%xmm5 xorl %r9d,%r15d addl %r12d,%edx movdqa %xmm4,%xmm6 rorl $6,%r13d andl %r15d,%edi psrld $3,%xmm4 xorl %r8d,%r14d addl %r13d,%edx xorl %r9d,%edi paddd %xmm7,%xmm3 rorl $2,%r14d addl %edx,%r11d psrld $7,%xmm6 addl %edi,%edx movl %r11d,%r13d pshufd $250,%xmm2,%xmm7 addl %edx,%r14d rorl $14,%r13d pslld $14,%xmm5 movl %r14d,%edx movl %eax,%r12d pxor %xmm6,%xmm4 rorl $9,%r14d xorl %r11d,%r13d xorl %ebx,%r12d rorl $5,%r13d psrld $11,%xmm6 xorl %edx,%r14d pxor %xmm5,%xmm4 andl %r11d,%r12d xorl %r11d,%r13d pslld $11,%xmm5 addl 52(%rsp),%ecx movl %edx,%edi pxor %xmm6,%xmm4 xorl %ebx,%r12d rorl $11,%r14d movdqa %xmm7,%xmm6 xorl %r8d,%edi addl %r12d,%ecx pxor %xmm5,%xmm4 rorl $6,%r13d andl %edi,%r15d xorl %edx,%r14d psrld $10,%xmm7 addl %r13d,%ecx xorl %r8d,%r15d paddd %xmm4,%xmm3 rorl $2,%r14d addl %ecx,%r10d psrlq $17,%xmm6 addl %r15d,%ecx movl %r10d,%r13d addl %ecx,%r14d pxor %xmm6,%xmm7 rorl $14,%r13d movl %r14d,%ecx movl %r11d,%r12d rorl $9,%r14d psrlq $2,%xmm6 xorl %r10d,%r13d xorl %eax,%r12d pxor %xmm6,%xmm7 rorl $5,%r13d xorl %ecx,%r14d andl %r10d,%r12d pshufd $128,%xmm7,%xmm7 xorl %r10d,%r13d addl 56(%rsp),%ebx movl %ecx,%r15d psrldq $8,%xmm7 xorl %eax,%r12d rorl $11,%r14d xorl %edx,%r15d addl %r12d,%ebx rorl $6,%r13d paddd 
%xmm7,%xmm3 andl %r15d,%edi xorl %ecx,%r14d addl %r13d,%ebx pshufd $80,%xmm3,%xmm7 xorl %edx,%edi rorl $2,%r14d addl %ebx,%r9d movdqa %xmm7,%xmm6 addl %edi,%ebx movl %r9d,%r13d psrld $10,%xmm7 addl %ebx,%r14d rorl $14,%r13d psrlq $17,%xmm6 movl %r14d,%ebx movl %r10d,%r12d pxor %xmm6,%xmm7 rorl $9,%r14d xorl %r9d,%r13d xorl %r11d,%r12d rorl $5,%r13d xorl %ebx,%r14d psrlq $2,%xmm6 andl %r9d,%r12d xorl %r9d,%r13d addl 60(%rsp),%eax pxor %xmm6,%xmm7 movl %ebx,%edi xorl %r11d,%r12d rorl $11,%r14d pshufd $8,%xmm7,%xmm7 xorl %ecx,%edi addl %r12d,%eax movdqa 96(%rbp),%xmm6 rorl $6,%r13d andl %edi,%r15d pslldq $8,%xmm7 xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d paddd %xmm7,%xmm3 rorl $2,%r14d addl %eax,%r8d addl %r15d,%eax paddd %xmm3,%xmm6 movl %r8d,%r13d addl %eax,%r14d movdqa %xmm6,48(%rsp) cmpb $0,131(%rbp) jne .Lssse3_00_47 rorl $14,%r13d movl %r14d,%eax movl %r9d,%r12d rorl $9,%r14d xorl %r8d,%r13d xorl %r10d,%r12d rorl $5,%r13d xorl %eax,%r14d andl %r8d,%r12d xorl %r8d,%r13d addl 0(%rsp),%r11d movl %eax,%r15d xorl %r10d,%r12d rorl $11,%r14d xorl %ebx,%r15d addl %r12d,%r11d rorl $6,%r13d andl %r15d,%edi xorl %eax,%r14d addl %r13d,%r11d xorl %ebx,%edi rorl $2,%r14d addl %r11d,%edx addl %edi,%r11d movl %edx,%r13d addl %r11d,%r14d rorl $14,%r13d movl %r14d,%r11d movl %r8d,%r12d rorl $9,%r14d xorl %edx,%r13d xorl %r9d,%r12d rorl $5,%r13d xorl %r11d,%r14d andl %edx,%r12d xorl %edx,%r13d addl 4(%rsp),%r10d movl %r11d,%edi xorl %r9d,%r12d rorl $11,%r14d xorl %eax,%edi addl %r12d,%r10d rorl $6,%r13d andl %edi,%r15d xorl %r11d,%r14d addl %r13d,%r10d xorl %eax,%r15d rorl $2,%r14d addl %r10d,%ecx addl %r15d,%r10d movl %ecx,%r13d addl %r10d,%r14d rorl $14,%r13d movl %r14d,%r10d movl %edx,%r12d rorl $9,%r14d xorl %ecx,%r13d xorl %r8d,%r12d rorl $5,%r13d xorl %r10d,%r14d andl %ecx,%r12d xorl %ecx,%r13d addl 8(%rsp),%r9d movl %r10d,%r15d xorl %r8d,%r12d rorl $11,%r14d xorl %r11d,%r15d addl %r12d,%r9d rorl $6,%r13d andl %r15d,%edi xorl %r10d,%r14d addl %r13d,%r9d xorl %r11d,%edi rorl $2,%r14d addl %r9d,%ebx addl %edi,%r9d movl %ebx,%r13d addl %r9d,%r14d rorl $14,%r13d movl %r14d,%r9d movl %ecx,%r12d rorl $9,%r14d xorl %ebx,%r13d xorl %edx,%r12d rorl $5,%r13d xorl %r9d,%r14d andl %ebx,%r12d xorl %ebx,%r13d addl 12(%rsp),%r8d movl %r9d,%edi xorl %edx,%r12d rorl $11,%r14d xorl %r10d,%edi addl %r12d,%r8d rorl $6,%r13d andl %edi,%r15d xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d rorl $2,%r14d addl %r8d,%eax addl %r15d,%r8d movl %eax,%r13d addl %r8d,%r14d rorl $14,%r13d movl %r14d,%r8d movl %ebx,%r12d rorl $9,%r14d xorl %eax,%r13d xorl %ecx,%r12d rorl $5,%r13d xorl %r8d,%r14d andl %eax,%r12d xorl %eax,%r13d addl 16(%rsp),%edx movl %r8d,%r15d xorl %ecx,%r12d rorl $11,%r14d xorl %r9d,%r15d addl %r12d,%edx rorl $6,%r13d andl %r15d,%edi xorl %r8d,%r14d addl %r13d,%edx xorl %r9d,%edi rorl $2,%r14d addl %edx,%r11d addl %edi,%edx movl %r11d,%r13d addl %edx,%r14d rorl $14,%r13d movl %r14d,%edx movl %eax,%r12d rorl $9,%r14d xorl %r11d,%r13d xorl %ebx,%r12d rorl $5,%r13d xorl %edx,%r14d andl %r11d,%r12d xorl %r11d,%r13d addl 20(%rsp),%ecx movl %edx,%edi xorl %ebx,%r12d rorl $11,%r14d xorl %r8d,%edi addl %r12d,%ecx rorl $6,%r13d andl %edi,%r15d xorl %edx,%r14d addl %r13d,%ecx xorl %r8d,%r15d rorl $2,%r14d addl %ecx,%r10d addl %r15d,%ecx movl %r10d,%r13d addl %ecx,%r14d rorl $14,%r13d movl %r14d,%ecx movl %r11d,%r12d rorl $9,%r14d xorl %r10d,%r13d xorl %eax,%r12d rorl $5,%r13d xorl %ecx,%r14d andl %r10d,%r12d xorl %r10d,%r13d addl 24(%rsp),%ebx movl %ecx,%r15d xorl %eax,%r12d rorl $11,%r14d xorl %edx,%r15d addl 
%r12d,%ebx rorl $6,%r13d andl %r15d,%edi xorl %ecx,%r14d addl %r13d,%ebx xorl %edx,%edi rorl $2,%r14d addl %ebx,%r9d addl %edi,%ebx movl %r9d,%r13d addl %ebx,%r14d rorl $14,%r13d movl %r14d,%ebx movl %r10d,%r12d rorl $9,%r14d xorl %r9d,%r13d xorl %r11d,%r12d rorl $5,%r13d xorl %ebx,%r14d andl %r9d,%r12d xorl %r9d,%r13d addl 28(%rsp),%eax movl %ebx,%edi xorl %r11d,%r12d rorl $11,%r14d xorl %ecx,%edi addl %r12d,%eax rorl $6,%r13d andl %edi,%r15d xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d rorl $2,%r14d addl %eax,%r8d addl %r15d,%eax movl %r8d,%r13d addl %eax,%r14d rorl $14,%r13d movl %r14d,%eax movl %r9d,%r12d rorl $9,%r14d xorl %r8d,%r13d xorl %r10d,%r12d rorl $5,%r13d xorl %eax,%r14d andl %r8d,%r12d xorl %r8d,%r13d addl 32(%rsp),%r11d movl %eax,%r15d xorl %r10d,%r12d rorl $11,%r14d xorl %ebx,%r15d addl %r12d,%r11d rorl $6,%r13d andl %r15d,%edi xorl %eax,%r14d addl %r13d,%r11d xorl %ebx,%edi rorl $2,%r14d addl %r11d,%edx addl %edi,%r11d movl %edx,%r13d addl %r11d,%r14d rorl $14,%r13d movl %r14d,%r11d movl %r8d,%r12d rorl $9,%r14d xorl %edx,%r13d xorl %r9d,%r12d rorl $5,%r13d xorl %r11d,%r14d andl %edx,%r12d xorl %edx,%r13d addl 36(%rsp),%r10d movl %r11d,%edi xorl %r9d,%r12d rorl $11,%r14d xorl %eax,%edi addl %r12d,%r10d rorl $6,%r13d andl %edi,%r15d xorl %r11d,%r14d addl %r13d,%r10d xorl %eax,%r15d rorl $2,%r14d addl %r10d,%ecx addl %r15d,%r10d movl %ecx,%r13d addl %r10d,%r14d rorl $14,%r13d movl %r14d,%r10d movl %edx,%r12d rorl $9,%r14d xorl %ecx,%r13d xorl %r8d,%r12d rorl $5,%r13d xorl %r10d,%r14d andl %ecx,%r12d xorl %ecx,%r13d addl 40(%rsp),%r9d movl %r10d,%r15d xorl %r8d,%r12d rorl $11,%r14d xorl %r11d,%r15d addl %r12d,%r9d rorl $6,%r13d andl %r15d,%edi xorl %r10d,%r14d addl %r13d,%r9d xorl %r11d,%edi rorl $2,%r14d addl %r9d,%ebx addl %edi,%r9d movl %ebx,%r13d addl %r9d,%r14d rorl $14,%r13d movl %r14d,%r9d movl %ecx,%r12d rorl $9,%r14d xorl %ebx,%r13d xorl %edx,%r12d rorl $5,%r13d xorl %r9d,%r14d andl %ebx,%r12d xorl %ebx,%r13d addl 44(%rsp),%r8d movl %r9d,%edi xorl %edx,%r12d rorl $11,%r14d xorl %r10d,%edi addl %r12d,%r8d rorl $6,%r13d andl %edi,%r15d xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d rorl $2,%r14d addl %r8d,%eax addl %r15d,%r8d movl %eax,%r13d addl %r8d,%r14d rorl $14,%r13d movl %r14d,%r8d movl %ebx,%r12d rorl $9,%r14d xorl %eax,%r13d xorl %ecx,%r12d rorl $5,%r13d xorl %r8d,%r14d andl %eax,%r12d xorl %eax,%r13d addl 48(%rsp),%edx movl %r8d,%r15d xorl %ecx,%r12d rorl $11,%r14d xorl %r9d,%r15d addl %r12d,%edx rorl $6,%r13d andl %r15d,%edi xorl %r8d,%r14d addl %r13d,%edx xorl %r9d,%edi rorl $2,%r14d addl %edx,%r11d addl %edi,%edx movl %r11d,%r13d addl %edx,%r14d rorl $14,%r13d movl %r14d,%edx movl %eax,%r12d rorl $9,%r14d xorl %r11d,%r13d xorl %ebx,%r12d rorl $5,%r13d xorl %edx,%r14d andl %r11d,%r12d xorl %r11d,%r13d addl 52(%rsp),%ecx movl %edx,%edi xorl %ebx,%r12d rorl $11,%r14d xorl %r8d,%edi addl %r12d,%ecx rorl $6,%r13d andl %edi,%r15d xorl %edx,%r14d addl %r13d,%ecx xorl %r8d,%r15d rorl $2,%r14d addl %ecx,%r10d addl %r15d,%ecx movl %r10d,%r13d addl %ecx,%r14d rorl $14,%r13d movl %r14d,%ecx movl %r11d,%r12d rorl $9,%r14d xorl %r10d,%r13d xorl %eax,%r12d rorl $5,%r13d xorl %ecx,%r14d andl %r10d,%r12d xorl %r10d,%r13d addl 56(%rsp),%ebx movl %ecx,%r15d xorl %eax,%r12d rorl $11,%r14d xorl %edx,%r15d addl %r12d,%ebx rorl $6,%r13d andl %r15d,%edi xorl %ecx,%r14d addl %r13d,%ebx xorl %edx,%edi rorl $2,%r14d addl %ebx,%r9d addl %edi,%ebx movl %r9d,%r13d addl %ebx,%r14d rorl $14,%r13d movl %r14d,%ebx movl %r10d,%r12d rorl $9,%r14d xorl %r9d,%r13d xorl %r11d,%r12d rorl 
$5,%r13d xorl %ebx,%r14d andl %r9d,%r12d xorl %r9d,%r13d addl 60(%rsp),%eax movl %ebx,%edi xorl %r11d,%r12d rorl $11,%r14d xorl %ecx,%edi addl %r12d,%eax rorl $6,%r13d andl %edi,%r15d xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d rorl $2,%r14d addl %eax,%r8d addl %r15d,%eax movl %r8d,%r13d addl %eax,%r14d movq 64+0(%rsp),%rdi movl %r14d,%eax addl 0(%rdi),%eax leaq 64(%rsi),%rsi addl 4(%rdi),%ebx addl 8(%rdi),%ecx addl 12(%rdi),%edx addl 16(%rdi),%r8d addl 20(%rdi),%r9d addl 24(%rdi),%r10d addl 28(%rdi),%r11d cmpq 64+16(%rsp),%rsi movl %eax,0(%rdi) movl %ebx,4(%rdi) movl %ecx,8(%rdi) movl %edx,12(%rdi) movl %r8d,16(%rdi) movl %r9d,20(%rdi) movl %r10d,24(%rdi) movl %r11d,28(%rdi) jb .Lloop_ssse3 movq 64+24(%rsp),%rsi movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Lepilogue_ssse3: .byte 0xf3,0xc3 .size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3 +.type sha256_block_data_order_avx,@function +.align 64 +sha256_block_data_order_avx: +.Lavx_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue_avx: + + vzeroupper + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%xmm8 + vmovdqa K256+512+64(%rip),%xmm9 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi),%xmm0 + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%edi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%edi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + subq $-128,%rbp + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm3,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + 
addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm0,%xmm0 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpshufd $80,%xmm0,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm0,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm1,%xmm1 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpshufd $80,%xmm1,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl 
%r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm1,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm2,%xmm2 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpshufd $80,%xmm2,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d 
+ addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm2,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm3,%xmm3 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpshufd $80,%xmm3,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lavx_00_47 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d 
+ xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl 
%r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + 
shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_avx + + movq 64+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size sha256_block_data_order_avx,.-sha256_block_data_order_avx +.type sha256_block_data_order_avx2,@function +.align 64 +sha256_block_data_order_avx2: +.Lavx2_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $544,%rsp + shlq $4,%rdx + andq $-1024,%rsp + leaq (%rsi,%rdx,4),%rdx + addq $448,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue_avx2: + + vzeroupper + subq $-64,%rsi + movl 0(%rdi),%eax + movq %rsi,%r12 + movl 4(%rdi),%ebx + cmpq %rdx,%rsi + movl 8(%rdi),%ecx + cmoveq %rsp,%r12 + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%ymm8 + vmovdqa K256+512+64(%rip),%ymm9 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqa K256+512(%rip),%ymm7 + vmovdqu -64+0(%rsi),%xmm0 + vmovdqu -64+16(%rsi),%xmm1 + vmovdqu -64+32(%rsi),%xmm2 + vmovdqu -64+48(%rsi),%xmm3 + + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm7,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm7,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + + leaq K256(%rip),%rbp + vpshufb %ymm7,%ymm2,%ymm2 + vpaddd 0(%rbp),%ymm0,%ymm4 + vpshufb %ymm7,%ymm3,%ymm3 + vpaddd 32(%rbp),%ymm1,%ymm5 + vpaddd 64(%rbp),%ymm2,%ymm6 + vpaddd 96(%rbp),%ymm3,%ymm7 + vmovdqa %ymm4,0(%rsp) + xorl %r14d,%r14d + vmovdqa %ymm5,32(%rsp) + leaq -64(%rsp),%rsp + movl %ebx,%edi + vmovdqa %ymm6,0(%rsp) + xorl %ecx,%edi + vmovdqa %ymm7,32(%rsp) + movl %r9d,%r12d + subq $-32*4,%rbp + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + leaq -64(%rsp),%rsp + vpalignr $4,%ymm0,%ymm1,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm2,%ymm3,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm0,%ymm0 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + vpshufd $250,%ymm3,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor 
%ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm0,%ymm0 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpshufd $80,%ymm0,%ymm7 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpxor %ymm7,%ymm6,%ymm6 + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + vpaddd 0(%rbp),%ymm0,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm1,%ymm2,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm3,%ymm0,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm1,%ymm1 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + vpshufd $250,%ymm0,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm1,%ymm1 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor 
%ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpshufd $80,%ymm1,%ymm7 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpxor %ymm7,%ymm6,%ymm6 + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + vpaddd 32(%rbp),%ymm1,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq -64(%rsp),%rsp + vpalignr $4,%ymm2,%ymm3,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm0,%ymm1,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm2,%ymm2 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + vpshufd $250,%ymm1,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm2,%ymm2 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpshufd $80,%ymm2,%ymm7 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl 
%r12d,%r14d + xorl %r11d,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpxor %ymm7,%ymm6,%ymm6 + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + vpaddd 64(%rbp),%ymm2,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm3,%ymm0,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm1,%ymm2,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm3,%ymm3 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + vpshufd $250,%ymm2,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm3,%ymm3 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpshufd $80,%ymm3,%ymm7 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpxor %ymm7,%ymm6,%ymm6 + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + vpaddd 96(%rbp),%ymm3,%ymm6 + rorxl $13,%ebx,%r14d + rorxl 
$2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq 128(%rbp),%rbp + cmpb $0,3(%rbp) + jne .Lavx2_00_47 + addl 0+64(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4+64(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+64(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12+64(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+64(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36+64(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+64(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl 
%eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44+64(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + addl 0(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36(%rsp),%ecx + andl %r11d,%r12d + rorxl 
$25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + movq 512(%rsp),%rdi + addl %r14d,%eax + + leaq 448(%rsp),%rbp + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + + cmpq 80(%rbp),%rsi + je .Ldone_avx2 + + xorl %r14d,%r14d + movl %ebx,%edi + xorl %ecx,%edi + movl %r9d,%r12d + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl 
%r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + leaq -64(%rbp),%rbp + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 512(%rsp),%rdi + addl %r14d,%eax + + leaq 448(%rsp),%rsp + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + leaq 128(%rsi),%rsi + addl 24(%rdi),%r10d + movq %rsi,%r12 + addl 28(%rdi),%r11d + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + cmoveq %rsp,%r12 + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + +.Ldone_avx2: + leaq (%rbp),%rsp + movq 64+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx2: + .byte 
0xf3,0xc3 +.size sha256_block_data_order_avx2,.-sha256_block_data_order_avx2 Index: head/secure/lib/libcrypto/amd64/sha512-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/sha512-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/sha512-x86_64.S (revision 299481) @@ -1,1784 +1,5367 @@ # $FreeBSD$ + # Do not modify. This file is auto-generated from sha512-x86_64.pl. .text .globl sha512_block_data_order .type sha512_block_data_order,@function .align 16 sha512_block_data_order: + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $2048,%r10d + jnz .Lxop_shortcut + andl $296,%r11d + cmpl $296,%r11d + je .Lavx2_shortcut + andl $1073741824,%r9d + andl $268435968,%r10d + orl %r9d,%r10d + cmpl $1342177792,%r10d + je .Lavx_shortcut pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 movq %rsp,%r11 shlq $4,%rdx subq $128+32,%rsp leaq (%rsi,%rdx,8),%rdx andq $-64,%rsp movq %rdi,128+0(%rsp) movq %rsi,128+8(%rsp) movq %rdx,128+16(%rsp) movq %r11,128+24(%rsp) .Lprologue: movq 0(%rdi),%rax movq 8(%rdi),%rbx movq 16(%rdi),%rcx movq 24(%rdi),%rdx movq 32(%rdi),%r8 movq 40(%rdi),%r9 movq 48(%rdi),%r10 movq 56(%rdi),%r11 jmp .Lloop .align 16 .Lloop: movq %rbx,%rdi leaq K512(%rip),%rbp xorq %rcx,%rdi movq 0(%rsi),%r12 movq %r8,%r13 movq %rax,%r14 bswapq %r12 rorq $23,%r13 movq %r9,%r15 xorq %r8,%r13 rorq $5,%r14 xorq %r10,%r15 movq %r12,0(%rsp) xorq %rax,%r14 andq %r8,%r15 rorq $4,%r13 addq %r11,%r12 xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 addq %r15,%r12 movq %rax,%r15 addq (%rbp),%r12 xorq %rax,%r14 xorq %rbx,%r15 rorq $14,%r13 movq %rbx,%r11 andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 leaq 8(%rbp),%rbp addq %r14,%r11 movq 8(%rsi),%r12 movq %rdx,%r13 movq %r11,%r14 bswapq %r12 rorq $23,%r13 movq %r8,%rdi xorq %rdx,%r13 rorq $5,%r14 xorq %r9,%rdi movq %r12,8(%rsp) xorq %r11,%r14 andq %rdx,%rdi rorq $4,%r13 addq %r10,%r12 xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 addq %rdi,%r12 movq %r11,%rdi addq (%rbp),%r12 xorq %r11,%r14 xorq %rax,%rdi rorq $14,%r13 movq %rax,%r10 andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 leaq 24(%rbp),%rbp addq %r14,%r10 movq 16(%rsi),%r12 movq %rcx,%r13 movq %r10,%r14 bswapq %r12 rorq $23,%r13 movq %rdx,%r15 xorq %rcx,%r13 rorq $5,%r14 xorq %r8,%r15 movq %r12,16(%rsp) xorq %r10,%r14 andq %rcx,%r15 rorq $4,%r13 addq %r9,%r12 xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 addq %r15,%r12 movq %r10,%r15 addq (%rbp),%r12 xorq %r10,%r14 xorq %r11,%r15 rorq $14,%r13 movq %r11,%r9 andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 leaq 8(%rbp),%rbp addq %r14,%r9 movq 24(%rsi),%r12 movq %rbx,%r13 movq %r9,%r14 bswapq %r12 rorq $23,%r13 movq %rcx,%rdi xorq %rbx,%r13 rorq $5,%r14 xorq %rdx,%rdi movq %r12,24(%rsp) xorq %r9,%r14 andq %rbx,%rdi rorq $4,%r13 addq %r8,%r12 xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 addq %rdi,%r12 movq %r9,%rdi addq (%rbp),%r12 xorq %r9,%r14 xorq %r10,%rdi rorq $14,%r13 movq %r10,%r8 andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 leaq 24(%rbp),%rbp addq %r14,%r8 movq 32(%rsi),%r12 movq %rax,%r13 movq %r8,%r14 bswapq %r12 rorq $23,%r13 movq %rbx,%r15 xorq %rax,%r13 rorq $5,%r14 xorq %rcx,%r15 movq %r12,32(%rsp) xorq %r8,%r14 andq %rax,%r15 rorq $4,%r13 addq %rdx,%r12 xorq %rcx,%r15 rorq $6,%r14 xorq %rax,%r13 addq %r15,%r12 movq %r8,%r15 addq (%rbp),%r12 xorq %r8,%r14 
xorq %r9,%r15 rorq $14,%r13 movq %r9,%rdx andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx leaq 8(%rbp),%rbp addq %r14,%rdx movq 40(%rsi),%r12 movq %r11,%r13 movq %rdx,%r14 bswapq %r12 rorq $23,%r13 movq %rax,%rdi xorq %r11,%r13 rorq $5,%r14 xorq %rbx,%rdi movq %r12,40(%rsp) xorq %rdx,%r14 andq %r11,%rdi rorq $4,%r13 addq %rcx,%r12 xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 addq %rdi,%r12 movq %rdx,%rdi addq (%rbp),%r12 xorq %rdx,%r14 xorq %r8,%rdi rorq $14,%r13 movq %r8,%rcx andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx leaq 24(%rbp),%rbp addq %r14,%rcx movq 48(%rsi),%r12 movq %r10,%r13 movq %rcx,%r14 bswapq %r12 rorq $23,%r13 movq %r11,%r15 xorq %r10,%r13 rorq $5,%r14 xorq %rax,%r15 movq %r12,48(%rsp) xorq %rcx,%r14 andq %r10,%r15 rorq $4,%r13 addq %rbx,%r12 xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 addq %r15,%r12 movq %rcx,%r15 addq (%rbp),%r12 xorq %rcx,%r14 xorq %rdx,%r15 rorq $14,%r13 movq %rdx,%rbx andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx leaq 8(%rbp),%rbp addq %r14,%rbx movq 56(%rsi),%r12 movq %r9,%r13 movq %rbx,%r14 bswapq %r12 rorq $23,%r13 movq %r10,%rdi xorq %r9,%r13 rorq $5,%r14 xorq %r11,%rdi movq %r12,56(%rsp) xorq %rbx,%r14 andq %r9,%rdi rorq $4,%r13 addq %rax,%r12 xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 addq %rdi,%r12 movq %rbx,%rdi addq (%rbp),%r12 xorq %rbx,%r14 xorq %rcx,%rdi rorq $14,%r13 movq %rcx,%rax andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%rax addq %r12,%r8 addq %r12,%rax leaq 24(%rbp),%rbp addq %r14,%rax movq 64(%rsi),%r12 movq %r8,%r13 movq %rax,%r14 bswapq %r12 rorq $23,%r13 movq %r9,%r15 xorq %r8,%r13 rorq $5,%r14 xorq %r10,%r15 movq %r12,64(%rsp) xorq %rax,%r14 andq %r8,%r15 rorq $4,%r13 addq %r11,%r12 xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 addq %r15,%r12 movq %rax,%r15 addq (%rbp),%r12 xorq %rax,%r14 xorq %rbx,%r15 rorq $14,%r13 movq %rbx,%r11 andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 leaq 8(%rbp),%rbp addq %r14,%r11 movq 72(%rsi),%r12 movq %rdx,%r13 movq %r11,%r14 bswapq %r12 rorq $23,%r13 movq %r8,%rdi xorq %rdx,%r13 rorq $5,%r14 xorq %r9,%rdi movq %r12,72(%rsp) xorq %r11,%r14 andq %rdx,%rdi rorq $4,%r13 addq %r10,%r12 xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 addq %rdi,%r12 movq %r11,%rdi addq (%rbp),%r12 xorq %r11,%r14 xorq %rax,%rdi rorq $14,%r13 movq %rax,%r10 andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 leaq 24(%rbp),%rbp addq %r14,%r10 movq 80(%rsi),%r12 movq %rcx,%r13 movq %r10,%r14 bswapq %r12 rorq $23,%r13 movq %rdx,%r15 xorq %rcx,%r13 rorq $5,%r14 xorq %r8,%r15 movq %r12,80(%rsp) xorq %r10,%r14 andq %rcx,%r15 rorq $4,%r13 addq %r9,%r12 xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 addq %r15,%r12 movq %r10,%r15 addq (%rbp),%r12 xorq %r10,%r14 xorq %r11,%r15 rorq $14,%r13 movq %r11,%r9 andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 leaq 8(%rbp),%rbp addq %r14,%r9 movq 88(%rsi),%r12 movq %rbx,%r13 movq %r9,%r14 bswapq %r12 rorq $23,%r13 movq %rcx,%rdi xorq %rbx,%r13 rorq $5,%r14 xorq %rdx,%rdi movq %r12,88(%rsp) xorq %r9,%r14 andq %rbx,%rdi rorq $4,%r13 addq %r8,%r12 xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 addq %rdi,%r12 movq %r9,%rdi addq (%rbp),%r12 xorq %r9,%r14 xorq %r10,%rdi rorq $14,%r13 movq %r10,%r8 andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 leaq 24(%rbp),%rbp addq %r14,%r8 movq 96(%rsi),%r12 movq %rax,%r13 movq %r8,%r14 
bswapq %r12 rorq $23,%r13 movq %rbx,%r15 xorq %rax,%r13 rorq $5,%r14 xorq %rcx,%r15 movq %r12,96(%rsp) xorq %r8,%r14 andq %rax,%r15 rorq $4,%r13 addq %rdx,%r12 xorq %rcx,%r15 rorq $6,%r14 xorq %rax,%r13 addq %r15,%r12 movq %r8,%r15 addq (%rbp),%r12 xorq %r8,%r14 xorq %r9,%r15 rorq $14,%r13 movq %r9,%rdx andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx leaq 8(%rbp),%rbp addq %r14,%rdx movq 104(%rsi),%r12 movq %r11,%r13 movq %rdx,%r14 bswapq %r12 rorq $23,%r13 movq %rax,%rdi xorq %r11,%r13 rorq $5,%r14 xorq %rbx,%rdi movq %r12,104(%rsp) xorq %rdx,%r14 andq %r11,%rdi rorq $4,%r13 addq %rcx,%r12 xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 addq %rdi,%r12 movq %rdx,%rdi addq (%rbp),%r12 xorq %rdx,%r14 xorq %r8,%rdi rorq $14,%r13 movq %r8,%rcx andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx leaq 24(%rbp),%rbp addq %r14,%rcx movq 112(%rsi),%r12 movq %r10,%r13 movq %rcx,%r14 bswapq %r12 rorq $23,%r13 movq %r11,%r15 xorq %r10,%r13 rorq $5,%r14 xorq %rax,%r15 movq %r12,112(%rsp) xorq %rcx,%r14 andq %r10,%r15 rorq $4,%r13 addq %rbx,%r12 xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 addq %r15,%r12 movq %rcx,%r15 addq (%rbp),%r12 xorq %rcx,%r14 xorq %rdx,%r15 rorq $14,%r13 movq %rdx,%rbx andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx leaq 8(%rbp),%rbp addq %r14,%rbx movq 120(%rsi),%r12 movq %r9,%r13 movq %rbx,%r14 bswapq %r12 rorq $23,%r13 movq %r10,%rdi xorq %r9,%r13 rorq $5,%r14 xorq %r11,%rdi movq %r12,120(%rsp) xorq %rbx,%r14 andq %r9,%rdi rorq $4,%r13 addq %rax,%r12 xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 addq %rdi,%r12 movq %rbx,%rdi addq (%rbp),%r12 xorq %rbx,%r14 xorq %rcx,%rdi rorq $14,%r13 movq %rcx,%rax andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%rax addq %r12,%r8 addq %r12,%rax leaq 24(%rbp),%rbp jmp .Lrounds_16_xx .align 16 .Lrounds_16_xx: movq 8(%rsp),%r13 movq 112(%rsp),%r15 movq %r13,%r12 rorq $7,%r13 addq %r14,%rax movq %r15,%r14 rorq $42,%r15 xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 xorq %r13,%r12 xorq %r14,%r15 addq 72(%rsp),%r12 addq 0(%rsp),%r12 movq %r8,%r13 addq %r15,%r12 movq %rax,%r14 rorq $23,%r13 movq %r9,%r15 xorq %r8,%r13 rorq $5,%r14 xorq %r10,%r15 movq %r12,0(%rsp) xorq %rax,%r14 andq %r8,%r15 rorq $4,%r13 addq %r11,%r12 xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 addq %r15,%r12 movq %rax,%r15 addq (%rbp),%r12 xorq %rax,%r14 xorq %rbx,%r15 rorq $14,%r13 movq %rbx,%r11 andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 leaq 8(%rbp),%rbp movq 16(%rsp),%r13 movq 120(%rsp),%rdi movq %r13,%r12 rorq $7,%r13 addq %r14,%r11 movq %rdi,%r14 rorq $42,%rdi xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%rdi shrq $6,%r14 rorq $19,%rdi xorq %r13,%r12 xorq %r14,%rdi addq 80(%rsp),%r12 addq 8(%rsp),%r12 movq %rdx,%r13 addq %rdi,%r12 movq %r11,%r14 rorq $23,%r13 movq %r8,%rdi xorq %rdx,%r13 rorq $5,%r14 xorq %r9,%rdi movq %r12,8(%rsp) xorq %r11,%r14 andq %rdx,%rdi rorq $4,%r13 addq %r10,%r12 xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 addq %rdi,%r12 movq %r11,%rdi addq (%rbp),%r12 xorq %r11,%r14 xorq %rax,%rdi rorq $14,%r13 movq %rax,%r10 andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 leaq 24(%rbp),%rbp movq 24(%rsp),%r13 movq 0(%rsp),%r15 movq %r13,%r12 rorq $7,%r13 addq %r14,%r10 movq %r15,%r14 rorq $42,%r15 xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 xorq %r13,%r12 xorq %r14,%r15 addq 88(%rsp),%r12 
addq 16(%rsp),%r12 movq %rcx,%r13 addq %r15,%r12 movq %r10,%r14 rorq $23,%r13 movq %rdx,%r15 xorq %rcx,%r13 rorq $5,%r14 xorq %r8,%r15 movq %r12,16(%rsp) xorq %r10,%r14 andq %rcx,%r15 rorq $4,%r13 addq %r9,%r12 xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 addq %r15,%r12 movq %r10,%r15 addq (%rbp),%r12 xorq %r10,%r14 xorq %r11,%r15 rorq $14,%r13 movq %r11,%r9 andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 leaq 8(%rbp),%rbp movq 32(%rsp),%r13 movq 8(%rsp),%rdi movq %r13,%r12 rorq $7,%r13 addq %r14,%r9 movq %rdi,%r14 rorq $42,%rdi xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%rdi shrq $6,%r14 rorq $19,%rdi xorq %r13,%r12 xorq %r14,%rdi addq 96(%rsp),%r12 addq 24(%rsp),%r12 movq %rbx,%r13 addq %rdi,%r12 movq %r9,%r14 rorq $23,%r13 movq %rcx,%rdi xorq %rbx,%r13 rorq $5,%r14 xorq %rdx,%rdi movq %r12,24(%rsp) xorq %r9,%r14 andq %rbx,%rdi rorq $4,%r13 addq %r8,%r12 xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 addq %rdi,%r12 movq %r9,%rdi addq (%rbp),%r12 xorq %r9,%r14 xorq %r10,%rdi rorq $14,%r13 movq %r10,%r8 andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 leaq 24(%rbp),%rbp movq 40(%rsp),%r13 movq 16(%rsp),%r15 movq %r13,%r12 rorq $7,%r13 addq %r14,%r8 movq %r15,%r14 rorq $42,%r15 xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 xorq %r13,%r12 xorq %r14,%r15 addq 104(%rsp),%r12 addq 32(%rsp),%r12 movq %rax,%r13 addq %r15,%r12 movq %r8,%r14 rorq $23,%r13 movq %rbx,%r15 xorq %rax,%r13 rorq $5,%r14 xorq %rcx,%r15 movq %r12,32(%rsp) xorq %r8,%r14 andq %rax,%r15 rorq $4,%r13 addq %rdx,%r12 xorq %rcx,%r15 rorq $6,%r14 xorq %rax,%r13 addq %r15,%r12 movq %r8,%r15 addq (%rbp),%r12 xorq %r8,%r14 xorq %r9,%r15 rorq $14,%r13 movq %r9,%rdx andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx leaq 8(%rbp),%rbp movq 48(%rsp),%r13 movq 24(%rsp),%rdi movq %r13,%r12 rorq $7,%r13 addq %r14,%rdx movq %rdi,%r14 rorq $42,%rdi xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%rdi shrq $6,%r14 rorq $19,%rdi xorq %r13,%r12 xorq %r14,%rdi addq 112(%rsp),%r12 addq 40(%rsp),%r12 movq %r11,%r13 addq %rdi,%r12 movq %rdx,%r14 rorq $23,%r13 movq %rax,%rdi xorq %r11,%r13 rorq $5,%r14 xorq %rbx,%rdi movq %r12,40(%rsp) xorq %rdx,%r14 andq %r11,%rdi rorq $4,%r13 addq %rcx,%r12 xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 addq %rdi,%r12 movq %rdx,%rdi addq (%rbp),%r12 xorq %rdx,%r14 xorq %r8,%rdi rorq $14,%r13 movq %r8,%rcx andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx leaq 24(%rbp),%rbp movq 56(%rsp),%r13 movq 32(%rsp),%r15 movq %r13,%r12 rorq $7,%r13 addq %r14,%rcx movq %r15,%r14 rorq $42,%r15 xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 xorq %r13,%r12 xorq %r14,%r15 addq 120(%rsp),%r12 addq 48(%rsp),%r12 movq %r10,%r13 addq %r15,%r12 movq %rcx,%r14 rorq $23,%r13 movq %r11,%r15 xorq %r10,%r13 rorq $5,%r14 xorq %rax,%r15 movq %r12,48(%rsp) xorq %rcx,%r14 andq %r10,%r15 rorq $4,%r13 addq %rbx,%r12 xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 addq %r15,%r12 movq %rcx,%r15 addq (%rbp),%r12 xorq %rcx,%r14 xorq %rdx,%r15 rorq $14,%r13 movq %rdx,%rbx andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx leaq 8(%rbp),%rbp movq 64(%rsp),%r13 movq 40(%rsp),%rdi movq %r13,%r12 rorq $7,%r13 addq %r14,%rbx movq %rdi,%r14 rorq $42,%rdi xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%rdi shrq $6,%r14 rorq $19,%rdi xorq %r13,%r12 xorq %r14,%rdi addq 0(%rsp),%r12 addq 56(%rsp),%r12 
movq %r9,%r13 addq %rdi,%r12 movq %rbx,%r14 rorq $23,%r13 movq %r10,%rdi xorq %r9,%r13 rorq $5,%r14 xorq %r11,%rdi movq %r12,56(%rsp) xorq %rbx,%r14 andq %r9,%rdi rorq $4,%r13 addq %rax,%r12 xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 addq %rdi,%r12 movq %rbx,%rdi addq (%rbp),%r12 xorq %rbx,%r14 xorq %rcx,%rdi rorq $14,%r13 movq %rcx,%rax andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%rax addq %r12,%r8 addq %r12,%rax leaq 24(%rbp),%rbp movq 72(%rsp),%r13 movq 48(%rsp),%r15 movq %r13,%r12 rorq $7,%r13 addq %r14,%rax movq %r15,%r14 rorq $42,%r15 xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 xorq %r13,%r12 xorq %r14,%r15 addq 8(%rsp),%r12 addq 64(%rsp),%r12 movq %r8,%r13 addq %r15,%r12 movq %rax,%r14 rorq $23,%r13 movq %r9,%r15 xorq %r8,%r13 rorq $5,%r14 xorq %r10,%r15 movq %r12,64(%rsp) xorq %rax,%r14 andq %r8,%r15 rorq $4,%r13 addq %r11,%r12 xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 addq %r15,%r12 movq %rax,%r15 addq (%rbp),%r12 xorq %rax,%r14 xorq %rbx,%r15 rorq $14,%r13 movq %rbx,%r11 andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 leaq 8(%rbp),%rbp movq 80(%rsp),%r13 movq 56(%rsp),%rdi movq %r13,%r12 rorq $7,%r13 addq %r14,%r11 movq %rdi,%r14 rorq $42,%rdi xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%rdi shrq $6,%r14 rorq $19,%rdi xorq %r13,%r12 xorq %r14,%rdi addq 16(%rsp),%r12 addq 72(%rsp),%r12 movq %rdx,%r13 addq %rdi,%r12 movq %r11,%r14 rorq $23,%r13 movq %r8,%rdi xorq %rdx,%r13 rorq $5,%r14 xorq %r9,%rdi movq %r12,72(%rsp) xorq %r11,%r14 andq %rdx,%rdi rorq $4,%r13 addq %r10,%r12 xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 addq %rdi,%r12 movq %r11,%rdi addq (%rbp),%r12 xorq %r11,%r14 xorq %rax,%rdi rorq $14,%r13 movq %rax,%r10 andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 leaq 24(%rbp),%rbp movq 88(%rsp),%r13 movq 64(%rsp),%r15 movq %r13,%r12 rorq $7,%r13 addq %r14,%r10 movq %r15,%r14 rorq $42,%r15 xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 xorq %r13,%r12 xorq %r14,%r15 addq 24(%rsp),%r12 addq 80(%rsp),%r12 movq %rcx,%r13 addq %r15,%r12 movq %r10,%r14 rorq $23,%r13 movq %rdx,%r15 xorq %rcx,%r13 rorq $5,%r14 xorq %r8,%r15 movq %r12,80(%rsp) xorq %r10,%r14 andq %rcx,%r15 rorq $4,%r13 addq %r9,%r12 xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 addq %r15,%r12 movq %r10,%r15 addq (%rbp),%r12 xorq %r10,%r14 xorq %r11,%r15 rorq $14,%r13 movq %r11,%r9 andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 leaq 8(%rbp),%rbp movq 96(%rsp),%r13 movq 72(%rsp),%rdi movq %r13,%r12 rorq $7,%r13 addq %r14,%r9 movq %rdi,%r14 rorq $42,%rdi xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%rdi shrq $6,%r14 rorq $19,%rdi xorq %r13,%r12 xorq %r14,%rdi addq 32(%rsp),%r12 addq 88(%rsp),%r12 movq %rbx,%r13 addq %rdi,%r12 movq %r9,%r14 rorq $23,%r13 movq %rcx,%rdi xorq %rbx,%r13 rorq $5,%r14 xorq %rdx,%rdi movq %r12,88(%rsp) xorq %r9,%r14 andq %rbx,%rdi rorq $4,%r13 addq %r8,%r12 xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 addq %rdi,%r12 movq %r9,%rdi addq (%rbp),%r12 xorq %r9,%r14 xorq %r10,%rdi rorq $14,%r13 movq %r10,%r8 andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 leaq 24(%rbp),%rbp movq 104(%rsp),%r13 movq 80(%rsp),%r15 movq %r13,%r12 rorq $7,%r13 addq %r14,%r8 movq %r15,%r14 rorq $42,%r15 xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 xorq %r13,%r12 xorq %r14,%r15 addq 40(%rsp),%r12 addq 96(%rsp),%r12 movq %rax,%r13 addq 
%r15,%r12 movq %r8,%r14 rorq $23,%r13 movq %rbx,%r15 xorq %rax,%r13 rorq $5,%r14 xorq %rcx,%r15 movq %r12,96(%rsp) xorq %r8,%r14 andq %rax,%r15 rorq $4,%r13 addq %rdx,%r12 xorq %rcx,%r15 rorq $6,%r14 xorq %rax,%r13 addq %r15,%r12 movq %r8,%r15 addq (%rbp),%r12 xorq %r8,%r14 xorq %r9,%r15 rorq $14,%r13 movq %r9,%rdx andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx leaq 8(%rbp),%rbp movq 112(%rsp),%r13 movq 88(%rsp),%rdi movq %r13,%r12 rorq $7,%r13 addq %r14,%rdx movq %rdi,%r14 rorq $42,%rdi xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%rdi shrq $6,%r14 rorq $19,%rdi xorq %r13,%r12 xorq %r14,%rdi addq 48(%rsp),%r12 addq 104(%rsp),%r12 movq %r11,%r13 addq %rdi,%r12 movq %rdx,%r14 rorq $23,%r13 movq %rax,%rdi xorq %r11,%r13 rorq $5,%r14 xorq %rbx,%rdi movq %r12,104(%rsp) xorq %rdx,%r14 andq %r11,%rdi rorq $4,%r13 addq %rcx,%r12 xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 addq %rdi,%r12 movq %rdx,%rdi addq (%rbp),%r12 xorq %rdx,%r14 xorq %r8,%rdi rorq $14,%r13 movq %r8,%rcx andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx leaq 24(%rbp),%rbp movq 120(%rsp),%r13 movq 96(%rsp),%r15 movq %r13,%r12 rorq $7,%r13 addq %r14,%rcx movq %r15,%r14 rorq $42,%r15 xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 xorq %r13,%r12 xorq %r14,%r15 addq 56(%rsp),%r12 addq 112(%rsp),%r12 movq %r10,%r13 addq %r15,%r12 movq %rcx,%r14 rorq $23,%r13 movq %r11,%r15 xorq %r10,%r13 rorq $5,%r14 xorq %rax,%r15 movq %r12,112(%rsp) xorq %rcx,%r14 andq %r10,%r15 rorq $4,%r13 addq %rbx,%r12 xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 addq %r15,%r12 movq %rcx,%r15 addq (%rbp),%r12 xorq %rcx,%r14 xorq %rdx,%r15 rorq $14,%r13 movq %rdx,%rbx andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx leaq 8(%rbp),%rbp movq 0(%rsp),%r13 movq 104(%rsp),%rdi movq %r13,%r12 rorq $7,%r13 addq %r14,%rbx movq %rdi,%r14 rorq $42,%rdi xorq %r12,%r13 shrq $7,%r12 rorq $1,%r13 xorq %r14,%rdi shrq $6,%r14 rorq $19,%rdi xorq %r13,%r12 xorq %r14,%rdi addq 64(%rsp),%r12 addq 120(%rsp),%r12 movq %r9,%r13 addq %rdi,%r12 movq %rbx,%r14 rorq $23,%r13 movq %r10,%rdi xorq %r9,%r13 rorq $5,%r14 xorq %r11,%rdi movq %r12,120(%rsp) xorq %rbx,%r14 andq %r9,%rdi rorq $4,%r13 addq %rax,%r12 xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 addq %rdi,%r12 movq %rbx,%rdi addq (%rbp),%r12 xorq %rbx,%r14 xorq %rcx,%rdi rorq $14,%r13 movq %rcx,%rax andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 xorq %r15,%rax addq %r12,%r8 addq %r12,%rax leaq 24(%rbp),%rbp cmpb $0,7(%rbp) jnz .Lrounds_16_xx movq 128+0(%rsp),%rdi addq %r14,%rax leaq 128(%rsi),%rsi addq 0(%rdi),%rax addq 8(%rdi),%rbx addq 16(%rdi),%rcx addq 24(%rdi),%rdx addq 32(%rdi),%r8 addq 40(%rdi),%r9 addq 48(%rdi),%r10 addq 56(%rdi),%r11 cmpq 128+16(%rsp),%rsi movq %rax,0(%rdi) movq %rbx,8(%rdi) movq %rcx,16(%rdi) movq %rdx,24(%rdi) movq %r8,32(%rdi) movq %r9,40(%rdi) movq %r10,48(%rdi) movq %r11,56(%rdi) jb .Lloop movq 128+24(%rsp),%rsi movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Lepilogue: .byte 0xf3,0xc3 .size sha512_block_data_order,.-sha512_block_data_order .align 64 .type K512,@object K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc .quad 0x3956c25bf348b538,0x59f111f1b605d019 .quad 0x3956c25bf348b538,0x59f111f1b605d019 .quad 
0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad 0xd807aa98a3030242,0x12835b0145706fbe
.quad 0xd807aa98a3030242,0x12835b0145706fbe
.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad 0x9bdc06a725c71235,0xc19bf174cf692694
.quad 0x9bdc06a725c71235,0xc19bf174cf692694
.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad 0x983e5152ee66dfab,0xa831c66d2db43210
.quad 0x983e5152ee66dfab,0xa831c66d2db43210
.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad 0x06ca6351e003826f,0x142929670a0e6e70
.quad 0x06ca6351e003826f,0x142929670a0e6e70
.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
.quad 0x81c2c92e47edaee6,0x92722c851482353b
.quad 0x81c2c92e47edaee6,0x92722c851482353b
.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
.quad 0xd192e819d6ef5218,0xd69906245565a910
.quad 0xd192e819d6ef5218,0xd69906245565a910
.quad 0xf40e35855771202a,0x106aa07032bbd1b8
.quad 0xf40e35855771202a,0x106aa07032bbd1b8
.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad 0x90befffa23631e28,0xa4506cebde82bde9
.quad 0x90befffa23631e28,0xa4506cebde82bde9
.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad 0xca273eceea26619c,0xd186b8c721c0c207
.quad 0xca273eceea26619c,0xd186b8c721c0c207
.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad 0x113f9804bef90dae,0x1b710b35131c471b
.quad 0x113f9804bef90dae,0x1b710b35131c471b
.quad 0x28db77f523047d84,0x32caab7b40c72493
.quad 0x28db77f523047d84,0x32caab7b40c72493
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
.byte
83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.type sha512_block_data_order_xop,@function +.align 64 +sha512_block_data_order_xop: +.Lxop_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue_xop: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop_xop +.align 16 +.Lloop_xop: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp .Lxop_00_47 + +.align 16 +.Lxop_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + rorq $23,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r8,%r13 + xorq %r10,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rax,%r14 + vpaddq %xmm11,%xmm0,%xmm0 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 +.byte 143,72,120,195,209,7 + xorq %r10,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,223,3 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm7,%xmm10 + addq %r11,%rdx + addq %rdi,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %rdx,%r13 + addq %r11,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r11 + vpxor %xmm10,%xmm11,%xmm11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + vpaddq %xmm11,%xmm0,%xmm0 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + rorq $23,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rcx,%r13 + xorq %r8,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r10,%r14 + vpaddq %xmm11,%xmm1,%xmm1 + andq %rcx,%r12 
+ xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 +.byte 143,72,120,195,209,7 + xorq %r8,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,216,3 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm0,%xmm10 + addq %r9,%rbx + addq %rdi,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rbx,%r13 + addq %r9,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r9 + vpxor %xmm10,%xmm11,%xmm11 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + vpaddq %xmm11,%xmm1,%xmm1 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + rorq $23,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rax,%r13 + xorq %rcx,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r8,%r14 + vpaddq %xmm11,%xmm2,%xmm2 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 +.byte 143,72,120,195,209,7 + xorq %rcx,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,217,3 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm1,%xmm10 + addq %rdx,%r11 + addq %rdi,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %r11,%r13 + addq %rdx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rdx + vpxor %xmm10,%xmm11,%xmm11 + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + vpaddq %xmm11,%xmm2,%xmm2 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + rorq $23,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r10,%r13 + xorq %rax,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rcx,%r14 + vpaddq %xmm11,%xmm3,%xmm3 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 +.byte 143,72,120,195,209,7 + xorq %rax,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,218,3 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm2,%xmm10 + addq %rbx,%r9 + addq %rdi,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r9,%r13 + addq %rbx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rbx + vpxor %xmm10,%xmm11,%xmm11 + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + vpaddq %xmm11,%xmm3,%xmm3 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%rdi + addq %r12,%rax + rorq 
$14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + rorq $23,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r8,%r13 + xorq %r10,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rax,%r14 + vpaddq %xmm11,%xmm4,%xmm4 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 +.byte 143,72,120,195,209,7 + xorq %r10,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,219,3 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm3,%xmm10 + addq %r11,%rdx + addq %rdi,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %rdx,%r13 + addq %r11,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r11 + vpxor %xmm10,%xmm11,%xmm11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + vpaddq %xmm11,%xmm4,%xmm4 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + rorq $23,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rcx,%r13 + xorq %r8,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r10,%r14 + vpaddq %xmm11,%xmm5,%xmm5 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 +.byte 143,72,120,195,209,7 + xorq %r8,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,220,3 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm4,%xmm10 + addq %r9,%rbx + addq %rdi,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rbx,%r13 + addq %r9,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r9 + vpxor %xmm10,%xmm11,%xmm11 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + vpaddq %xmm11,%xmm5,%xmm5 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + rorq $23,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rax,%r13 + xorq %rcx,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r8,%r14 + vpaddq %xmm11,%xmm6,%xmm6 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 +.byte 143,72,120,195,209,7 + xorq %rcx,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,221,3 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm5,%xmm10 + addq %rdx,%r11 + addq %rdi,%rdx + vpaddq 
%xmm8,%xmm6,%xmm6 + movq %r11,%r13 + addq %rdx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rdx + vpxor %xmm10,%xmm11,%xmm11 + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + vpaddq %xmm11,%xmm6,%xmm6 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + rorq $23,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r10,%r13 + xorq %rax,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rcx,%r14 + vpaddq %xmm11,%xmm7,%xmm7 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 +.byte 143,72,120,195,209,7 + xorq %rax,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,222,3 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm6,%xmm10 + addq %rbx,%r9 + addq %rdi,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r9,%r13 + addq %rbx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rbx + vpxor %xmm10,%xmm11,%xmm11 + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + vpaddq %xmm11,%xmm7,%xmm7 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne .Lxop_00_47 + rorq $23,%r13 + movq %r14,%rax + movq %r9,%r12 + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + rorq $4,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + rorq $6,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + rorq $28,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + rorq $23,%r13 + movq %r14,%r11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + rorq $23,%r13 + movq %r14,%r10 + movq %rdx,%r12 + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + rorq $4,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + rorq $6,%r14 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + rorq $28,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + rorq $23,%r13 + movq %r14,%r9 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq 
%rdx,%r12 + rorq $6,%r14 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + rorq $23,%r13 + movq %r14,%r8 + movq %rbx,%r12 + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + rorq $4,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + rorq $6,%r14 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + rorq $28,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + rorq $23,%r13 + movq %r14,%rdx + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + rorq $23,%r13 + movq %r14,%rcx + movq %r11,%r12 + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + rorq $4,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + rorq $6,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + rorq $28,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + rorq $23,%r13 + movq %r14,%rbx + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + rorq $23,%r13 + movq %r14,%rax + movq %r9,%r12 + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + rorq $4,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + rorq $6,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + rorq $28,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + rorq $23,%r13 + movq %r14,%r11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + rorq $23,%r13 + movq %r14,%r10 + movq %rdx,%r12 + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + rorq $4,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + rorq $6,%r14 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + rorq $28,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + rorq $23,%r13 + movq %r14,%r9 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq 
%r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + rorq $23,%r13 + movq %r14,%r8 + movq %rbx,%r12 + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + rorq $4,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + rorq $6,%r14 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + rorq $28,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + rorq $23,%r13 + movq %r14,%rdx + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + rorq $23,%r13 + movq %r14,%rcx + movq %r11,%r12 + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + rorq $4,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + rorq $6,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + rorq $28,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + rorq $23,%r13 + movq %r14,%rbx + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop_xop + + movq 128+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_xop: + .byte 0xf3,0xc3 +.size sha512_block_data_order_xop,.-sha512_block_data_order_xop +.type sha512_block_data_order_avx,@function +.align 64 +sha512_block_data_order_avx: +.Lavx_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue_avx: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb 
%xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm0,%xmm0 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 0(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm7,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm7,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm7,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 8(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm0,%xmm0 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm1,%xmm1 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 16(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm0,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm0,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm0,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 24(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq 
%rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm1,%xmm1 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm2,%xmm2 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 32(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm1,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm1,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm1,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 40(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm2,%xmm2 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm3,%xmm3 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 48(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm2,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm2,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm2,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 56(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm3,%xmm3 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq 
%r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm4,%xmm4 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 64(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm3,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm3,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm3,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 72(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm4,%xmm4 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm5,%xmm5 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 80(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm4,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm4,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm4,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 88(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm5,%xmm5 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm6,%xmm6 + shrdq $4,%r13,%r13 + xorq 
%r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 96(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm5,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm5,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm5,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 104(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm6,%xmm6 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm7,%xmm7 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 112(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm6,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm6,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm6,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 120(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm7,%xmm7 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne .Lavx_00_47 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq 
$5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq 
%rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 
24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop_avx + + movq 128+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size sha512_block_data_order_avx,.-sha512_block_data_order_avx +.type sha512_block_data_order_avx2,@function +.align 64 +sha512_block_data_order_avx2: +.Lavx2_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $1312,%rsp + shlq $4,%rdx + andq $-2048,%rsp + leaq (%rsi,%rdx,8),%rdx + addq $1152,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue_avx2: + + vzeroupper + subq $-128,%rsi + movq 0(%rdi),%rax + movq %rsi,%r12 + movq 8(%rdi),%rbx + cmpq %rdx,%rsi + movq 16(%rdi),%rcx + cmoveq %rsp,%r12 + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqu -128(%rsi),%xmm0 + vmovdqu -128+16(%rsi),%xmm1 + vmovdqu -128+32(%rsi),%xmm2 + leaq K512+128(%rip),%rbp + vmovdqu -128+48(%rsi),%xmm3 + vmovdqu -128+64(%rsi),%xmm4 + vmovdqu -128+80(%rsi),%xmm5 + vmovdqu -128+96(%rsi),%xmm6 + vmovdqu -128+112(%rsi),%xmm7 + + vmovdqa 1152(%rbp),%ymm10 + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm10,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm10,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + vpshufb %ymm10,%ymm2,%ymm2 + vinserti128 $1,64(%r12),%ymm4,%ymm4 + vpshufb %ymm10,%ymm3,%ymm3 + vinserti128 $1,80(%r12),%ymm5,%ymm5 + vpshufb %ymm10,%ymm4,%ymm4 + vinserti128 $1,96(%r12),%ymm6,%ymm6 + vpshufb %ymm10,%ymm5,%ymm5 + vinserti128 $1,112(%r12),%ymm7,%ymm7 + + vpaddq -128(%rbp),%ymm0,%ymm8 + vpshufb %ymm10,%ymm6,%ymm6 + vpaddq -96(%rbp),%ymm1,%ymm9 + vpshufb %ymm10,%ymm7,%ymm7 + vpaddq -64(%rbp),%ymm2,%ymm10 + vpaddq -32(%rbp),%ymm3,%ymm11 + vmovdqa %ymm8,0(%rsp) + vpaddq 0(%rbp),%ymm4,%ymm8 + vmovdqa %ymm9,32(%rsp) + vpaddq 32(%rbp),%ymm5,%ymm9 + vmovdqa %ymm10,64(%rsp) + vpaddq 64(%rbp),%ymm6,%ymm10 + vmovdqa %ymm11,96(%rsp) + leaq -128(%rsp),%rsp + vpaddq 96(%rbp),%ymm7,%ymm11 + vmovdqa %ymm8,0(%rsp) + xorq %r14,%r14 + vmovdqa %ymm9,32(%rsp) + movq %rbx,%rdi + vmovdqa %ymm10,64(%rsp) + xorq %rcx,%rdi + vmovdqa %ymm11,96(%rsp) + movq %r9,%r12 + addq $32*8,%rbp + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + leaq -128(%rsp),%rsp + vpalignr $8,%ymm0,%ymm1,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm4,%ymm5,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm0,%ymm0 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm7,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq 
%r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm7,%ymm10 + vpaddq %ymm8,%ymm0,%ymm0 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm7,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm0,%ymm0 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq -128(%rbp),%ymm0,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm5,%ymm6,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm1,%ymm1 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm0,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm0,%ymm10 + vpaddq %ymm8,%ymm1,%ymm1 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm0,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm1,%ymm1 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq -96(%rbp),%ymm1,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm6,%ymm7,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm2,%ymm2 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm1,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm1,%ymm10 + vpaddq %ymm8,%ymm2,%ymm2 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm1,%ymm9 + vpxor 
%ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm2,%ymm2 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq -64(%rbp),%ymm2,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm7,%ymm0,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm3,%ymm3 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm2,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm2,%ymm10 + vpaddq %ymm8,%ymm3,%ymm3 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm2,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm3,%ymm3 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq -32(%rbp),%ymm3,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq -128(%rsp),%rsp + vpalignr $8,%ymm4,%ymm5,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm0,%ymm1,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm4,%ymm4 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm3,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm3,%ymm10 + vpaddq %ymm8,%ymm4,%ymm4 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm3,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq 
%r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm4,%ymm4 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq 0(%rbp),%ymm4,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm5,%ymm6,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm1,%ymm2,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm5,%ymm5 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm4,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm4,%ymm10 + vpaddq %ymm8,%ymm5,%ymm5 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm4,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm5,%ymm5 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq 32(%rbp),%ymm5,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm6,%ymm7,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm2,%ymm3,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm6,%ymm6 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm5,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm5,%ymm10 + vpaddq %ymm8,%ymm6,%ymm6 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm5,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq 
$39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm6,%ymm6 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq 64(%rbp),%ymm6,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm7,%ymm0,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm3,%ymm4,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm7,%ymm7 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm6,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm6,%ymm10 + vpaddq %ymm8,%ymm7,%ymm7 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm6,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm7,%ymm7 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq 96(%rbp),%ymm7,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq 256(%rbp),%rbp + cmpb $0,-121(%rbp) + jne .Lavx2_00_47 + addq 0+128(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+128(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+128(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq 
%rcx,%r12 + addq 40+128(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+128(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+128(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+128(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+128(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + addq 0(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32(%rsp),%r9 + andq %rcx,%r12 + rorxq 
$41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rbp + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + cmpq 144(%rbp),%rsi + je .Ldone_avx2 + + xorq %r14,%r14 + movq %rbx,%rdi + xorq %rcx,%rdi + movq %r9,%r12 + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + addq 0+16(%rbp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + 
leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+16(%rbp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+16(%rbp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+16(%rbp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+16(%rbp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+16(%rbp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+16(%rbp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+16(%rbp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq 
%r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + leaq -128(%rbp),%rbp + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rsp + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + leaq 256(%rsi),%rsi + addq 48(%rdi),%r10 + movq %rsi,%r12 + addq 56(%rdi),%r11 + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + cmoveq %rsp,%r12 + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + +.Ldone_avx2: + leaq (%rbp),%rsp + movq 128+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.size sha512_block_data_order_avx2,.-sha512_block_data_order_avx2 Index: head/secure/lib/libcrypto/amd64/vpaes-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/vpaes-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/vpaes-x86_64.S (revision 299481) @@ -1,828 +1,829 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from vpaes-x86_64.pl. .text .type _vpaes_encrypt_core,@function .align 16 _vpaes_encrypt_core: movq %rdx,%r9 movq $16,%r11 movl 240(%rdx),%eax movdqa %xmm9,%xmm1 movdqa .Lk_ipt(%rip),%xmm2 pandn %xmm0,%xmm1 movdqu (%r9),%xmm5 psrld $4,%xmm1 pand %xmm9,%xmm0 .byte 102,15,56,0,208 movdqa .Lk_ipt+16(%rip),%xmm0 .byte 102,15,56,0,193 pxor %xmm5,%xmm2 addq $16,%r9 pxor %xmm2,%xmm0 leaq .Lk_mc_backward(%rip),%r10 jmp .Lenc_entry .align 16 .Lenc_loop: movdqa %xmm13,%xmm4 movdqa %xmm12,%xmm0 .byte 102,15,56,0,226 .byte 102,15,56,0,195 pxor %xmm5,%xmm4 movdqa %xmm15,%xmm5 pxor %xmm4,%xmm0 movdqa -64(%r11,%r10,1),%xmm1 .byte 102,15,56,0,234 movdqa (%r11,%r10,1),%xmm4 movdqa %xmm14,%xmm2 .byte 102,15,56,0,211 movdqa %xmm0,%xmm3 pxor %xmm5,%xmm2 .byte 102,15,56,0,193 addq $16,%r9 pxor %xmm2,%xmm0 .byte 102,15,56,0,220 addq $16,%r11 pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andq $0x30,%r11 subq $1,%rax pxor %xmm3,%xmm0 .Lenc_entry: movdqa %xmm9,%xmm1 movdqa %xmm11,%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm9,%xmm0 .byte 102,15,56,0,232 movdqa %xmm10,%xmm3 pxor %xmm1,%xmm0 .byte 102,15,56,0,217 movdqa %xmm10,%xmm4 pxor %xmm5,%xmm3 .byte 102,15,56,0,224 movdqa %xmm10,%xmm2 pxor %xmm5,%xmm4 .byte 102,15,56,0,211 movdqa %xmm10,%xmm3 pxor %xmm0,%xmm2 .byte 102,15,56,0,220 movdqu (%r9),%xmm5 pxor %xmm1,%xmm3 jnz .Lenc_loop movdqa -96(%r10),%xmm4 movdqa -80(%r10),%xmm0 .byte 102,15,56,0,226 pxor %xmm5,%xmm4 .byte 102,15,56,0,195 movdqa 64(%r11,%r10,1),%xmm1 pxor %xmm4,%xmm0 .byte 102,15,56,0,193 .byte 0xf3,0xc3 .size _vpaes_encrypt_core,.-_vpaes_encrypt_core .type _vpaes_decrypt_core,@function .align 16 _vpaes_decrypt_core: movq %rdx,%r9 movl 240(%rdx),%eax movdqa %xmm9,%xmm1 movdqa .Lk_dipt(%rip),%xmm2 pandn %xmm0,%xmm1 movq %rax,%r11 psrld $4,%xmm1 movdqu (%r9),%xmm5 shlq $4,%r11 pand %xmm9,%xmm0 .byte 102,15,56,0,208 movdqa .Lk_dipt+16(%rip),%xmm0 xorq $0x30,%r11 leaq .Lk_dsbd(%rip),%r10 .byte 102,15,56,0,193 andq $0x30,%r11 pxor 
%xmm5,%xmm2 movdqa .Lk_mc_forward+48(%rip),%xmm5 pxor %xmm2,%xmm0 addq $16,%r9 addq %r10,%r11 jmp .Ldec_entry .align 16 .Ldec_loop: movdqa -32(%r10),%xmm4 movdqa -16(%r10),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 movdqa 0(%r10),%xmm4 pxor %xmm1,%xmm0 movdqa 16(%r10),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,197 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 movdqa 32(%r10),%xmm4 pxor %xmm1,%xmm0 movdqa 48(%r10),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,197 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 movdqa 64(%r10),%xmm4 pxor %xmm1,%xmm0 movdqa 80(%r10),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,197 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 addq $16,%r9 .byte 102,15,58,15,237,12 pxor %xmm1,%xmm0 subq $1,%rax .Ldec_entry: movdqa %xmm9,%xmm1 pandn %xmm0,%xmm1 movdqa %xmm11,%xmm2 psrld $4,%xmm1 pand %xmm9,%xmm0 .byte 102,15,56,0,208 movdqa %xmm10,%xmm3 pxor %xmm1,%xmm0 .byte 102,15,56,0,217 movdqa %xmm10,%xmm4 pxor %xmm2,%xmm3 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm10,%xmm2 .byte 102,15,56,0,211 movdqa %xmm10,%xmm3 pxor %xmm0,%xmm2 .byte 102,15,56,0,220 movdqu (%r9),%xmm0 pxor %xmm1,%xmm3 jnz .Ldec_loop movdqa 96(%r10),%xmm4 .byte 102,15,56,0,226 pxor %xmm0,%xmm4 movdqa 112(%r10),%xmm0 movdqa -352(%r11),%xmm2 .byte 102,15,56,0,195 pxor %xmm4,%xmm0 .byte 102,15,56,0,194 .byte 0xf3,0xc3 .size _vpaes_decrypt_core,.-_vpaes_decrypt_core .type _vpaes_schedule_core,@function .align 16 _vpaes_schedule_core: call _vpaes_preheat movdqa .Lk_rcon(%rip),%xmm8 movdqu (%rdi),%xmm0 movdqa %xmm0,%xmm3 leaq .Lk_ipt(%rip),%r11 call _vpaes_schedule_transform movdqa %xmm0,%xmm7 leaq .Lk_sr(%rip),%r10 testq %rcx,%rcx jnz .Lschedule_am_decrypting movdqu %xmm0,(%rdx) jmp .Lschedule_go .Lschedule_am_decrypting: movdqa (%r8,%r10,1),%xmm1 .byte 102,15,56,0,217 movdqu %xmm3,(%rdx) xorq $0x30,%r8 .Lschedule_go: cmpl $192,%esi ja .Lschedule_256 je .Lschedule_192 .Lschedule_128: movl $10,%esi .Loop_schedule_128: call _vpaes_schedule_round decq %rsi jz .Lschedule_mangle_last call _vpaes_schedule_mangle jmp .Loop_schedule_128 .align 16 .Lschedule_192: movdqu 8(%rdi),%xmm0 call _vpaes_schedule_transform movdqa %xmm0,%xmm6 pxor %xmm4,%xmm4 movhlps %xmm4,%xmm6 movl $4,%esi .Loop_schedule_192: call _vpaes_schedule_round .byte 102,15,58,15,198,8 call _vpaes_schedule_mangle call _vpaes_schedule_192_smear call _vpaes_schedule_mangle call _vpaes_schedule_round decq %rsi jz .Lschedule_mangle_last call _vpaes_schedule_mangle call _vpaes_schedule_192_smear jmp .Loop_schedule_192 .align 16 .Lschedule_256: movdqu 16(%rdi),%xmm0 call _vpaes_schedule_transform movl $7,%esi .Loop_schedule_256: call _vpaes_schedule_mangle movdqa %xmm0,%xmm6 call _vpaes_schedule_round decq %rsi jz .Lschedule_mangle_last call _vpaes_schedule_mangle pshufd $0xFF,%xmm0,%xmm0 movdqa %xmm7,%xmm5 movdqa %xmm6,%xmm7 call _vpaes_schedule_low_round movdqa %xmm5,%xmm7 jmp .Loop_schedule_256 .align 16 .Lschedule_mangle_last: leaq .Lk_deskew(%rip),%r11 testq %rcx,%rcx jnz .Lschedule_mangle_last_dec movdqa (%r8,%r10,1),%xmm1 .byte 102,15,56,0,193 leaq .Lk_opt(%rip),%r11 addq $32,%rdx .Lschedule_mangle_last_dec: addq $-16,%rdx pxor .Lk_s63(%rip),%xmm0 call _vpaes_schedule_transform movdqu %xmm0,(%rdx) pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 .byte 0xf3,0xc3 .size _vpaes_schedule_core,.-_vpaes_schedule_core .type _vpaes_schedule_192_smear,@function .align 16 _vpaes_schedule_192_smear: pshufd $0x80,%xmm6,%xmm1 pshufd $0xFE,%xmm7,%xmm0 pxor 
%xmm1,%xmm6 pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 movhlps %xmm1,%xmm6 .byte 0xf3,0xc3 .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear .type _vpaes_schedule_round,@function .align 16 _vpaes_schedule_round: pxor %xmm1,%xmm1 .byte 102,65,15,58,15,200,15 .byte 102,69,15,58,15,192,15 pxor %xmm1,%xmm7 pshufd $0xFF,%xmm0,%xmm0 .byte 102,15,58,15,192,1 _vpaes_schedule_low_round: movdqa %xmm7,%xmm1 pslldq $4,%xmm7 pxor %xmm1,%xmm7 movdqa %xmm7,%xmm1 pslldq $8,%xmm7 pxor %xmm1,%xmm7 pxor .Lk_s63(%rip),%xmm7 movdqa %xmm9,%xmm1 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm9,%xmm0 movdqa %xmm11,%xmm2 .byte 102,15,56,0,208 pxor %xmm1,%xmm0 movdqa %xmm10,%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 movdqa %xmm10,%xmm4 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm10,%xmm2 .byte 102,15,56,0,211 pxor %xmm0,%xmm2 movdqa %xmm10,%xmm3 .byte 102,15,56,0,220 pxor %xmm1,%xmm3 movdqa %xmm13,%xmm4 .byte 102,15,56,0,226 movdqa %xmm12,%xmm0 .byte 102,15,56,0,195 pxor %xmm4,%xmm0 pxor %xmm7,%xmm0 movdqa %xmm0,%xmm7 .byte 0xf3,0xc3 .size _vpaes_schedule_round,.-_vpaes_schedule_round .type _vpaes_schedule_transform,@function .align 16 _vpaes_schedule_transform: movdqa %xmm9,%xmm1 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm9,%xmm0 movdqa (%r11),%xmm2 .byte 102,15,56,0,208 movdqa 16(%r11),%xmm0 .byte 102,15,56,0,193 pxor %xmm2,%xmm0 .byte 0xf3,0xc3 .size _vpaes_schedule_transform,.-_vpaes_schedule_transform .type _vpaes_schedule_mangle,@function .align 16 _vpaes_schedule_mangle: movdqa %xmm0,%xmm4 movdqa .Lk_mc_forward(%rip),%xmm5 testq %rcx,%rcx jnz .Lschedule_mangle_dec addq $16,%rdx pxor .Lk_s63(%rip),%xmm4 .byte 102,15,56,0,229 movdqa %xmm4,%xmm3 .byte 102,15,56,0,229 pxor %xmm4,%xmm3 .byte 102,15,56,0,229 pxor %xmm4,%xmm3 jmp .Lschedule_mangle_both .align 16 .Lschedule_mangle_dec: leaq .Lk_dksd(%rip),%r11 movdqa %xmm9,%xmm1 pandn %xmm4,%xmm1 psrld $4,%xmm1 pand %xmm9,%xmm4 movdqa 0(%r11),%xmm2 .byte 102,15,56,0,212 movdqa 16(%r11),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 .byte 102,15,56,0,221 movdqa 32(%r11),%xmm2 .byte 102,15,56,0,212 pxor %xmm3,%xmm2 movdqa 48(%r11),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 .byte 102,15,56,0,221 movdqa 64(%r11),%xmm2 .byte 102,15,56,0,212 pxor %xmm3,%xmm2 movdqa 80(%r11),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 .byte 102,15,56,0,221 movdqa 96(%r11),%xmm2 .byte 102,15,56,0,212 pxor %xmm3,%xmm2 movdqa 112(%r11),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 addq $-16,%rdx .Lschedule_mangle_both: movdqa (%r8,%r10,1),%xmm1 .byte 102,15,56,0,217 addq $-16,%r8 andq $0x30,%r8 movdqu %xmm3,(%rdx) .byte 0xf3,0xc3 .size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle .globl vpaes_set_encrypt_key .type vpaes_set_encrypt_key,@function .align 16 vpaes_set_encrypt_key: movl %esi,%eax shrl $5,%eax addl $5,%eax movl %eax,240(%rdx) movl $0,%ecx movl $0x30,%r8d call _vpaes_schedule_core xorl %eax,%eax .byte 0xf3,0xc3 .size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key .globl vpaes_set_decrypt_key .type vpaes_set_decrypt_key,@function .align 16 vpaes_set_decrypt_key: movl %esi,%eax shrl $5,%eax addl $5,%eax movl %eax,240(%rdx) shll $4,%eax leaq 16(%rdx,%rax,1),%rdx movl $1,%ecx movl %esi,%r8d shrl $1,%r8d andl $32,%r8d xorl $32,%r8d call _vpaes_schedule_core xorl %eax,%eax .byte 0xf3,0xc3 .size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key .globl vpaes_encrypt .type vpaes_encrypt,@function .align 16 vpaes_encrypt: movdqu (%rdi),%xmm0 call _vpaes_preheat call _vpaes_encrypt_core movdqu %xmm0,(%rsi) .byte 0xf3,0xc3 .size vpaes_encrypt,.-vpaes_encrypt .globl 
vpaes_decrypt .type vpaes_decrypt,@function .align 16 vpaes_decrypt: movdqu (%rdi),%xmm0 call _vpaes_preheat call _vpaes_decrypt_core movdqu %xmm0,(%rsi) .byte 0xf3,0xc3 .size vpaes_decrypt,.-vpaes_decrypt .globl vpaes_cbc_encrypt .type vpaes_cbc_encrypt,@function .align 16 vpaes_cbc_encrypt: xchgq %rcx,%rdx subq $16,%rcx jc .Lcbc_abort movdqu (%r8),%xmm6 subq %rdi,%rsi call _vpaes_preheat cmpl $0,%r9d je .Lcbc_dec_loop jmp .Lcbc_enc_loop .align 16 .Lcbc_enc_loop: movdqu (%rdi),%xmm0 pxor %xmm6,%xmm0 call _vpaes_encrypt_core movdqa %xmm0,%xmm6 movdqu %xmm0,(%rsi,%rdi,1) leaq 16(%rdi),%rdi subq $16,%rcx jnc .Lcbc_enc_loop jmp .Lcbc_done .align 16 .Lcbc_dec_loop: movdqu (%rdi),%xmm0 movdqa %xmm0,%xmm7 call _vpaes_decrypt_core pxor %xmm6,%xmm0 movdqa %xmm7,%xmm6 movdqu %xmm0,(%rsi,%rdi,1) leaq 16(%rdi),%rdi subq $16,%rcx jnc .Lcbc_dec_loop .Lcbc_done: movdqu %xmm6,(%r8) .Lcbc_abort: .byte 0xf3,0xc3 .size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt .type _vpaes_preheat,@function .align 16 _vpaes_preheat: leaq .Lk_s0F(%rip),%r10 movdqa -32(%r10),%xmm10 movdqa -16(%r10),%xmm11 movdqa 0(%r10),%xmm9 movdqa 48(%r10),%xmm13 movdqa 64(%r10),%xmm12 movdqa 80(%r10),%xmm15 movdqa 96(%r10),%xmm14 .byte 0xf3,0xc3 .size _vpaes_preheat,.-_vpaes_preheat .type _vpaes_consts,@object .align 64 _vpaes_consts: .Lk_inv: .quad 0x0E05060F0D080180, 0x040703090A0B0C02 .quad 0x01040A060F0B0780, 0x030D0E0C02050809 .Lk_s0F: .quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F .Lk_ipt: .quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 .quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 .Lk_sb1: .quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 .quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF .Lk_sb2: .quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD .quad 0x69EB88400AE12900, 0xC2A163C8AB82234A .Lk_sbo: .quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 .quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA .Lk_mc_forward: .quad 0x0407060500030201, 0x0C0F0E0D080B0A09 .quad 0x080B0A0904070605, 0x000302010C0F0E0D .quad 0x0C0F0E0D080B0A09, 0x0407060500030201 .quad 0x000302010C0F0E0D, 0x080B0A0904070605 .Lk_mc_backward: .quad 0x0605040702010003, 0x0E0D0C0F0A09080B .quad 0x020100030E0D0C0F, 0x0A09080B06050407 .quad 0x0E0D0C0F0A09080B, 0x0605040702010003 .quad 0x0A09080B06050407, 0x020100030E0D0C0F .Lk_sr: .quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 .quad 0x030E09040F0A0500, 0x0B06010C07020D08 .quad 0x0F060D040B020900, 0x070E050C030A0108 .quad 0x0B0E0104070A0D00, 0x0306090C0F020508 .Lk_rcon: .quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 .Lk_s63: .quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B .Lk_opt: .quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 .quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 .Lk_deskew: .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 .Lk_dksd: .quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 .quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E .Lk_dksb: .quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 .quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 .Lk_dkse: .quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 .quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 .Lk_dks9: .quad 0xB6116FC87ED9A700, 0x4AED933482255BFC .quad 0x4576516227143300, 0x8BB89FACE9DAFDCE .Lk_dipt: .quad 0x0F505B040B545F00, 0x154A411E114E451A .quad 0x86E383E660056500, 0x12771772F491F194 .Lk_dsb9: .quad 0x851C03539A86D600, 0xCAD51F504F994CC9 .quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 .Lk_dsbd: .quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 .quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 .Lk_dsbb: .quad 0xD022649296B44200, 0x602646F6B0F2D404 .quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B .Lk_dsbe: 
.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 .quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 .Lk_dsbo: .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C .byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 .align 64 .size _vpaes_consts,.-_vpaes_consts Index: head/secure/lib/libcrypto/amd64/wp-x86_64.S =================================================================== --- head/secure/lib/libcrypto/amd64/wp-x86_64.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/wp-x86_64.S (revision 299481) @@ -1,862 +1,863 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from wp-x86_64.pl. .text .globl whirlpool_block .type whirlpool_block,@function .align 16 whirlpool_block: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 movq %rsp,%r11 subq $128+40,%rsp andq $-64,%rsp leaq 128(%rsp),%r10 movq %rdi,0(%r10) movq %rsi,8(%r10) movq %rdx,16(%r10) movq %r11,32(%r10) .Lprologue: movq %r10,%rbx leaq .Ltable(%rip),%rbp xorq %rcx,%rcx xorq %rdx,%rdx movq 0(%rdi),%r8 movq 8(%rdi),%r9 movq 16(%rdi),%r10 movq 24(%rdi),%r11 movq 32(%rdi),%r12 movq 40(%rdi),%r13 movq 48(%rdi),%r14 movq 56(%rdi),%r15 .Louterloop: movq %r8,0(%rsp) movq %r9,8(%rsp) movq %r10,16(%rsp) movq %r11,24(%rsp) movq %r12,32(%rsp) movq %r13,40(%rsp) movq %r14,48(%rsp) movq %r15,56(%rsp) xorq 0(%rsi),%r8 xorq 8(%rsi),%r9 xorq 16(%rsi),%r10 xorq 24(%rsi),%r11 xorq 32(%rsi),%r12 xorq 40(%rsi),%r13 xorq 48(%rsi),%r14 xorq 56(%rsi),%r15 movq %r8,64+0(%rsp) movq %r9,64+8(%rsp) movq %r10,64+16(%rsp) movq %r11,64+24(%rsp) movq %r12,64+32(%rsp) movq %r13,64+40(%rsp) movq %r14,64+48(%rsp) movq %r15,64+56(%rsp) xorq %rsi,%rsi movq %rsi,24(%rbx) jmp .Lround .align 16 .Lround: movq 4096(%rbp,%rsi,8),%r8 movl 0(%rsp),%eax movl 4(%rsp),%ebx movzbl %al,%ecx movzbl %ah,%edx shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r8 movq 7(%rbp,%rdi,8),%r9 movl 0+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx movq 6(%rbp,%rsi,8),%r10 movq 5(%rbp,%rdi,8),%r11 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx movq 4(%rbp,%rsi,8),%r12 movq 3(%rbp,%rdi,8),%r13 movl 0+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx movq 2(%rbp,%rsi,8),%r14 movq 1(%rbp,%rdi,8),%r15 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r9 xorq 7(%rbp,%rdi,8),%r10 movl 8+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r11 xorq 5(%rbp,%rdi,8),%r12 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r13 xorq 3(%rbp,%rdi,8),%r14 movl 8+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r15 xorq 1(%rbp,%rdi,8),%r8 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r10 xorq 7(%rbp,%rdi,8),%r11 movl 16+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r12 xorq 5(%rbp,%rdi,8),%r13 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl 
%bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r14 xorq 3(%rbp,%rdi,8),%r15 movl 16+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r8 xorq 1(%rbp,%rdi,8),%r9 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r11 xorq 7(%rbp,%rdi,8),%r12 movl 24+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r13 xorq 5(%rbp,%rdi,8),%r14 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r15 xorq 3(%rbp,%rdi,8),%r8 movl 24+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r9 xorq 1(%rbp,%rdi,8),%r10 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r12 xorq 7(%rbp,%rdi,8),%r13 movl 32+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r14 xorq 5(%rbp,%rdi,8),%r15 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r8 xorq 3(%rbp,%rdi,8),%r9 movl 32+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r10 xorq 1(%rbp,%rdi,8),%r11 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r13 xorq 7(%rbp,%rdi,8),%r14 movl 40+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r15 xorq 5(%rbp,%rdi,8),%r8 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r9 xorq 3(%rbp,%rdi,8),%r10 movl 40+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r11 xorq 1(%rbp,%rdi,8),%r12 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r14 xorq 7(%rbp,%rdi,8),%r15 movl 48+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r8 xorq 5(%rbp,%rdi,8),%r9 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r10 xorq 3(%rbp,%rdi,8),%r11 movl 48+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r12 xorq 1(%rbp,%rdi,8),%r13 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r15 xorq 7(%rbp,%rdi,8),%r8 movl 56+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r9 xorq 5(%rbp,%rdi,8),%r10 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r11 xorq 3(%rbp,%rdi,8),%r12 movl 56+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r13 xorq 1(%rbp,%rdi,8),%r14 movq %r8,0(%rsp) movq %r9,8(%rsp) movq %r10,16(%rsp) movq %r11,24(%rsp) movq %r12,32(%rsp) movq %r13,40(%rsp) movq %r14,48(%rsp) movq %r15,56(%rsp) shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r8 xorq 7(%rbp,%rdi,8),%r9 movl 64+0+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx 
leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r10 xorq 5(%rbp,%rdi,8),%r11 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r12 xorq 3(%rbp,%rdi,8),%r13 movl 64+0+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r14 xorq 1(%rbp,%rdi,8),%r15 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r9 xorq 7(%rbp,%rdi,8),%r10 movl 64+8+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r11 xorq 5(%rbp,%rdi,8),%r12 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r13 xorq 3(%rbp,%rdi,8),%r14 movl 64+8+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r15 xorq 1(%rbp,%rdi,8),%r8 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r10 xorq 7(%rbp,%rdi,8),%r11 movl 64+16+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r12 xorq 5(%rbp,%rdi,8),%r13 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r14 xorq 3(%rbp,%rdi,8),%r15 movl 64+16+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r8 xorq 1(%rbp,%rdi,8),%r9 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r11 xorq 7(%rbp,%rdi,8),%r12 movl 64+24+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r13 xorq 5(%rbp,%rdi,8),%r14 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r15 xorq 3(%rbp,%rdi,8),%r8 movl 64+24+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r9 xorq 1(%rbp,%rdi,8),%r10 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r12 xorq 7(%rbp,%rdi,8),%r13 movl 64+32+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r14 xorq 5(%rbp,%rdi,8),%r15 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r8 xorq 3(%rbp,%rdi,8),%r9 movl 64+32+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r10 xorq 1(%rbp,%rdi,8),%r11 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r13 xorq 7(%rbp,%rdi,8),%r14 movl 64+40+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r15 xorq 5(%rbp,%rdi,8),%r8 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r9 xorq 3(%rbp,%rdi,8),%r10 movl 64+40+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r11 xorq 1(%rbp,%rdi,8),%r12 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r14 xorq 7(%rbp,%rdi,8),%r15 movl 64+48+8(%rsp),%eax leaq 
(%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r8 xorq 5(%rbp,%rdi,8),%r9 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r10 xorq 3(%rbp,%rdi,8),%r11 movl 64+48+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r12 xorq 1(%rbp,%rdi,8),%r13 shrl $16,%eax leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r15 xorq 7(%rbp,%rdi,8),%r8 leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r9 xorq 5(%rbp,%rdi,8),%r10 shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r11 xorq 3(%rbp,%rdi,8),%r12 leaq (%rcx,%rcx,1),%rsi movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r13 xorq 1(%rbp,%rdi,8),%r14 leaq 128(%rsp),%rbx movq 24(%rbx),%rsi addq $1,%rsi cmpq $10,%rsi je .Lroundsdone movq %rsi,24(%rbx) movq %r8,64+0(%rsp) movq %r9,64+8(%rsp) movq %r10,64+16(%rsp) movq %r11,64+24(%rsp) movq %r12,64+32(%rsp) movq %r13,64+40(%rsp) movq %r14,64+48(%rsp) movq %r15,64+56(%rsp) jmp .Lround .align 16 .Lroundsdone: movq 0(%rbx),%rdi movq 8(%rbx),%rsi movq 16(%rbx),%rax xorq 0(%rsi),%r8 xorq 8(%rsi),%r9 xorq 16(%rsi),%r10 xorq 24(%rsi),%r11 xorq 32(%rsi),%r12 xorq 40(%rsi),%r13 xorq 48(%rsi),%r14 xorq 56(%rsi),%r15 xorq 0(%rdi),%r8 xorq 8(%rdi),%r9 xorq 16(%rdi),%r10 xorq 24(%rdi),%r11 xorq 32(%rdi),%r12 xorq 40(%rdi),%r13 xorq 48(%rdi),%r14 xorq 56(%rdi),%r15 movq %r8,0(%rdi) movq %r9,8(%rdi) movq %r10,16(%rdi) movq %r11,24(%rdi) movq %r12,32(%rdi) movq %r13,40(%rdi) movq %r14,48(%rdi) movq %r15,56(%rdi) leaq 64(%rsi),%rsi subq $1,%rax jz .Lalldone movq %rsi,8(%rbx) movq %rax,16(%rbx) jmp .Louterloop .Lalldone: movq 32(%rbx),%rsi movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Lepilogue: .byte 0xf3,0xc3 .size whirlpool_block,.-whirlpool_block .align 64 .type .Ltable,@object .Ltable: .byte 24,24,96,24,192,120,48,216,24,24,96,24,192,120,48,216 .byte 35,35,140,35,5,175,70,38,35,35,140,35,5,175,70,38 .byte 198,198,63,198,126,249,145,184,198,198,63,198,126,249,145,184 .byte 232,232,135,232,19,111,205,251,232,232,135,232,19,111,205,251 .byte 135,135,38,135,76,161,19,203,135,135,38,135,76,161,19,203 .byte 184,184,218,184,169,98,109,17,184,184,218,184,169,98,109,17 .byte 1,1,4,1,8,5,2,9,1,1,4,1,8,5,2,9 .byte 79,79,33,79,66,110,158,13,79,79,33,79,66,110,158,13 .byte 54,54,216,54,173,238,108,155,54,54,216,54,173,238,108,155 .byte 166,166,162,166,89,4,81,255,166,166,162,166,89,4,81,255 .byte 210,210,111,210,222,189,185,12,210,210,111,210,222,189,185,12 .byte 245,245,243,245,251,6,247,14,245,245,243,245,251,6,247,14 .byte 121,121,249,121,239,128,242,150,121,121,249,121,239,128,242,150 .byte 111,111,161,111,95,206,222,48,111,111,161,111,95,206,222,48 .byte 145,145,126,145,252,239,63,109,145,145,126,145,252,239,63,109 .byte 82,82,85,82,170,7,164,248,82,82,85,82,170,7,164,248 .byte 96,96,157,96,39,253,192,71,96,96,157,96,39,253,192,71 .byte 188,188,202,188,137,118,101,53,188,188,202,188,137,118,101,53 .byte 155,155,86,155,172,205,43,55,155,155,86,155,172,205,43,55 .byte 142,142,2,142,4,140,1,138,142,142,2,142,4,140,1,138 .byte 163,163,182,163,113,21,91,210,163,163,182,163,113,21,91,210 .byte 12,12,48,12,96,60,24,108,12,12,48,12,96,60,24,108 .byte 
123,123,241,123,255,138,246,132,123,123,241,123,255,138,246,132 .byte 53,53,212,53,181,225,106,128,53,53,212,53,181,225,106,128 .byte 29,29,116,29,232,105,58,245,29,29,116,29,232,105,58,245 .byte 224,224,167,224,83,71,221,179,224,224,167,224,83,71,221,179 .byte 215,215,123,215,246,172,179,33,215,215,123,215,246,172,179,33 .byte 194,194,47,194,94,237,153,156,194,194,47,194,94,237,153,156 .byte 46,46,184,46,109,150,92,67,46,46,184,46,109,150,92,67 .byte 75,75,49,75,98,122,150,41,75,75,49,75,98,122,150,41 .byte 254,254,223,254,163,33,225,93,254,254,223,254,163,33,225,93 .byte 87,87,65,87,130,22,174,213,87,87,65,87,130,22,174,213 .byte 21,21,84,21,168,65,42,189,21,21,84,21,168,65,42,189 .byte 119,119,193,119,159,182,238,232,119,119,193,119,159,182,238,232 .byte 55,55,220,55,165,235,110,146,55,55,220,55,165,235,110,146 .byte 229,229,179,229,123,86,215,158,229,229,179,229,123,86,215,158 .byte 159,159,70,159,140,217,35,19,159,159,70,159,140,217,35,19 .byte 240,240,231,240,211,23,253,35,240,240,231,240,211,23,253,35 .byte 74,74,53,74,106,127,148,32,74,74,53,74,106,127,148,32 .byte 218,218,79,218,158,149,169,68,218,218,79,218,158,149,169,68 .byte 88,88,125,88,250,37,176,162,88,88,125,88,250,37,176,162 .byte 201,201,3,201,6,202,143,207,201,201,3,201,6,202,143,207 .byte 41,41,164,41,85,141,82,124,41,41,164,41,85,141,82,124 .byte 10,10,40,10,80,34,20,90,10,10,40,10,80,34,20,90 .byte 177,177,254,177,225,79,127,80,177,177,254,177,225,79,127,80 .byte 160,160,186,160,105,26,93,201,160,160,186,160,105,26,93,201 .byte 107,107,177,107,127,218,214,20,107,107,177,107,127,218,214,20 .byte 133,133,46,133,92,171,23,217,133,133,46,133,92,171,23,217 .byte 189,189,206,189,129,115,103,60,189,189,206,189,129,115,103,60 .byte 93,93,105,93,210,52,186,143,93,93,105,93,210,52,186,143 .byte 16,16,64,16,128,80,32,144,16,16,64,16,128,80,32,144 .byte 244,244,247,244,243,3,245,7,244,244,247,244,243,3,245,7 .byte 203,203,11,203,22,192,139,221,203,203,11,203,22,192,139,221 .byte 62,62,248,62,237,198,124,211,62,62,248,62,237,198,124,211 .byte 5,5,20,5,40,17,10,45,5,5,20,5,40,17,10,45 .byte 103,103,129,103,31,230,206,120,103,103,129,103,31,230,206,120 .byte 228,228,183,228,115,83,213,151,228,228,183,228,115,83,213,151 .byte 39,39,156,39,37,187,78,2,39,39,156,39,37,187,78,2 .byte 65,65,25,65,50,88,130,115,65,65,25,65,50,88,130,115 .byte 139,139,22,139,44,157,11,167,139,139,22,139,44,157,11,167 .byte 167,167,166,167,81,1,83,246,167,167,166,167,81,1,83,246 .byte 125,125,233,125,207,148,250,178,125,125,233,125,207,148,250,178 .byte 149,149,110,149,220,251,55,73,149,149,110,149,220,251,55,73 .byte 216,216,71,216,142,159,173,86,216,216,71,216,142,159,173,86 .byte 251,251,203,251,139,48,235,112,251,251,203,251,139,48,235,112 .byte 238,238,159,238,35,113,193,205,238,238,159,238,35,113,193,205 .byte 124,124,237,124,199,145,248,187,124,124,237,124,199,145,248,187 .byte 102,102,133,102,23,227,204,113,102,102,133,102,23,227,204,113 .byte 221,221,83,221,166,142,167,123,221,221,83,221,166,142,167,123 .byte 23,23,92,23,184,75,46,175,23,23,92,23,184,75,46,175 .byte 71,71,1,71,2,70,142,69,71,71,1,71,2,70,142,69 .byte 158,158,66,158,132,220,33,26,158,158,66,158,132,220,33,26 .byte 202,202,15,202,30,197,137,212,202,202,15,202,30,197,137,212 .byte 45,45,180,45,117,153,90,88,45,45,180,45,117,153,90,88 .byte 191,191,198,191,145,121,99,46,191,191,198,191,145,121,99,46 .byte 7,7,28,7,56,27,14,63,7,7,28,7,56,27,14,63 .byte 173,173,142,173,1,35,71,172,173,173,142,173,1,35,71,172 .byte 90,90,117,90,234,47,180,176,90,90,117,90,234,47,180,176 .byte 
131,131,54,131,108,181,27,239,131,131,54,131,108,181,27,239 .byte 51,51,204,51,133,255,102,182,51,51,204,51,133,255,102,182 .byte 99,99,145,99,63,242,198,92,99,99,145,99,63,242,198,92 .byte 2,2,8,2,16,10,4,18,2,2,8,2,16,10,4,18 .byte 170,170,146,170,57,56,73,147,170,170,146,170,57,56,73,147 .byte 113,113,217,113,175,168,226,222,113,113,217,113,175,168,226,222 .byte 200,200,7,200,14,207,141,198,200,200,7,200,14,207,141,198 .byte 25,25,100,25,200,125,50,209,25,25,100,25,200,125,50,209 .byte 73,73,57,73,114,112,146,59,73,73,57,73,114,112,146,59 .byte 217,217,67,217,134,154,175,95,217,217,67,217,134,154,175,95 .byte 242,242,239,242,195,29,249,49,242,242,239,242,195,29,249,49 .byte 227,227,171,227,75,72,219,168,227,227,171,227,75,72,219,168 .byte 91,91,113,91,226,42,182,185,91,91,113,91,226,42,182,185 .byte 136,136,26,136,52,146,13,188,136,136,26,136,52,146,13,188 .byte 154,154,82,154,164,200,41,62,154,154,82,154,164,200,41,62 .byte 38,38,152,38,45,190,76,11,38,38,152,38,45,190,76,11 .byte 50,50,200,50,141,250,100,191,50,50,200,50,141,250,100,191 .byte 176,176,250,176,233,74,125,89,176,176,250,176,233,74,125,89 .byte 233,233,131,233,27,106,207,242,233,233,131,233,27,106,207,242 .byte 15,15,60,15,120,51,30,119,15,15,60,15,120,51,30,119 .byte 213,213,115,213,230,166,183,51,213,213,115,213,230,166,183,51 .byte 128,128,58,128,116,186,29,244,128,128,58,128,116,186,29,244 .byte 190,190,194,190,153,124,97,39,190,190,194,190,153,124,97,39 .byte 205,205,19,205,38,222,135,235,205,205,19,205,38,222,135,235 .byte 52,52,208,52,189,228,104,137,52,52,208,52,189,228,104,137 .byte 72,72,61,72,122,117,144,50,72,72,61,72,122,117,144,50 .byte 255,255,219,255,171,36,227,84,255,255,219,255,171,36,227,84 .byte 122,122,245,122,247,143,244,141,122,122,245,122,247,143,244,141 .byte 144,144,122,144,244,234,61,100,144,144,122,144,244,234,61,100 .byte 95,95,97,95,194,62,190,157,95,95,97,95,194,62,190,157 .byte 32,32,128,32,29,160,64,61,32,32,128,32,29,160,64,61 .byte 104,104,189,104,103,213,208,15,104,104,189,104,103,213,208,15 .byte 26,26,104,26,208,114,52,202,26,26,104,26,208,114,52,202 .byte 174,174,130,174,25,44,65,183,174,174,130,174,25,44,65,183 .byte 180,180,234,180,201,94,117,125,180,180,234,180,201,94,117,125 .byte 84,84,77,84,154,25,168,206,84,84,77,84,154,25,168,206 .byte 147,147,118,147,236,229,59,127,147,147,118,147,236,229,59,127 .byte 34,34,136,34,13,170,68,47,34,34,136,34,13,170,68,47 .byte 100,100,141,100,7,233,200,99,100,100,141,100,7,233,200,99 .byte 241,241,227,241,219,18,255,42,241,241,227,241,219,18,255,42 .byte 115,115,209,115,191,162,230,204,115,115,209,115,191,162,230,204 .byte 18,18,72,18,144,90,36,130,18,18,72,18,144,90,36,130 .byte 64,64,29,64,58,93,128,122,64,64,29,64,58,93,128,122 .byte 8,8,32,8,64,40,16,72,8,8,32,8,64,40,16,72 .byte 195,195,43,195,86,232,155,149,195,195,43,195,86,232,155,149 .byte 236,236,151,236,51,123,197,223,236,236,151,236,51,123,197,223 .byte 219,219,75,219,150,144,171,77,219,219,75,219,150,144,171,77 .byte 161,161,190,161,97,31,95,192,161,161,190,161,97,31,95,192 .byte 141,141,14,141,28,131,7,145,141,141,14,141,28,131,7,145 .byte 61,61,244,61,245,201,122,200,61,61,244,61,245,201,122,200 .byte 151,151,102,151,204,241,51,91,151,151,102,151,204,241,51,91 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 .byte 207,207,27,207,54,212,131,249,207,207,27,207,54,212,131,249 .byte 43,43,172,43,69,135,86,110,43,43,172,43,69,135,86,110 .byte 118,118,197,118,151,179,236,225,118,118,197,118,151,179,236,225 .byte 130,130,50,130,100,176,25,230,130,130,50,130,100,176,25,230 .byte 
214,214,127,214,254,169,177,40,214,214,127,214,254,169,177,40 .byte 27,27,108,27,216,119,54,195,27,27,108,27,216,119,54,195 .byte 181,181,238,181,193,91,119,116,181,181,238,181,193,91,119,116 .byte 175,175,134,175,17,41,67,190,175,175,134,175,17,41,67,190 .byte 106,106,181,106,119,223,212,29,106,106,181,106,119,223,212,29 .byte 80,80,93,80,186,13,160,234,80,80,93,80,186,13,160,234 .byte 69,69,9,69,18,76,138,87,69,69,9,69,18,76,138,87 .byte 243,243,235,243,203,24,251,56,243,243,235,243,203,24,251,56 .byte 48,48,192,48,157,240,96,173,48,48,192,48,157,240,96,173 .byte 239,239,155,239,43,116,195,196,239,239,155,239,43,116,195,196 .byte 63,63,252,63,229,195,126,218,63,63,252,63,229,195,126,218 .byte 85,85,73,85,146,28,170,199,85,85,73,85,146,28,170,199 .byte 162,162,178,162,121,16,89,219,162,162,178,162,121,16,89,219 .byte 234,234,143,234,3,101,201,233,234,234,143,234,3,101,201,233 .byte 101,101,137,101,15,236,202,106,101,101,137,101,15,236,202,106 .byte 186,186,210,186,185,104,105,3,186,186,210,186,185,104,105,3 .byte 47,47,188,47,101,147,94,74,47,47,188,47,101,147,94,74 .byte 192,192,39,192,78,231,157,142,192,192,39,192,78,231,157,142 .byte 222,222,95,222,190,129,161,96,222,222,95,222,190,129,161,96 .byte 28,28,112,28,224,108,56,252,28,28,112,28,224,108,56,252 .byte 253,253,211,253,187,46,231,70,253,253,211,253,187,46,231,70 .byte 77,77,41,77,82,100,154,31,77,77,41,77,82,100,154,31 .byte 146,146,114,146,228,224,57,118,146,146,114,146,228,224,57,118 .byte 117,117,201,117,143,188,234,250,117,117,201,117,143,188,234,250 .byte 6,6,24,6,48,30,12,54,6,6,24,6,48,30,12,54 .byte 138,138,18,138,36,152,9,174,138,138,18,138,36,152,9,174 .byte 178,178,242,178,249,64,121,75,178,178,242,178,249,64,121,75 .byte 230,230,191,230,99,89,209,133,230,230,191,230,99,89,209,133 .byte 14,14,56,14,112,54,28,126,14,14,56,14,112,54,28,126 .byte 31,31,124,31,248,99,62,231,31,31,124,31,248,99,62,231 .byte 98,98,149,98,55,247,196,85,98,98,149,98,55,247,196,85 .byte 212,212,119,212,238,163,181,58,212,212,119,212,238,163,181,58 .byte 168,168,154,168,41,50,77,129,168,168,154,168,41,50,77,129 .byte 150,150,98,150,196,244,49,82,150,150,98,150,196,244,49,82 .byte 249,249,195,249,155,58,239,98,249,249,195,249,155,58,239,98 .byte 197,197,51,197,102,246,151,163,197,197,51,197,102,246,151,163 .byte 37,37,148,37,53,177,74,16,37,37,148,37,53,177,74,16 .byte 89,89,121,89,242,32,178,171,89,89,121,89,242,32,178,171 .byte 132,132,42,132,84,174,21,208,132,132,42,132,84,174,21,208 .byte 114,114,213,114,183,167,228,197,114,114,213,114,183,167,228,197 .byte 57,57,228,57,213,221,114,236,57,57,228,57,213,221,114,236 .byte 76,76,45,76,90,97,152,22,76,76,45,76,90,97,152,22 .byte 94,94,101,94,202,59,188,148,94,94,101,94,202,59,188,148 .byte 120,120,253,120,231,133,240,159,120,120,253,120,231,133,240,159 .byte 56,56,224,56,221,216,112,229,56,56,224,56,221,216,112,229 .byte 140,140,10,140,20,134,5,152,140,140,10,140,20,134,5,152 .byte 209,209,99,209,198,178,191,23,209,209,99,209,198,178,191,23 .byte 165,165,174,165,65,11,87,228,165,165,174,165,65,11,87,228 .byte 226,226,175,226,67,77,217,161,226,226,175,226,67,77,217,161 .byte 97,97,153,97,47,248,194,78,97,97,153,97,47,248,194,78 .byte 179,179,246,179,241,69,123,66,179,179,246,179,241,69,123,66 .byte 33,33,132,33,21,165,66,52,33,33,132,33,21,165,66,52 .byte 156,156,74,156,148,214,37,8,156,156,74,156,148,214,37,8 .byte 30,30,120,30,240,102,60,238,30,30,120,30,240,102,60,238 .byte 67,67,17,67,34,82,134,97,67,67,17,67,34,82,134,97 .byte 199,199,59,199,118,252,147,177,199,199,59,199,118,252,147,177 
.byte 252,252,215,252,179,43,229,79,252,252,215,252,179,43,229,79 .byte 4,4,16,4,32,20,8,36,4,4,16,4,32,20,8,36 .byte 81,81,89,81,178,8,162,227,81,81,89,81,178,8,162,227 .byte 153,153,94,153,188,199,47,37,153,153,94,153,188,199,47,37 .byte 109,109,169,109,79,196,218,34,109,109,169,109,79,196,218,34 .byte 13,13,52,13,104,57,26,101,13,13,52,13,104,57,26,101 .byte 250,250,207,250,131,53,233,121,250,250,207,250,131,53,233,121 .byte 223,223,91,223,182,132,163,105,223,223,91,223,182,132,163,105 .byte 126,126,229,126,215,155,252,169,126,126,229,126,215,155,252,169 .byte 36,36,144,36,61,180,72,25,36,36,144,36,61,180,72,25 .byte 59,59,236,59,197,215,118,254,59,59,236,59,197,215,118,254 .byte 171,171,150,171,49,61,75,154,171,171,150,171,49,61,75,154 .byte 206,206,31,206,62,209,129,240,206,206,31,206,62,209,129,240 .byte 17,17,68,17,136,85,34,153,17,17,68,17,136,85,34,153 .byte 143,143,6,143,12,137,3,131,143,143,6,143,12,137,3,131 .byte 78,78,37,78,74,107,156,4,78,78,37,78,74,107,156,4 .byte 183,183,230,183,209,81,115,102,183,183,230,183,209,81,115,102 .byte 235,235,139,235,11,96,203,224,235,235,139,235,11,96,203,224 .byte 60,60,240,60,253,204,120,193,60,60,240,60,253,204,120,193 .byte 129,129,62,129,124,191,31,253,129,129,62,129,124,191,31,253 .byte 148,148,106,148,212,254,53,64,148,148,106,148,212,254,53,64 .byte 247,247,251,247,235,12,243,28,247,247,251,247,235,12,243,28 .byte 185,185,222,185,161,103,111,24,185,185,222,185,161,103,111,24 .byte 19,19,76,19,152,95,38,139,19,19,76,19,152,95,38,139 .byte 44,44,176,44,125,156,88,81,44,44,176,44,125,156,88,81 .byte 211,211,107,211,214,184,187,5,211,211,107,211,214,184,187,5 .byte 231,231,187,231,107,92,211,140,231,231,187,231,107,92,211,140 .byte 110,110,165,110,87,203,220,57,110,110,165,110,87,203,220,57 .byte 196,196,55,196,110,243,149,170,196,196,55,196,110,243,149,170 .byte 3,3,12,3,24,15,6,27,3,3,12,3,24,15,6,27 .byte 86,86,69,86,138,19,172,220,86,86,69,86,138,19,172,220 .byte 68,68,13,68,26,73,136,94,68,68,13,68,26,73,136,94 .byte 127,127,225,127,223,158,254,160,127,127,225,127,223,158,254,160 .byte 169,169,158,169,33,55,79,136,169,169,158,169,33,55,79,136 .byte 42,42,168,42,77,130,84,103,42,42,168,42,77,130,84,103 .byte 187,187,214,187,177,109,107,10,187,187,214,187,177,109,107,10 .byte 193,193,35,193,70,226,159,135,193,193,35,193,70,226,159,135 .byte 83,83,81,83,162,2,166,241,83,83,81,83,162,2,166,241 .byte 220,220,87,220,174,139,165,114,220,220,87,220,174,139,165,114 .byte 11,11,44,11,88,39,22,83,11,11,44,11,88,39,22,83 .byte 157,157,78,157,156,211,39,1,157,157,78,157,156,211,39,1 .byte 108,108,173,108,71,193,216,43,108,108,173,108,71,193,216,43 .byte 49,49,196,49,149,245,98,164,49,49,196,49,149,245,98,164 .byte 116,116,205,116,135,185,232,243,116,116,205,116,135,185,232,243 .byte 246,246,255,246,227,9,241,21,246,246,255,246,227,9,241,21 .byte 70,70,5,70,10,67,140,76,70,70,5,70,10,67,140,76 .byte 172,172,138,172,9,38,69,165,172,172,138,172,9,38,69,165 .byte 137,137,30,137,60,151,15,181,137,137,30,137,60,151,15,181 .byte 20,20,80,20,160,68,40,180,20,20,80,20,160,68,40,180 .byte 225,225,163,225,91,66,223,186,225,225,163,225,91,66,223,186 .byte 22,22,88,22,176,78,44,166,22,22,88,22,176,78,44,166 .byte 58,58,232,58,205,210,116,247,58,58,232,58,205,210,116,247 .byte 105,105,185,105,111,208,210,6,105,105,185,105,111,208,210,6 .byte 9,9,36,9,72,45,18,65,9,9,36,9,72,45,18,65 .byte 112,112,221,112,167,173,224,215,112,112,221,112,167,173,224,215 .byte 182,182,226,182,217,84,113,111,182,182,226,182,217,84,113,111 .byte 
208,208,103,208,206,183,189,30,208,208,103,208,206,183,189,30 .byte 237,237,147,237,59,126,199,214,237,237,147,237,59,126,199,214 .byte 204,204,23,204,46,219,133,226,204,204,23,204,46,219,133,226 .byte 66,66,21,66,42,87,132,104,66,66,21,66,42,87,132,104 .byte 152,152,90,152,180,194,45,44,152,152,90,152,180,194,45,44 .byte 164,164,170,164,73,14,85,237,164,164,170,164,73,14,85,237 .byte 40,40,160,40,93,136,80,117,40,40,160,40,93,136,80,117 .byte 92,92,109,92,218,49,184,134,92,92,109,92,218,49,184,134 .byte 248,248,199,248,147,63,237,107,248,248,199,248,147,63,237,107 .byte 134,134,34,134,68,164,17,194,134,134,34,134,68,164,17,194 .byte 24,35,198,232,135,184,1,79 .byte 54,166,210,245,121,111,145,82 .byte 96,188,155,142,163,12,123,53 .byte 29,224,215,194,46,75,254,87 .byte 21,119,55,229,159,240,74,218 .byte 88,201,41,10,177,160,107,133 .byte 189,93,16,244,203,62,5,103 .byte 228,39,65,139,167,125,149,216 .byte 251,238,124,102,221,23,71,158 .byte 202,45,191,7,173,90,131,51 Index: head/secure/lib/libcrypto/amd64/x86_64-gf2m.S =================================================================== --- head/secure/lib/libcrypto/amd64/x86_64-gf2m.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/x86_64-gf2m.S (revision 299481) @@ -1,292 +1,293 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from x86_64-gf2m.pl. .text .type _mul_1x1,@function .align 16 _mul_1x1: subq $128+8,%rsp movq $-1,%r9 leaq (%rax,%rax,1),%rsi shrq $3,%r9 leaq (,%rax,4),%rdi andq %rax,%r9 leaq (,%rax,8),%r12 sarq $63,%rax leaq (%r9,%r9,1),%r10 sarq $63,%rsi leaq (,%r9,4),%r11 andq %rbp,%rax sarq $63,%rdi movq %rax,%rdx shlq $63,%rax andq %rbp,%rsi shrq $1,%rdx movq %rsi,%rcx shlq $62,%rsi andq %rbp,%rdi shrq $2,%rcx xorq %rsi,%rax movq %rdi,%rbx shlq $61,%rdi xorq %rcx,%rdx shrq $3,%rbx xorq %rdi,%rax xorq %rbx,%rdx movq %r9,%r13 movq $0,0(%rsp) xorq %r10,%r13 movq %r9,8(%rsp) movq %r11,%r14 movq %r10,16(%rsp) xorq %r12,%r14 movq %r13,24(%rsp) xorq %r11,%r9 movq %r11,32(%rsp) xorq %r11,%r10 movq %r9,40(%rsp) xorq %r11,%r13 movq %r10,48(%rsp) xorq %r14,%r9 movq %r13,56(%rsp) xorq %r14,%r10 movq %r12,64(%rsp) xorq %r14,%r13 movq %r9,72(%rsp) xorq %r11,%r9 movq %r10,80(%rsp) xorq %r11,%r10 movq %r13,88(%rsp) xorq %r11,%r13 movq %r14,96(%rsp) movq %r8,%rsi movq %r9,104(%rsp) andq %rbp,%rsi movq %r10,112(%rsp) shrq $4,%rbp movq %r13,120(%rsp) movq %r8,%rdi andq %rbp,%rdi shrq $4,%rbp movq (%rsp,%rsi,8),%xmm0 movq %r8,%rsi andq %rbp,%rsi shrq $4,%rbp movq (%rsp,%rdi,8),%rcx movq %r8,%rdi movq %rcx,%rbx shlq $4,%rcx andq %rbp,%rdi movq (%rsp,%rsi,8),%xmm1 shrq $60,%rbx xorq %rcx,%rax pslldq $1,%xmm1 movq %r8,%rsi shrq $4,%rbp xorq %rbx,%rdx andq %rbp,%rsi shrq $4,%rbp pxor %xmm1,%xmm0 movq (%rsp,%rdi,8),%rcx movq %r8,%rdi movq %rcx,%rbx shlq $12,%rcx andq %rbp,%rdi movq (%rsp,%rsi,8),%xmm1 shrq $52,%rbx xorq %rcx,%rax pslldq $2,%xmm1 movq %r8,%rsi shrq $4,%rbp xorq %rbx,%rdx andq %rbp,%rsi shrq $4,%rbp pxor %xmm1,%xmm0 movq (%rsp,%rdi,8),%rcx movq %r8,%rdi movq %rcx,%rbx shlq $20,%rcx andq %rbp,%rdi movq (%rsp,%rsi,8),%xmm1 shrq $44,%rbx xorq %rcx,%rax pslldq $3,%xmm1 movq %r8,%rsi shrq $4,%rbp xorq %rbx,%rdx andq %rbp,%rsi shrq $4,%rbp pxor %xmm1,%xmm0 movq (%rsp,%rdi,8),%rcx movq %r8,%rdi movq %rcx,%rbx shlq $28,%rcx andq %rbp,%rdi movq (%rsp,%rsi,8),%xmm1 shrq $36,%rbx xorq %rcx,%rax pslldq $4,%xmm1 movq %r8,%rsi shrq $4,%rbp xorq %rbx,%rdx andq %rbp,%rsi shrq $4,%rbp pxor %xmm1,%xmm0 movq (%rsp,%rdi,8),%rcx movq %r8,%rdi movq %rcx,%rbx shlq $36,%rcx andq %rbp,%rdi movq (%rsp,%rsi,8),%xmm1 shrq $28,%rbx 
xorq %rcx,%rax pslldq $5,%xmm1 movq %r8,%rsi shrq $4,%rbp xorq %rbx,%rdx andq %rbp,%rsi shrq $4,%rbp pxor %xmm1,%xmm0 movq (%rsp,%rdi,8),%rcx movq %r8,%rdi movq %rcx,%rbx shlq $44,%rcx andq %rbp,%rdi movq (%rsp,%rsi,8),%xmm1 shrq $20,%rbx xorq %rcx,%rax pslldq $6,%xmm1 movq %r8,%rsi shrq $4,%rbp xorq %rbx,%rdx andq %rbp,%rsi shrq $4,%rbp pxor %xmm1,%xmm0 movq (%rsp,%rdi,8),%rcx movq %r8,%rdi movq %rcx,%rbx shlq $52,%rcx andq %rbp,%rdi movq (%rsp,%rsi,8),%xmm1 shrq $12,%rbx xorq %rcx,%rax pslldq $7,%xmm1 movq %r8,%rsi shrq $4,%rbp xorq %rbx,%rdx andq %rbp,%rsi shrq $4,%rbp pxor %xmm1,%xmm0 movq (%rsp,%rdi,8),%rcx movq %rcx,%rbx shlq $60,%rcx .byte 102,72,15,126,198 shrq $4,%rbx xorq %rcx,%rax psrldq $8,%xmm0 xorq %rbx,%rdx .byte 102,72,15,126,199 xorq %rsi,%rax xorq %rdi,%rdx addq $128+8,%rsp .byte 0xf3,0xc3 .Lend_mul_1x1: .size _mul_1x1,.-_mul_1x1 .globl bn_GF2m_mul_2x2 .type bn_GF2m_mul_2x2,@function .align 16 bn_GF2m_mul_2x2: movq OPENSSL_ia32cap_P(%rip),%rax btq $33,%rax jnc .Lvanilla_mul_2x2 .byte 102,72,15,110,198 .byte 102,72,15,110,201 .byte 102,72,15,110,210 .byte 102,73,15,110,216 movdqa %xmm0,%xmm4 movdqa %xmm1,%xmm5 .byte 102,15,58,68,193,0 pxor %xmm2,%xmm4 pxor %xmm3,%xmm5 .byte 102,15,58,68,211,0 .byte 102,15,58,68,229,0 xorps %xmm0,%xmm4 xorps %xmm2,%xmm4 movdqa %xmm4,%xmm5 pslldq $8,%xmm4 psrldq $8,%xmm5 pxor %xmm4,%xmm2 pxor %xmm5,%xmm0 movdqu %xmm2,0(%rdi) movdqu %xmm0,16(%rdi) .byte 0xf3,0xc3 .align 16 .Lvanilla_mul_2x2: leaq -136(%rsp),%rsp movq %r14,80(%rsp) movq %r13,88(%rsp) movq %r12,96(%rsp) movq %rbp,104(%rsp) movq %rbx,112(%rsp) .Lbody_mul_2x2: movq %rdi,32(%rsp) movq %rsi,40(%rsp) movq %rdx,48(%rsp) movq %rcx,56(%rsp) movq %r8,64(%rsp) movq $0xf,%r8 movq %rsi,%rax movq %rcx,%rbp call _mul_1x1 movq %rax,16(%rsp) movq %rdx,24(%rsp) movq 48(%rsp),%rax movq 64(%rsp),%rbp call _mul_1x1 movq %rax,0(%rsp) movq %rdx,8(%rsp) movq 40(%rsp),%rax movq 56(%rsp),%rbp xorq 48(%rsp),%rax xorq 64(%rsp),%rbp call _mul_1x1 movq 0(%rsp),%rbx movq 8(%rsp),%rcx movq 16(%rsp),%rdi movq 24(%rsp),%rsi movq 32(%rsp),%rbp xorq %rdx,%rax xorq %rcx,%rdx xorq %rbx,%rax movq %rbx,0(%rbp) xorq %rdi,%rdx movq %rsi,24(%rbp) xorq %rsi,%rax xorq %rsi,%rdx xorq %rdx,%rax movq %rdx,16(%rbp) movq %rax,8(%rbp) movq 80(%rsp),%r14 movq 88(%rsp),%r13 movq 96(%rsp),%r12 movq 104(%rsp),%rbp movq 112(%rsp),%rbx leaq 136(%rsp),%rsp .byte 0xf3,0xc3 .Lend_mul_2x2: .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 .byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 16 Index: head/secure/lib/libcrypto/amd64/x86_64-mont.S =================================================================== --- head/secure/lib/libcrypto/amd64/x86_64-mont.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/x86_64-mont.S (revision 299481) @@ -1,778 +1,1134 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from x86_64-mont.pl. 
.text .globl bn_mul_mont .type bn_mul_mont,@function .align 16 bn_mul_mont: testl $3,%r9d jnz .Lmul_enter cmpl $8,%r9d jb .Lmul_enter + movl OPENSSL_ia32cap_P+8(%rip),%r11d cmpq %rsi,%rdx jne .Lmul4x_enter testl $7,%r9d jz .Lsqr8x_enter jmp .Lmul4x_enter .align 16 .Lmul_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 movl %r9d,%r9d leaq 2(%r9),%r10 movq %rsp,%r11 negq %r10 leaq (%rsp,%r10,8),%rsp andq $-1024,%rsp movq %r11,8(%rsp,%r9,8) .Lmul_body: subq %rsp,%r11 andq $-4096,%r11 .Lmul_page_walk: movq (%rsp,%r11,1),%r10 subq $4096,%r11 .byte 0x66,0x2e jnc .Lmul_page_walk movq %rdx,%r12 movq (%r8),%r8 movq (%r12),%rbx movq (%rsi),%rax xorq %r14,%r14 xorq %r15,%r15 movq %r8,%rbp mulq %rbx movq %rax,%r10 movq (%rcx),%rax imulq %r10,%rbp movq %rdx,%r11 mulq %rbp addq %rax,%r10 movq 8(%rsi),%rax adcq $0,%rdx movq %rdx,%r13 leaq 1(%r15),%r15 jmp .L1st_enter .align 16 .L1st: addq %rax,%r13 movq (%rsi,%r15,8),%rax adcq $0,%rdx addq %r11,%r13 movq %r10,%r11 adcq $0,%rdx movq %r13,-16(%rsp,%r15,8) movq %rdx,%r13 .L1st_enter: mulq %rbx addq %rax,%r11 movq (%rcx,%r15,8),%rax adcq $0,%rdx leaq 1(%r15),%r15 movq %rdx,%r10 mulq %rbp cmpq %r9,%r15 jne .L1st addq %rax,%r13 movq (%rsi),%rax adcq $0,%rdx addq %r11,%r13 adcq $0,%rdx movq %r13,-16(%rsp,%r15,8) movq %rdx,%r13 movq %r10,%r11 xorq %rdx,%rdx addq %r11,%r13 adcq $0,%rdx movq %r13,-8(%rsp,%r9,8) movq %rdx,(%rsp,%r9,8) leaq 1(%r14),%r14 jmp .Louter .align 16 .Louter: movq (%r12,%r14,8),%rbx xorq %r15,%r15 movq %r8,%rbp movq (%rsp),%r10 mulq %rbx addq %rax,%r10 movq (%rcx),%rax adcq $0,%rdx imulq %r10,%rbp movq %rdx,%r11 mulq %rbp addq %rax,%r10 movq 8(%rsi),%rax adcq $0,%rdx movq 8(%rsp),%r10 movq %rdx,%r13 leaq 1(%r15),%r15 jmp .Linner_enter .align 16 .Linner: addq %rax,%r13 movq (%rsi,%r15,8),%rax adcq $0,%rdx addq %r10,%r13 movq (%rsp,%r15,8),%r10 adcq $0,%rdx movq %r13,-16(%rsp,%r15,8) movq %rdx,%r13 .Linner_enter: mulq %rbx addq %rax,%r11 movq (%rcx,%r15,8),%rax adcq $0,%rdx addq %r11,%r10 movq %rdx,%r11 adcq $0,%r11 leaq 1(%r15),%r15 mulq %rbp cmpq %r9,%r15 jne .Linner addq %rax,%r13 movq (%rsi),%rax adcq $0,%rdx addq %r10,%r13 movq (%rsp,%r15,8),%r10 adcq $0,%rdx movq %r13,-16(%rsp,%r15,8) movq %rdx,%r13 xorq %rdx,%rdx addq %r11,%r13 adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-8(%rsp,%r9,8) movq %rdx,(%rsp,%r9,8) leaq 1(%r14),%r14 cmpq %r9,%r14 jb .Louter xorq %r14,%r14 movq (%rsp),%rax leaq (%rsp),%rsi movq %r9,%r15 jmp .Lsub .align 16 .Lsub: sbbq (%rcx,%r14,8),%rax movq %rax,(%rdi,%r14,8) movq 8(%rsi,%r14,8),%rax leaq 1(%r14),%r14 decq %r15 jnz .Lsub sbbq $0,%rax xorq %r14,%r14 andq %rax,%rsi notq %rax movq %rdi,%rcx andq %rax,%rcx movq %r9,%r15 orq %rcx,%rsi .align 16 .Lcopy: movq (%rsi,%r14,8),%rax movq %r14,(%rsp,%r14,8) movq %rax,(%rdi,%r14,8) leaq 1(%r14),%r14 subq $1,%r15 jnz .Lcopy movq 8(%rsp,%r9,8),%rsi movq $1,%rax movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Lmul_epilogue: .byte 0xf3,0xc3 .size bn_mul_mont,.-bn_mul_mont .type bn_mul4x_mont,@function .align 16 bn_mul4x_mont: .Lmul4x_enter: + andl $0x80100,%r11d + cmpl $0x80100,%r11d + je .Lmulx4x_enter pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 movl %r9d,%r9d leaq 4(%r9),%r10 movq %rsp,%r11 negq %r10 leaq (%rsp,%r10,8),%rsp andq $-1024,%rsp movq %r11,8(%rsp,%r9,8) .Lmul4x_body: subq %rsp,%r11 andq $-4096,%r11 .Lmul4x_page_walk: movq (%rsp,%r11,1),%r10 subq $4096,%r11 .byte 0x2e jnc .Lmul4x_page_walk movq %rdi,16(%rsp,%r9,8) movq %rdx,%r12 
movq (%r8),%r8 movq (%r12),%rbx movq (%rsi),%rax xorq %r14,%r14 xorq %r15,%r15 movq %r8,%rbp mulq %rbx movq %rax,%r10 movq (%rcx),%rax imulq %r10,%rbp movq %rdx,%r11 mulq %rbp addq %rax,%r10 movq 8(%rsi),%rax adcq $0,%rdx movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq 8(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq 16(%rsi),%rax adcq $0,%rdx addq %r11,%rdi leaq 4(%r15),%r15 adcq $0,%rdx movq %rdi,(%rsp) movq %rdx,%r13 jmp .L1st4x .align 16 .L1st4x: mulq %rbx addq %rax,%r10 movq -16(%rcx,%r15,8),%rax adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq -8(%rsi,%r15,8),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-24(%rsp,%r15,8) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq -8(%rcx,%r15,8),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq (%rsi,%r15,8),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx movq %rdi,-16(%rsp,%r15,8) movq %rdx,%r13 mulq %rbx addq %rax,%r10 movq (%rcx,%r15,8),%rax adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq 8(%rsi,%r15,8),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-8(%rsp,%r15,8) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq 8(%rcx,%r15,8),%rax adcq $0,%rdx leaq 4(%r15),%r15 movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq -16(%rsi,%r15,8),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx movq %rdi,-32(%rsp,%r15,8) movq %rdx,%r13 cmpq %r9,%r15 jb .L1st4x mulq %rbx addq %rax,%r10 movq -16(%rcx,%r15,8),%rax adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq -8(%rsi,%r15,8),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-24(%rsp,%r15,8) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq -8(%rcx,%r15,8),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq (%rsi),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx movq %rdi,-16(%rsp,%r15,8) movq %rdx,%r13 xorq %rdi,%rdi addq %r10,%r13 adcq $0,%rdi movq %r13,-8(%rsp,%r15,8) movq %rdi,(%rsp,%r15,8) leaq 1(%r14),%r14 .align 4 .Louter4x: movq (%r12,%r14,8),%rbx xorq %r15,%r15 movq (%rsp),%r10 movq %r8,%rbp mulq %rbx addq %rax,%r10 movq (%rcx),%rax adcq $0,%rdx imulq %r10,%rbp movq %rdx,%r11 mulq %rbp addq %rax,%r10 movq 8(%rsi),%rax adcq $0,%rdx movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq 8(%rcx),%rax adcq $0,%rdx addq 8(%rsp),%r11 adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq 16(%rsi),%rax adcq $0,%rdx addq %r11,%rdi leaq 4(%r15),%r15 adcq $0,%rdx movq %rdi,(%rsp) movq %rdx,%r13 jmp .Linner4x .align 16 .Linner4x: mulq %rbx addq %rax,%r10 movq -16(%rcx,%r15,8),%rax adcq $0,%rdx addq -16(%rsp,%r15,8),%r10 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq -8(%rsi,%r15,8),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-24(%rsp,%r15,8) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq -8(%rcx,%r15,8),%rax adcq $0,%rdx addq -8(%rsp,%r15,8),%r11 adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq (%rsi,%r15,8),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx movq %rdi,-16(%rsp,%r15,8) movq %rdx,%r13 mulq %rbx addq %rax,%r10 movq (%rcx,%r15,8),%rax adcq $0,%rdx addq (%rsp,%r15,8),%r10 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq 8(%rsi,%r15,8),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-8(%rsp,%r15,8) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq 8(%rcx,%r15,8),%rax adcq $0,%rdx addq 8(%rsp,%r15,8),%r11 adcq $0,%rdx leaq 4(%r15),%r15 movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq -16(%rsi,%r15,8),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx movq %rdi,-32(%rsp,%r15,8) movq %rdx,%r13 cmpq %r9,%r15 jb .Linner4x mulq %rbx addq %rax,%r10 movq -16(%rcx,%r15,8),%rax adcq $0,%rdx addq -16(%rsp,%r15,8),%r10 adcq 
$0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq -8(%rsi,%r15,8),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-24(%rsp,%r15,8) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq -8(%rcx,%r15,8),%rax adcq $0,%rdx addq -8(%rsp,%r15,8),%r11 adcq $0,%rdx leaq 1(%r14),%r14 movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq (%rsi),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx movq %rdi,-16(%rsp,%r15,8) movq %rdx,%r13 xorq %rdi,%rdi addq %r10,%r13 adcq $0,%rdi addq (%rsp,%r9,8),%r13 adcq $0,%rdi movq %r13,-8(%rsp,%r15,8) movq %rdi,(%rsp,%r15,8) cmpq %r9,%r14 jb .Louter4x movq 16(%rsp,%r9,8),%rdi movq 0(%rsp),%rax pxor %xmm0,%xmm0 movq 8(%rsp),%rdx shrq $2,%r9 leaq (%rsp),%rsi xorq %r14,%r14 subq 0(%rcx),%rax movq 16(%rsi),%rbx movq 24(%rsi),%rbp sbbq 8(%rcx),%rdx leaq -1(%r9),%r15 jmp .Lsub4x .align 16 .Lsub4x: movq %rax,0(%rdi,%r14,8) movq %rdx,8(%rdi,%r14,8) sbbq 16(%rcx,%r14,8),%rbx movq 32(%rsi,%r14,8),%rax movq 40(%rsi,%r14,8),%rdx sbbq 24(%rcx,%r14,8),%rbp movq %rbx,16(%rdi,%r14,8) movq %rbp,24(%rdi,%r14,8) sbbq 32(%rcx,%r14,8),%rax movq 48(%rsi,%r14,8),%rbx movq 56(%rsi,%r14,8),%rbp sbbq 40(%rcx,%r14,8),%rdx leaq 4(%r14),%r14 decq %r15 jnz .Lsub4x movq %rax,0(%rdi,%r14,8) movq 32(%rsi,%r14,8),%rax sbbq 16(%rcx,%r14,8),%rbx movq %rdx,8(%rdi,%r14,8) sbbq 24(%rcx,%r14,8),%rbp movq %rbx,16(%rdi,%r14,8) sbbq $0,%rax movq %rbp,24(%rdi,%r14,8) xorq %r14,%r14 andq %rax,%rsi notq %rax movq %rdi,%rcx andq %rax,%rcx leaq -1(%r9),%r15 orq %rcx,%rsi movdqu (%rsi),%xmm1 movdqa %xmm0,(%rsp) movdqu %xmm1,(%rdi) jmp .Lcopy4x .align 16 .Lcopy4x: movdqu 16(%rsi,%r14,1),%xmm2 movdqu 32(%rsi,%r14,1),%xmm1 movdqa %xmm0,16(%rsp,%r14,1) movdqu %xmm2,16(%rdi,%r14,1) movdqa %xmm0,32(%rsp,%r14,1) movdqu %xmm1,32(%rdi,%r14,1) leaq 32(%r14),%r14 decq %r15 jnz .Lcopy4x shlq $2,%r9 movdqu 16(%rsi,%r14,1),%xmm2 movdqa %xmm0,16(%rsp,%r14,1) movdqu %xmm2,16(%rdi,%r14,1) movq 8(%rsp,%r9,8),%rsi movq $1,%rax movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 movq 24(%rsi),%r12 movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp .Lmul4x_epilogue: .byte 0xf3,0xc3 .size bn_mul4x_mont,.-bn_mul4x_mont + .type bn_sqr8x_mont,@function .align 32 bn_sqr8x_mont: .Lsqr8x_enter: movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 movl %r9d,%r10d shll $3,%r9d shlq $3+2,%r10 negq %r9 leaq -64(%rsp,%r9,2),%r11 movq (%r8),%r8 subq %rsi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lsqr8x_sp_alt subq %r11,%rsp leaq -64(%rsp,%r9,2),%rsp jmp .Lsqr8x_sp_done .align 32 .Lsqr8x_sp_alt: leaq 4096-64(,%r9,2),%r10 leaq -64(%rsp,%r9,2),%rsp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 subq %r11,%rsp .Lsqr8x_sp_done: andq $-64,%rsp movq %rax,%r11 subq %rsp,%r11 andq $-4096,%r11 .Lsqr8x_page_walk: movq (%rsp,%r11,1),%r10 subq $4096,%r11 .byte 0x2e jnc .Lsqr8x_page_walk movq %r9,%r10 negq %r9 movq %r8,32(%rsp) movq %rax,40(%rsp) .Lsqr8x_body: .byte 102,72,15,110,209 pxor %xmm0,%xmm0 .byte 102,72,15,110,207 .byte 102,73,15,110,218 + movl OPENSSL_ia32cap_P+8(%rip),%eax + andl $0x80100,%eax + cmpl $0x80100,%eax + jne .Lsqr8x_nox + + call bn_sqrx8x_internal + + + + + leaq (%r8,%rcx,1),%rbx + movq %rcx,%r9 + movq %rcx,%rdx +.byte 102,72,15,126,207 + sarq $3+2,%rcx + jmp .Lsqr8x_sub + +.align 32 +.Lsqr8x_nox: call bn_sqr8x_internal leaq (%rdi,%r9,1),%rbx movq %r9,%rcx movq %r9,%rdx .byte 102,72,15,126,207 sarq $3+2,%rcx jmp .Lsqr8x_sub .align 32 .Lsqr8x_sub: movq 0(%rbx),%r12 movq 8(%rbx),%r13 movq 16(%rbx),%r14 movq 24(%rbx),%r15 leaq 32(%rbx),%rbx sbbq 0(%rbp),%r12 sbbq 8(%rbp),%r13 sbbq 16(%rbp),%r14 sbbq 24(%rbp),%r15 
leaq 32(%rbp),%rbp movq %r12,0(%rdi) movq %r13,8(%rdi) movq %r14,16(%rdi) movq %r15,24(%rdi) leaq 32(%rdi),%rdi incq %rcx jnz .Lsqr8x_sub sbbq $0,%rax leaq (%rbx,%r9,1),%rbx leaq (%rdi,%r9,1),%rdi .byte 102,72,15,110,200 pxor %xmm0,%xmm0 pshufd $0,%xmm1,%xmm1 movq 40(%rsp),%rsi jmp .Lsqr8x_cond_copy .align 32 .Lsqr8x_cond_copy: movdqa 0(%rbx),%xmm2 movdqa 16(%rbx),%xmm3 leaq 32(%rbx),%rbx movdqu 0(%rdi),%xmm4 movdqu 16(%rdi),%xmm5 leaq 32(%rdi),%rdi movdqa %xmm0,-32(%rbx) movdqa %xmm0,-16(%rbx) movdqa %xmm0,-32(%rbx,%rdx,1) movdqa %xmm0,-16(%rbx,%rdx,1) pcmpeqd %xmm1,%xmm0 pand %xmm1,%xmm2 pand %xmm1,%xmm3 pand %xmm0,%xmm4 pand %xmm0,%xmm5 pxor %xmm0,%xmm0 por %xmm2,%xmm4 por %xmm3,%xmm5 movdqu %xmm4,-32(%rdi) movdqu %xmm5,-16(%rdi) addq $32,%r9 jnz .Lsqr8x_cond_copy movq $1,%rax movq -48(%rsi),%r15 movq -40(%rsi),%r14 movq -32(%rsi),%r13 movq -24(%rsi),%r12 movq -16(%rsi),%rbp movq -8(%rsi),%rbx leaq (%rsi),%rsp .Lsqr8x_epilogue: .byte 0xf3,0xc3 .size bn_sqr8x_mont,.-bn_sqr8x_mont +.type bn_mulx4x_mont,@function +.align 32 +bn_mulx4x_mont: +.Lmulx4x_enter: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + shll $3,%r9d +.byte 0x67 + xorq %r10,%r10 + subq %r9,%r10 + movq (%r8),%r8 + leaq -72(%rsp,%r10,1),%rsp + andq $-128,%rsp + movq %rax,%r11 + subq %rsp,%r11 + andq $-4096,%r11 +.Lmulx4x_page_walk: + movq (%rsp,%r11,1),%r10 + subq $4096,%r11 +.byte 0x66,0x2e + jnc .Lmulx4x_page_walk + + leaq (%rdx,%r9,1),%r10 + + + + + + + + + + + + + movq %r9,0(%rsp) + shrq $5,%r9 + movq %r10,16(%rsp) + subq $1,%r9 + movq %r8,24(%rsp) + movq %rdi,32(%rsp) + movq %rax,40(%rsp) + movq %r9,48(%rsp) + jmp .Lmulx4x_body + +.align 32 +.Lmulx4x_body: + leaq 8(%rdx),%rdi + movq (%rdx),%rdx + leaq 64+32(%rsp),%rbx + movq %rdx,%r9 + + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r14 + addq %rax,%r11 + movq %rdi,8(%rsp) + mulxq 16(%rsi),%r12,%r13 + adcq %r14,%r12 + adcq $0,%r13 + + movq %r8,%rdi + imulq 24(%rsp),%r8 + xorq %rbp,%rbp + + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + leaq 32(%rsi),%rsi + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%rdi + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 +.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 + movq 48(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r12,-16(%rbx) + + jmp .Lmulx4x_1st + +.align 32 +.Lmulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_1st + + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + addq %r15,%r14 + sbbq %r15,%r15 + movq %r14,-8(%rbx) + jmp .Lmulx4x_outer + +.align 32 +.Lmulx4x_outer: + movq (%rdi),%rdx + leaq 8(%rdi),%rdi + subq %rax,%rsi + movq %r15,(%rbx) + leaq 64+32(%rsp),%rbx + subq %rax,%rcx + + mulxq 
0(%rsi),%r8,%r11 + xorl %ebp,%ebp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + adoxq %rbp,%r12 + adcxq %rbp,%r13 + + movq %rdi,8(%rsp) +.byte 0x67 + movq %r8,%r15 + imulq 24(%rsp),%r8 + xorl %ebp,%ebp + + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + adoxq -16(%rbx),%r12 + adcxq %rax,%r13 + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + adoxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + leaq 32(%rcx),%rcx + adcxq %rax,%r12 + adoxq %rbp,%r15 + movq 48(%rsp),%rdi + movq %r12,-16(%rbx) + + jmp .Lmulx4x_inner + +.align 32 +.Lmulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-32(%rbx) + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_inner + + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + subq 0(%rbx),%rbp + adcq %r15,%r14 + sbbq %r15,%r15 + movq %r14,-8(%rbx) + + cmpq 16(%rsp),%rdi + jne .Lmulx4x_outer + + leaq 64(%rsp),%rbx + subq %rax,%rcx + negq %r15 + movq %rax,%rdx + shrq $3+2,%rax + movq 32(%rsp),%rdi + jmp .Lmulx4x_sub + +.align 32 +.Lmulx4x_sub: + movq 0(%rbx),%r11 + movq 8(%rbx),%r12 + movq 16(%rbx),%r13 + movq 24(%rbx),%r14 + leaq 32(%rbx),%rbx + sbbq 0(%rcx),%r11 + sbbq 8(%rcx),%r12 + sbbq 16(%rcx),%r13 + sbbq 24(%rcx),%r14 + leaq 32(%rcx),%rcx + movq %r11,0(%rdi) + movq %r12,8(%rdi) + movq %r13,16(%rdi) + movq %r14,24(%rdi) + leaq 32(%rdi),%rdi + decq %rax + jnz .Lmulx4x_sub + + sbbq $0,%r15 + leaq 64(%rsp),%rbx + subq %rdx,%rdi + +.byte 102,73,15,110,207 + pxor %xmm0,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq 40(%rsp),%rsi + jmp .Lmulx4x_cond_copy + +.align 32 +.Lmulx4x_cond_copy: + movdqa 0(%rbx),%xmm2 + movdqa 16(%rbx),%xmm3 + leaq 32(%rbx),%rbx + movdqu 0(%rdi),%xmm4 + movdqu 16(%rdi),%xmm5 + leaq 32(%rdi),%rdi + movdqa %xmm0,-32(%rbx) + movdqa %xmm0,-16(%rbx) + pcmpeqd %xmm1,%xmm0 + pand %xmm1,%xmm2 + pand %xmm1,%xmm3 + pand %xmm0,%xmm4 + pand %xmm0,%xmm5 + pxor %xmm0,%xmm0 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqu %xmm4,-32(%rdi) + movdqu %xmm5,-16(%rdi) + subq $32,%rdx + jnz .Lmulx4x_cond_copy + + movq %rdx,(%rbx) + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lmulx4x_epilogue: + .byte 0xf3,0xc3 +.size bn_mulx4x_mont,.-bn_mulx4x_mont .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 16 
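The substantive change in x86_64-mont.S is the new runtime dispatch: bn_mul_mont, bn_mul4x_mont and bn_sqr8x_mont now load the third 32-bit word of OPENSSL_ia32cap_P and, when every bit in the 0x80100 mask is set, branch to the freshly added bn_mulx4x_mont / bn_sqrx8x_internal code built around MULX with ADCX/ADOX carry chains. The following is only a minimal C sketch of that gate, assuming OPENSSL_ia32cap_P is the four-word capability vector filled in by OpenSSL's cpuid setup; the BN_CAP_* names and the helper function are illustrative, not part of the patch.

    #include <stdint.h>

    /*
     * Illustrative sketch only: the 0x80100 mask tested against the third
     * capability word corresponds to CPUID(7).EBX bit 8 (BMI2, i.e. MULX)
     * together with bit 19 (ADX, i.e. ADCX/ADOX).  OPENSSL_ia32cap_P is
     * assumed to be the 4 x 32-bit vector populated at library startup.
     */
    extern unsigned int OPENSSL_ia32cap_P[4];

    #define BN_CAP_BMI2 (1u << 8)    /* MULX available */
    #define BN_CAP_ADX  (1u << 19)   /* ADCX/ADOX available */

    /* Mirrors the "andl $0x80100 / cmpl $0x80100" gate in bn_mul4x_mont. */
    static int bn_can_use_mulx_adx(void)
    {
        unsigned int mask = BN_CAP_BMI2 | BN_CAP_ADX;   /* == 0x80100 */
        return (OPENSSL_ia32cap_P[2] & mask) == mask;
    }

The x86_64-mont5.S hunks that follow apply the same pattern with the wider 0x80108 mask, which additionally requires bit 3 of the same word (BMI1) before bn_mul_mont_gather5, bn_power5 and bn_from_mont8x will take their mulx-based paths.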
Index: head/secure/lib/libcrypto/amd64/x86_64-mont5.S =================================================================== --- head/secure/lib/libcrypto/amd64/x86_64-mont5.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/x86_64-mont5.S (revision 299481) @@ -1,2294 +1,3594 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from x86_64-mont5.pl. .text .globl bn_mul_mont_gather5 .type bn_mul_mont_gather5,@function .align 64 bn_mul_mont_gather5: testl $7,%r9d jnz .Lmul_enter + movl OPENSSL_ia32cap_P+8(%rip),%r11d jmp .Lmul4x_enter .align 16 .Lmul_enter: movl %r9d,%r9d movq %rsp,%rax movd 8(%rsp),%xmm5 leaq .Linc(%rip),%r10 pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 leaq 2(%r9),%r11 negq %r11 leaq -264(%rsp,%r11,8),%rsp andq $-1024,%rsp movq %rax,8(%rsp,%r9,8) .Lmul_body: subq %rsp,%rax andq $-4096,%rax .Lmul_page_walk: movq (%rsp,%rax,1),%r11 subq $4096,%rax .byte 0x2e jnc .Lmul_page_walk leaq 128(%rdx),%r12 movdqa 0(%r10),%xmm0 movdqa 16(%r10),%xmm1 leaq 24-112(%rsp,%r9,8),%r10 andq $-16,%r10 pshufd $0,%xmm5,%xmm5 movdqa %xmm1,%xmm4 movdqa %xmm1,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 .byte 0x67 movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,112(%r10) movdqa %xmm4,%xmm0 paddd %xmm2,%xmm3 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,128(%r10) movdqa %xmm4,%xmm1 paddd %xmm3,%xmm0 pcmpeqd %xmm5,%xmm3 movdqa %xmm2,144(%r10) movdqa %xmm4,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 movdqa %xmm3,160(%r10) movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,176(%r10) movdqa %xmm4,%xmm0 paddd %xmm2,%xmm3 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,192(%r10) movdqa %xmm4,%xmm1 paddd %xmm3,%xmm0 pcmpeqd %xmm5,%xmm3 movdqa %xmm2,208(%r10) movdqa %xmm4,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 movdqa %xmm3,224(%r10) movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,240(%r10) movdqa %xmm4,%xmm0 paddd %xmm2,%xmm3 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,256(%r10) movdqa %xmm4,%xmm1 paddd %xmm3,%xmm0 pcmpeqd %xmm5,%xmm3 movdqa %xmm2,272(%r10) movdqa %xmm4,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 movdqa %xmm3,288(%r10) movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,304(%r10) paddd %xmm2,%xmm3 .byte 0x67 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,320(%r10) pcmpeqd %xmm5,%xmm3 movdqa %xmm2,336(%r10) pand 64(%r12),%xmm0 pand 80(%r12),%xmm1 pand 96(%r12),%xmm2 movdqa %xmm3,352(%r10) pand 112(%r12),%xmm3 por %xmm2,%xmm0 por %xmm3,%xmm1 movdqa -128(%r12),%xmm4 movdqa -112(%r12),%xmm5 movdqa -96(%r12),%xmm2 pand 112(%r10),%xmm4 movdqa -80(%r12),%xmm3 pand 128(%r10),%xmm5 por %xmm4,%xmm0 pand 144(%r10),%xmm2 por %xmm5,%xmm1 pand 160(%r10),%xmm3 por %xmm2,%xmm0 por %xmm3,%xmm1 movdqa -64(%r12),%xmm4 movdqa -48(%r12),%xmm5 movdqa -32(%r12),%xmm2 pand 176(%r10),%xmm4 movdqa -16(%r12),%xmm3 pand 192(%r10),%xmm5 por %xmm4,%xmm0 pand 208(%r10),%xmm2 por %xmm5,%xmm1 pand 224(%r10),%xmm3 por %xmm2,%xmm0 por %xmm3,%xmm1 movdqa 0(%r12),%xmm4 movdqa 16(%r12),%xmm5 movdqa 32(%r12),%xmm2 pand 240(%r10),%xmm4 movdqa 48(%r12),%xmm3 pand 256(%r10),%xmm5 por %xmm4,%xmm0 pand 272(%r10),%xmm2 por %xmm5,%xmm1 pand 288(%r10),%xmm3 por %xmm2,%xmm0 por %xmm3,%xmm1 por %xmm1,%xmm0 pshufd $0x4e,%xmm0,%xmm1 por %xmm1,%xmm0 leaq 256(%r12),%r12 .byte 102,72,15,126,195 movq (%r8),%r8 movq (%rsi),%rax xorq %r14,%r14 xorq %r15,%r15 movq %r8,%rbp mulq %rbx movq %rax,%r10 movq (%rcx),%rax imulq %r10,%rbp movq %rdx,%r11 mulq %rbp addq %rax,%r10 movq 8(%rsi),%rax adcq $0,%rdx movq %rdx,%r13 leaq 1(%r15),%r15 jmp .L1st_enter .align 16 .L1st: addq 
%rax,%r13 movq (%rsi,%r15,8),%rax adcq $0,%rdx addq %r11,%r13 movq %r10,%r11 adcq $0,%rdx movq %r13,-16(%rsp,%r15,8) movq %rdx,%r13 .L1st_enter: mulq %rbx addq %rax,%r11 movq (%rcx,%r15,8),%rax adcq $0,%rdx leaq 1(%r15),%r15 movq %rdx,%r10 mulq %rbp cmpq %r9,%r15 jne .L1st addq %rax,%r13 adcq $0,%rdx addq %r11,%r13 adcq $0,%rdx movq %r13,-16(%rsp,%r9,8) movq %rdx,%r13 movq %r10,%r11 xorq %rdx,%rdx addq %r11,%r13 adcq $0,%rdx movq %r13,-8(%rsp,%r9,8) movq %rdx,(%rsp,%r9,8) leaq 1(%r14),%r14 jmp .Louter .align 16 .Louter: leaq 24+128(%rsp,%r9,8),%rdx andq $-16,%rdx pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 movdqa -128(%r12),%xmm0 movdqa -112(%r12),%xmm1 movdqa -96(%r12),%xmm2 movdqa -80(%r12),%xmm3 pand -128(%rdx),%xmm0 pand -112(%rdx),%xmm1 por %xmm0,%xmm4 pand -96(%rdx),%xmm2 por %xmm1,%xmm5 pand -80(%rdx),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 movdqa -64(%r12),%xmm0 movdqa -48(%r12),%xmm1 movdqa -32(%r12),%xmm2 movdqa -16(%r12),%xmm3 pand -64(%rdx),%xmm0 pand -48(%rdx),%xmm1 por %xmm0,%xmm4 pand -32(%rdx),%xmm2 por %xmm1,%xmm5 pand -16(%rdx),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 movdqa 0(%r12),%xmm0 movdqa 16(%r12),%xmm1 movdqa 32(%r12),%xmm2 movdqa 48(%r12),%xmm3 pand 0(%rdx),%xmm0 pand 16(%rdx),%xmm1 por %xmm0,%xmm4 pand 32(%rdx),%xmm2 por %xmm1,%xmm5 pand 48(%rdx),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 movdqa 64(%r12),%xmm0 movdqa 80(%r12),%xmm1 movdqa 96(%r12),%xmm2 movdqa 112(%r12),%xmm3 pand 64(%rdx),%xmm0 pand 80(%rdx),%xmm1 por %xmm0,%xmm4 pand 96(%rdx),%xmm2 por %xmm1,%xmm5 pand 112(%rdx),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 por %xmm5,%xmm4 pshufd $0x4e,%xmm4,%xmm0 por %xmm4,%xmm0 leaq 256(%r12),%r12 movq (%rsi),%rax .byte 102,72,15,126,195 xorq %r15,%r15 movq %r8,%rbp movq (%rsp),%r10 mulq %rbx addq %rax,%r10 movq (%rcx),%rax adcq $0,%rdx imulq %r10,%rbp movq %rdx,%r11 mulq %rbp addq %rax,%r10 movq 8(%rsi),%rax adcq $0,%rdx movq 8(%rsp),%r10 movq %rdx,%r13 leaq 1(%r15),%r15 jmp .Linner_enter .align 16 .Linner: addq %rax,%r13 movq (%rsi,%r15,8),%rax adcq $0,%rdx addq %r10,%r13 movq (%rsp,%r15,8),%r10 adcq $0,%rdx movq %r13,-16(%rsp,%r15,8) movq %rdx,%r13 .Linner_enter: mulq %rbx addq %rax,%r11 movq (%rcx,%r15,8),%rax adcq $0,%rdx addq %r11,%r10 movq %rdx,%r11 adcq $0,%r11 leaq 1(%r15),%r15 mulq %rbp cmpq %r9,%r15 jne .Linner addq %rax,%r13 adcq $0,%rdx addq %r10,%r13 movq (%rsp,%r9,8),%r10 adcq $0,%rdx movq %r13,-16(%rsp,%r9,8) movq %rdx,%r13 xorq %rdx,%rdx addq %r11,%r13 adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-8(%rsp,%r9,8) movq %rdx,(%rsp,%r9,8) leaq 1(%r14),%r14 cmpq %r9,%r14 jb .Louter xorq %r14,%r14 movq (%rsp),%rax leaq (%rsp),%rsi movq %r9,%r15 jmp .Lsub .align 16 .Lsub: sbbq (%rcx,%r14,8),%rax movq %rax,(%rdi,%r14,8) movq 8(%rsi,%r14,8),%rax leaq 1(%r14),%r14 decq %r15 jnz .Lsub sbbq $0,%rax xorq %r14,%r14 andq %rax,%rsi notq %rax movq %rdi,%rcx andq %rax,%rcx movq %r9,%r15 orq %rcx,%rsi .align 16 .Lcopy: movq (%rsi,%r14,8),%rax movq %r14,(%rsp,%r14,8) movq %rax,(%rdi,%r14,8) leaq 1(%r14),%r14 subq $1,%r15 jnz .Lcopy movq 8(%rsp,%r9,8),%rsi movq $1,%rax movq -48(%rsi),%r15 movq -40(%rsi),%r14 movq -32(%rsi),%r13 movq -24(%rsi),%r12 movq -16(%rsi),%rbp movq -8(%rsi),%rbx leaq (%rsi),%rsp .Lmul_epilogue: .byte 0xf3,0xc3 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 .type bn_mul4x_mont_gather5,@function .align 32 bn_mul4x_mont_gather5: .Lmul4x_enter: + andl $0x80108,%r11d + cmpl $0x80108,%r11d + je .Lmulx4x_enter .byte 0x67 movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 .byte 0x67 shll $3,%r9d leaq (%r9,%r9,2),%r10 negq %r9 leaq 
-320(%rsp,%r9,2),%r11 subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lmul4xsp_alt subq %r11,%rsp leaq -320(%rsp,%r9,2),%rsp jmp .Lmul4xsp_done .align 32 .Lmul4xsp_alt: leaq 4096-320(,%r9,2),%r10 leaq -320(%rsp,%r9,2),%rsp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 subq %r11,%rsp .Lmul4xsp_done: andq $-64,%rsp movq %rax,%r11 subq %rsp,%r11 andq $-4096,%r11 .Lmul4x_page_walk: movq (%rsp,%r11,1),%r10 subq $4096,%r11 .byte 0x2e jnc .Lmul4x_page_walk negq %r9 movq %rax,40(%rsp) .Lmul4x_body: call mul4x_internal movq 40(%rsp),%rsi movq $1,%rax movq -48(%rsi),%r15 movq -40(%rsi),%r14 movq -32(%rsi),%r13 movq -24(%rsi),%r12 movq -16(%rsi),%rbp movq -8(%rsi),%rbx leaq (%rsi),%rsp .Lmul4x_epilogue: .byte 0xf3,0xc3 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 .type mul4x_internal,@function .align 32 mul4x_internal: shlq $5,%r9 movd 8(%rax),%xmm5 leaq .Linc(%rip),%rax leaq 128(%rdx,%r9,1),%r13 shrq $5,%r9 movdqa 0(%rax),%xmm0 movdqa 16(%rax),%xmm1 leaq 88-112(%rsp,%r9,1),%r10 leaq 128(%rdx),%r12 pshufd $0,%xmm5,%xmm5 movdqa %xmm1,%xmm4 .byte 0x67,0x67 movdqa %xmm1,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 .byte 0x67 movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,112(%r10) movdqa %xmm4,%xmm0 paddd %xmm2,%xmm3 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,128(%r10) movdqa %xmm4,%xmm1 paddd %xmm3,%xmm0 pcmpeqd %xmm5,%xmm3 movdqa %xmm2,144(%r10) movdqa %xmm4,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 movdqa %xmm3,160(%r10) movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,176(%r10) movdqa %xmm4,%xmm0 paddd %xmm2,%xmm3 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,192(%r10) movdqa %xmm4,%xmm1 paddd %xmm3,%xmm0 pcmpeqd %xmm5,%xmm3 movdqa %xmm2,208(%r10) movdqa %xmm4,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 movdqa %xmm3,224(%r10) movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,240(%r10) movdqa %xmm4,%xmm0 paddd %xmm2,%xmm3 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,256(%r10) movdqa %xmm4,%xmm1 paddd %xmm3,%xmm0 pcmpeqd %xmm5,%xmm3 movdqa %xmm2,272(%r10) movdqa %xmm4,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 movdqa %xmm3,288(%r10) movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,304(%r10) paddd %xmm2,%xmm3 .byte 0x67 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,320(%r10) pcmpeqd %xmm5,%xmm3 movdqa %xmm2,336(%r10) pand 64(%r12),%xmm0 pand 80(%r12),%xmm1 pand 96(%r12),%xmm2 movdqa %xmm3,352(%r10) pand 112(%r12),%xmm3 por %xmm2,%xmm0 por %xmm3,%xmm1 movdqa -128(%r12),%xmm4 movdqa -112(%r12),%xmm5 movdqa -96(%r12),%xmm2 pand 112(%r10),%xmm4 movdqa -80(%r12),%xmm3 pand 128(%r10),%xmm5 por %xmm4,%xmm0 pand 144(%r10),%xmm2 por %xmm5,%xmm1 pand 160(%r10),%xmm3 por %xmm2,%xmm0 por %xmm3,%xmm1 movdqa -64(%r12),%xmm4 movdqa -48(%r12),%xmm5 movdqa -32(%r12),%xmm2 pand 176(%r10),%xmm4 movdqa -16(%r12),%xmm3 pand 192(%r10),%xmm5 por %xmm4,%xmm0 pand 208(%r10),%xmm2 por %xmm5,%xmm1 pand 224(%r10),%xmm3 por %xmm2,%xmm0 por %xmm3,%xmm1 movdqa 0(%r12),%xmm4 movdqa 16(%r12),%xmm5 movdqa 32(%r12),%xmm2 pand 240(%r10),%xmm4 movdqa 48(%r12),%xmm3 pand 256(%r10),%xmm5 por %xmm4,%xmm0 pand 272(%r10),%xmm2 por %xmm5,%xmm1 pand 288(%r10),%xmm3 por %xmm2,%xmm0 por %xmm3,%xmm1 por %xmm1,%xmm0 pshufd $0x4e,%xmm0,%xmm1 por %xmm1,%xmm0 leaq 256(%r12),%r12 .byte 102,72,15,126,195 movq %r13,16+8(%rsp) movq %rdi,56+8(%rsp) movq (%r8),%r8 movq (%rsi),%rax leaq (%rsi,%r9,1),%rsi negq %r9 movq %r8,%rbp mulq %rbx movq %rax,%r10 movq (%rcx),%rax imulq %r10,%rbp leaq 64+8(%rsp),%r14 movq %rdx,%r11 mulq %rbp addq %rax,%r10 movq 8(%rsi,%r9,1),%rax adcq $0,%rdx movq %rdx,%rdi mulq %rbx addq 
%rax,%r11 movq 8(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq 16(%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi leaq 32(%r9),%r15 leaq 32(%rcx),%rcx adcq $0,%rdx movq %rdi,(%r14) movq %rdx,%r13 jmp .L1st4x .align 32 .L1st4x: mulq %rbx addq %rax,%r10 movq -16(%rcx),%rax leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq -8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-24(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq -8(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq (%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx movq %rdi,-16(%r14) movq %rdx,%r13 mulq %rbx addq %rax,%r10 movq 0(%rcx),%rax adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq 8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-8(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq 8(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq 16(%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi leaq 32(%rcx),%rcx adcq $0,%rdx movq %rdi,(%r14) movq %rdx,%r13 addq $32,%r15 jnz .L1st4x mulq %rbx addq %rax,%r10 movq -16(%rcx),%rax leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq -8(%rsi),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %r13,-24(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq -8(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq (%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx movq %rdi,-16(%r14) movq %rdx,%r13 leaq (%rcx,%r9,1),%rcx xorq %rdi,%rdi addq %r10,%r13 adcq $0,%rdi movq %r13,-8(%r14) jmp .Louter4x .align 32 .Louter4x: leaq 16+128(%r14),%rdx pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 movdqa -128(%r12),%xmm0 movdqa -112(%r12),%xmm1 movdqa -96(%r12),%xmm2 movdqa -80(%r12),%xmm3 pand -128(%rdx),%xmm0 pand -112(%rdx),%xmm1 por %xmm0,%xmm4 pand -96(%rdx),%xmm2 por %xmm1,%xmm5 pand -80(%rdx),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 movdqa -64(%r12),%xmm0 movdqa -48(%r12),%xmm1 movdqa -32(%r12),%xmm2 movdqa -16(%r12),%xmm3 pand -64(%rdx),%xmm0 pand -48(%rdx),%xmm1 por %xmm0,%xmm4 pand -32(%rdx),%xmm2 por %xmm1,%xmm5 pand -16(%rdx),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 movdqa 0(%r12),%xmm0 movdqa 16(%r12),%xmm1 movdqa 32(%r12),%xmm2 movdqa 48(%r12),%xmm3 pand 0(%rdx),%xmm0 pand 16(%rdx),%xmm1 por %xmm0,%xmm4 pand 32(%rdx),%xmm2 por %xmm1,%xmm5 pand 48(%rdx),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 movdqa 64(%r12),%xmm0 movdqa 80(%r12),%xmm1 movdqa 96(%r12),%xmm2 movdqa 112(%r12),%xmm3 pand 64(%rdx),%xmm0 pand 80(%rdx),%xmm1 por %xmm0,%xmm4 pand 96(%rdx),%xmm2 por %xmm1,%xmm5 pand 112(%rdx),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 por %xmm5,%xmm4 pshufd $0x4e,%xmm4,%xmm0 por %xmm4,%xmm0 leaq 256(%r12),%r12 .byte 102,72,15,126,195 movq (%r14,%r9,1),%r10 movq %r8,%rbp mulq %rbx addq %rax,%r10 movq (%rcx),%rax adcq $0,%rdx imulq %r10,%rbp movq %rdx,%r11 movq %rdi,(%r14) leaq (%r14,%r9,1),%r14 mulq %rbp addq %rax,%r10 movq 8(%rsi,%r9,1),%rax adcq $0,%rdx movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq 8(%rcx),%rax adcq $0,%rdx addq 8(%r14),%r11 adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq 16(%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi leaq 32(%r9),%r15 leaq 32(%rcx),%rcx adcq $0,%rdx movq %rdx,%r13 jmp .Linner4x .align 32 .Linner4x: mulq %rbx addq %rax,%r10 movq -16(%rcx),%rax adcq $0,%rdx addq 16(%r14),%r10 leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq -8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %rdi,-32(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq -8(%rcx),%rax adcq 
$0,%rdx addq -8(%r14),%r11 adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq (%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx movq %r13,-24(%r14) movq %rdx,%r13 mulq %rbx addq %rax,%r10 movq 0(%rcx),%rax adcq $0,%rdx addq (%r14),%r10 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq 8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %rdi,-16(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq 8(%rcx),%rax adcq $0,%rdx addq 8(%r14),%r11 adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq 16(%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi leaq 32(%rcx),%rcx adcq $0,%rdx movq %r13,-8(%r14) movq %rdx,%r13 addq $32,%r15 jnz .Linner4x mulq %rbx addq %rax,%r10 movq -16(%rcx),%rax adcq $0,%rdx addq 16(%r14),%r10 leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 movq -8(%rsi),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx movq %rdi,-32(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 movq %rbp,%rax movq -8(%rcx),%rbp adcq $0,%rdx addq -8(%r14),%r11 adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi movq (%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx movq %r13,-24(%r14) movq %rdx,%r13 movq %rdi,-16(%r14) leaq (%rcx,%r9,1),%rcx xorq %rdi,%rdi addq %r10,%r13 adcq $0,%rdi addq (%r14),%r13 adcq $0,%rdi movq %r13,-8(%r14) cmpq 16+8(%rsp),%r12 jb .Louter4x xorq %rax,%rax subq %r13,%rbp adcq %r15,%r15 orq %r15,%rdi subq %rdi,%rax leaq (%r14,%r9,1),%rbx movq (%rcx),%r12 leaq (%rcx),%rbp movq %r9,%rcx sarq $3+2,%rcx movq 56+8(%rsp),%rdi decq %r12 xorq %r10,%r10 movq 8(%rbp),%r13 movq 16(%rbp),%r14 movq 24(%rbp),%r15 jmp .Lsqr4x_sub_entry .size mul4x_internal,.-mul4x_internal .globl bn_power5 .type bn_power5,@function .align 32 bn_power5: + movl OPENSSL_ia32cap_P+8(%rip),%r11d + andl $0x80108,%r11d + cmpl $0x80108,%r11d + je .Lpowerx5_enter movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 shll $3,%r9d leal (%r9,%r9,2),%r10d negq %r9 movq (%r8),%r8 leaq -320(%rsp,%r9,2),%r11 subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lpwr_sp_alt subq %r11,%rsp leaq -320(%rsp,%r9,2),%rsp jmp .Lpwr_sp_done .align 32 .Lpwr_sp_alt: leaq 4096-320(,%r9,2),%r10 leaq -320(%rsp,%r9,2),%rsp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 subq %r11,%rsp .Lpwr_sp_done: andq $-64,%rsp movq %rax,%r11 subq %rsp,%r11 andq $-4096,%r11 .Lpwr_page_walk: movq (%rsp,%r11,1),%r10 subq $4096,%r11 .byte 0x2e jnc .Lpwr_page_walk movq %r9,%r10 negq %r9 movq %r8,32(%rsp) movq %rax,40(%rsp) .Lpower5_body: .byte 102,72,15,110,207 .byte 102,72,15,110,209 .byte 102,73,15,110,218 .byte 102,72,15,110,226 call __bn_sqr8x_internal call __bn_post4x_internal call __bn_sqr8x_internal call __bn_post4x_internal call __bn_sqr8x_internal call __bn_post4x_internal call __bn_sqr8x_internal call __bn_post4x_internal call __bn_sqr8x_internal call __bn_post4x_internal .byte 102,72,15,126,209 .byte 102,72,15,126,226 movq %rsi,%rdi movq 40(%rsp),%rax leaq 32(%rsp),%r8 call mul4x_internal movq 40(%rsp),%rsi movq $1,%rax movq -48(%rsi),%r15 movq -40(%rsi),%r14 movq -32(%rsi),%r13 movq -24(%rsi),%r12 movq -16(%rsi),%rbp movq -8(%rsi),%rbx leaq (%rsi),%rsp .Lpower5_epilogue: .byte 0xf3,0xc3 .size bn_power5,.-bn_power5 .globl bn_sqr8x_internal .hidden bn_sqr8x_internal .type bn_sqr8x_internal,@function .align 32 bn_sqr8x_internal: __bn_sqr8x_internal: leaq 32(%r10),%rbp leaq (%rsi,%r9,1),%rsi movq %r9,%rcx movq -32(%rsi,%rbp,1),%r14 leaq 48+8(%rsp,%r9,2),%rdi movq -24(%rsi,%rbp,1),%rax leaq -32(%rdi,%rbp,1),%rdi movq -16(%rsi,%rbp,1),%rbx movq %rax,%r15 mulq %r14 movq 
%rax,%r10 movq %rbx,%rax movq %rdx,%r11 movq %r10,-24(%rdi,%rbp,1) mulq %r14 addq %rax,%r11 movq %rbx,%rax adcq $0,%rdx movq %r11,-16(%rdi,%rbp,1) movq %rdx,%r10 movq -8(%rsi,%rbp,1),%rbx mulq %r15 movq %rax,%r12 movq %rbx,%rax movq %rdx,%r13 leaq (%rbp),%rcx mulq %r14 addq %rax,%r10 movq %rbx,%rax movq %rdx,%r11 adcq $0,%r11 addq %r12,%r10 adcq $0,%r11 movq %r10,-8(%rdi,%rcx,1) jmp .Lsqr4x_1st .align 32 .Lsqr4x_1st: movq (%rsi,%rcx,1),%rbx mulq %r15 addq %rax,%r13 movq %rbx,%rax movq %rdx,%r12 adcq $0,%r12 mulq %r14 addq %rax,%r11 movq %rbx,%rax movq 8(%rsi,%rcx,1),%rbx movq %rdx,%r10 adcq $0,%r10 addq %r13,%r11 adcq $0,%r10 mulq %r15 addq %rax,%r12 movq %rbx,%rax movq %r11,(%rdi,%rcx,1) movq %rdx,%r13 adcq $0,%r13 mulq %r14 addq %rax,%r10 movq %rbx,%rax movq 16(%rsi,%rcx,1),%rbx movq %rdx,%r11 adcq $0,%r11 addq %r12,%r10 adcq $0,%r11 mulq %r15 addq %rax,%r13 movq %rbx,%rax movq %r10,8(%rdi,%rcx,1) movq %rdx,%r12 adcq $0,%r12 mulq %r14 addq %rax,%r11 movq %rbx,%rax movq 24(%rsi,%rcx,1),%rbx movq %rdx,%r10 adcq $0,%r10 addq %r13,%r11 adcq $0,%r10 mulq %r15 addq %rax,%r12 movq %rbx,%rax movq %r11,16(%rdi,%rcx,1) movq %rdx,%r13 adcq $0,%r13 leaq 32(%rcx),%rcx mulq %r14 addq %rax,%r10 movq %rbx,%rax movq %rdx,%r11 adcq $0,%r11 addq %r12,%r10 adcq $0,%r11 movq %r10,-8(%rdi,%rcx,1) cmpq $0,%rcx jne .Lsqr4x_1st mulq %r15 addq %rax,%r13 leaq 16(%rbp),%rbp adcq $0,%rdx addq %r11,%r13 adcq $0,%rdx movq %r13,(%rdi) movq %rdx,%r12 movq %rdx,8(%rdi) jmp .Lsqr4x_outer .align 32 .Lsqr4x_outer: movq -32(%rsi,%rbp,1),%r14 leaq 48+8(%rsp,%r9,2),%rdi movq -24(%rsi,%rbp,1),%rax leaq -32(%rdi,%rbp,1),%rdi movq -16(%rsi,%rbp,1),%rbx movq %rax,%r15 mulq %r14 movq -24(%rdi,%rbp,1),%r10 addq %rax,%r10 movq %rbx,%rax adcq $0,%rdx movq %r10,-24(%rdi,%rbp,1) movq %rdx,%r11 mulq %r14 addq %rax,%r11 movq %rbx,%rax adcq $0,%rdx addq -16(%rdi,%rbp,1),%r11 movq %rdx,%r10 adcq $0,%r10 movq %r11,-16(%rdi,%rbp,1) xorq %r12,%r12 movq -8(%rsi,%rbp,1),%rbx mulq %r15 addq %rax,%r12 movq %rbx,%rax adcq $0,%rdx addq -8(%rdi,%rbp,1),%r12 movq %rdx,%r13 adcq $0,%r13 mulq %r14 addq %rax,%r10 movq %rbx,%rax adcq $0,%rdx addq %r12,%r10 movq %rdx,%r11 adcq $0,%r11 movq %r10,-8(%rdi,%rbp,1) leaq (%rbp),%rcx jmp .Lsqr4x_inner .align 32 .Lsqr4x_inner: movq (%rsi,%rcx,1),%rbx mulq %r15 addq %rax,%r13 movq %rbx,%rax movq %rdx,%r12 adcq $0,%r12 addq (%rdi,%rcx,1),%r13 adcq $0,%r12 .byte 0x67 mulq %r14 addq %rax,%r11 movq %rbx,%rax movq 8(%rsi,%rcx,1),%rbx movq %rdx,%r10 adcq $0,%r10 addq %r13,%r11 adcq $0,%r10 mulq %r15 addq %rax,%r12 movq %r11,(%rdi,%rcx,1) movq %rbx,%rax movq %rdx,%r13 adcq $0,%r13 addq 8(%rdi,%rcx,1),%r12 leaq 16(%rcx),%rcx adcq $0,%r13 mulq %r14 addq %rax,%r10 movq %rbx,%rax adcq $0,%rdx addq %r12,%r10 movq %rdx,%r11 adcq $0,%r11 movq %r10,-8(%rdi,%rcx,1) cmpq $0,%rcx jne .Lsqr4x_inner .byte 0x67 mulq %r15 addq %rax,%r13 adcq $0,%rdx addq %r11,%r13 adcq $0,%rdx movq %r13,(%rdi) movq %rdx,%r12 movq %rdx,8(%rdi) addq $16,%rbp jnz .Lsqr4x_outer movq -32(%rsi),%r14 leaq 48+8(%rsp,%r9,2),%rdi movq -24(%rsi),%rax leaq -32(%rdi,%rbp,1),%rdi movq -16(%rsi),%rbx movq %rax,%r15 mulq %r14 addq %rax,%r10 movq %rbx,%rax movq %rdx,%r11 adcq $0,%r11 mulq %r14 addq %rax,%r11 movq %rbx,%rax movq %r10,-24(%rdi) movq %rdx,%r10 adcq $0,%r10 addq %r13,%r11 movq -8(%rsi),%rbx adcq $0,%r10 mulq %r15 addq %rax,%r12 movq %rbx,%rax movq %r11,-16(%rdi) movq %rdx,%r13 adcq $0,%r13 mulq %r14 addq %rax,%r10 movq %rbx,%rax movq %rdx,%r11 adcq $0,%r11 addq %r12,%r10 adcq $0,%r11 movq %r10,-8(%rdi) mulq %r15 addq %rax,%r13 movq -16(%rsi),%rax adcq 
$0,%rdx addq %r11,%r13 adcq $0,%rdx movq %r13,(%rdi) movq %rdx,%r12 movq %rdx,8(%rdi) mulq %rbx addq $16,%rbp xorq %r14,%r14 subq %r9,%rbp xorq %r15,%r15 addq %r12,%rax adcq $0,%rdx movq %rax,8(%rdi) movq %rdx,16(%rdi) movq %r15,24(%rdi) movq -16(%rsi,%rbp,1),%rax leaq 48+8(%rsp),%rdi xorq %r10,%r10 movq 8(%rdi),%r11 leaq (%r14,%r10,2),%r12 shrq $63,%r10 leaq (%rcx,%r11,2),%r13 shrq $63,%r11 orq %r10,%r13 movq 16(%rdi),%r10 movq %r11,%r14 mulq %rax negq %r15 movq 24(%rdi),%r11 adcq %rax,%r12 movq -8(%rsi,%rbp,1),%rax movq %r12,(%rdi) adcq %rdx,%r13 leaq (%r14,%r10,2),%rbx movq %r13,8(%rdi) sbbq %r15,%r15 shrq $63,%r10 leaq (%rcx,%r11,2),%r8 shrq $63,%r11 orq %r10,%r8 movq 32(%rdi),%r10 movq %r11,%r14 mulq %rax negq %r15 movq 40(%rdi),%r11 adcq %rax,%rbx movq 0(%rsi,%rbp,1),%rax movq %rbx,16(%rdi) adcq %rdx,%r8 leaq 16(%rbp),%rbp movq %r8,24(%rdi) sbbq %r15,%r15 leaq 64(%rdi),%rdi jmp .Lsqr4x_shift_n_add .align 32 .Lsqr4x_shift_n_add: leaq (%r14,%r10,2),%r12 shrq $63,%r10 leaq (%rcx,%r11,2),%r13 shrq $63,%r11 orq %r10,%r13 movq -16(%rdi),%r10 movq %r11,%r14 mulq %rax negq %r15 movq -8(%rdi),%r11 adcq %rax,%r12 movq -8(%rsi,%rbp,1),%rax movq %r12,-32(%rdi) adcq %rdx,%r13 leaq (%r14,%r10,2),%rbx movq %r13,-24(%rdi) sbbq %r15,%r15 shrq $63,%r10 leaq (%rcx,%r11,2),%r8 shrq $63,%r11 orq %r10,%r8 movq 0(%rdi),%r10 movq %r11,%r14 mulq %rax negq %r15 movq 8(%rdi),%r11 adcq %rax,%rbx movq 0(%rsi,%rbp,1),%rax movq %rbx,-16(%rdi) adcq %rdx,%r8 leaq (%r14,%r10,2),%r12 movq %r8,-8(%rdi) sbbq %r15,%r15 shrq $63,%r10 leaq (%rcx,%r11,2),%r13 shrq $63,%r11 orq %r10,%r13 movq 16(%rdi),%r10 movq %r11,%r14 mulq %rax negq %r15 movq 24(%rdi),%r11 adcq %rax,%r12 movq 8(%rsi,%rbp,1),%rax movq %r12,0(%rdi) adcq %rdx,%r13 leaq (%r14,%r10,2),%rbx movq %r13,8(%rdi) sbbq %r15,%r15 shrq $63,%r10 leaq (%rcx,%r11,2),%r8 shrq $63,%r11 orq %r10,%r8 movq 32(%rdi),%r10 movq %r11,%r14 mulq %rax negq %r15 movq 40(%rdi),%r11 adcq %rax,%rbx movq 16(%rsi,%rbp,1),%rax movq %rbx,16(%rdi) adcq %rdx,%r8 movq %r8,24(%rdi) sbbq %r15,%r15 leaq 64(%rdi),%rdi addq $32,%rbp jnz .Lsqr4x_shift_n_add leaq (%r14,%r10,2),%r12 .byte 0x67 shrq $63,%r10 leaq (%rcx,%r11,2),%r13 shrq $63,%r11 orq %r10,%r13 movq -16(%rdi),%r10 movq %r11,%r14 mulq %rax negq %r15 movq -8(%rdi),%r11 adcq %rax,%r12 movq -8(%rsi),%rax movq %r12,-32(%rdi) adcq %rdx,%r13 leaq (%r14,%r10,2),%rbx movq %r13,-24(%rdi) sbbq %r15,%r15 shrq $63,%r10 leaq (%rcx,%r11,2),%r8 shrq $63,%r11 orq %r10,%r8 mulq %rax negq %r15 adcq %rax,%rbx adcq %rdx,%r8 movq %rbx,-16(%rdi) movq %r8,-8(%rdi) .byte 102,72,15,126,213 __bn_sqr8x_reduction: xorq %rax,%rax leaq (%r9,%rbp,1),%rcx leaq 48+8(%rsp,%r9,2),%rdx movq %rcx,0+8(%rsp) leaq 48+8(%rsp,%r9,1),%rdi movq %rdx,8+8(%rsp) negq %r9 jmp .L8x_reduction_loop .align 32 .L8x_reduction_loop: leaq (%rdi,%r9,1),%rdi .byte 0x66 movq 0(%rdi),%rbx movq 8(%rdi),%r9 movq 16(%rdi),%r10 movq 24(%rdi),%r11 movq 32(%rdi),%r12 movq 40(%rdi),%r13 movq 48(%rdi),%r14 movq 56(%rdi),%r15 movq %rax,(%rdx) leaq 64(%rdi),%rdi .byte 0x67 movq %rbx,%r8 imulq 32+8(%rsp),%rbx movq 0(%rbp),%rax movl $8,%ecx jmp .L8x_reduce .align 32 .L8x_reduce: mulq %rbx movq 8(%rbp),%rax negq %r8 movq %rdx,%r8 adcq $0,%r8 mulq %rbx addq %rax,%r9 movq 16(%rbp),%rax adcq $0,%rdx addq %r9,%r8 movq %rbx,48-8+8(%rsp,%rcx,8) movq %rdx,%r9 adcq $0,%r9 mulq %rbx addq %rax,%r10 movq 24(%rbp),%rax adcq $0,%rdx addq %r10,%r9 movq 32+8(%rsp),%rsi movq %rdx,%r10 adcq $0,%r10 mulq %rbx addq %rax,%r11 movq 32(%rbp),%rax adcq $0,%rdx imulq %r8,%rsi addq %r11,%r10 movq %rdx,%r11 adcq $0,%r11 mulq %rbx addq 
%rax,%r12 movq 40(%rbp),%rax adcq $0,%rdx addq %r12,%r11 movq %rdx,%r12 adcq $0,%r12 mulq %rbx addq %rax,%r13 movq 48(%rbp),%rax adcq $0,%rdx addq %r13,%r12 movq %rdx,%r13 adcq $0,%r13 mulq %rbx addq %rax,%r14 movq 56(%rbp),%rax adcq $0,%rdx addq %r14,%r13 movq %rdx,%r14 adcq $0,%r14 mulq %rbx movq %rsi,%rbx addq %rax,%r15 movq 0(%rbp),%rax adcq $0,%rdx addq %r15,%r14 movq %rdx,%r15 adcq $0,%r15 decl %ecx jnz .L8x_reduce leaq 64(%rbp),%rbp xorq %rax,%rax movq 8+8(%rsp),%rdx cmpq 0+8(%rsp),%rbp jae .L8x_no_tail .byte 0x66 addq 0(%rdi),%r8 adcq 8(%rdi),%r9 adcq 16(%rdi),%r10 adcq 24(%rdi),%r11 adcq 32(%rdi),%r12 adcq 40(%rdi),%r13 adcq 48(%rdi),%r14 adcq 56(%rdi),%r15 sbbq %rsi,%rsi movq 48+56+8(%rsp),%rbx movl $8,%ecx movq 0(%rbp),%rax jmp .L8x_tail .align 32 .L8x_tail: mulq %rbx addq %rax,%r8 movq 8(%rbp),%rax movq %r8,(%rdi) movq %rdx,%r8 adcq $0,%r8 mulq %rbx addq %rax,%r9 movq 16(%rbp),%rax adcq $0,%rdx addq %r9,%r8 leaq 8(%rdi),%rdi movq %rdx,%r9 adcq $0,%r9 mulq %rbx addq %rax,%r10 movq 24(%rbp),%rax adcq $0,%rdx addq %r10,%r9 movq %rdx,%r10 adcq $0,%r10 mulq %rbx addq %rax,%r11 movq 32(%rbp),%rax adcq $0,%rdx addq %r11,%r10 movq %rdx,%r11 adcq $0,%r11 mulq %rbx addq %rax,%r12 movq 40(%rbp),%rax adcq $0,%rdx addq %r12,%r11 movq %rdx,%r12 adcq $0,%r12 mulq %rbx addq %rax,%r13 movq 48(%rbp),%rax adcq $0,%rdx addq %r13,%r12 movq %rdx,%r13 adcq $0,%r13 mulq %rbx addq %rax,%r14 movq 56(%rbp),%rax adcq $0,%rdx addq %r14,%r13 movq %rdx,%r14 adcq $0,%r14 mulq %rbx movq 48-16+8(%rsp,%rcx,8),%rbx addq %rax,%r15 adcq $0,%rdx addq %r15,%r14 movq 0(%rbp),%rax movq %rdx,%r15 adcq $0,%r15 decl %ecx jnz .L8x_tail leaq 64(%rbp),%rbp movq 8+8(%rsp),%rdx cmpq 0+8(%rsp),%rbp jae .L8x_tail_done movq 48+56+8(%rsp),%rbx negq %rsi movq 0(%rbp),%rax adcq 0(%rdi),%r8 adcq 8(%rdi),%r9 adcq 16(%rdi),%r10 adcq 24(%rdi),%r11 adcq 32(%rdi),%r12 adcq 40(%rdi),%r13 adcq 48(%rdi),%r14 adcq 56(%rdi),%r15 sbbq %rsi,%rsi movl $8,%ecx jmp .L8x_tail .align 32 .L8x_tail_done: addq (%rdx),%r8 adcq $0,%r9 adcq $0,%r10 adcq $0,%r11 adcq $0,%r12 adcq $0,%r13 adcq $0,%r14 adcq $0,%r15 xorq %rax,%rax negq %rsi .L8x_no_tail: adcq 0(%rdi),%r8 adcq 8(%rdi),%r9 adcq 16(%rdi),%r10 adcq 24(%rdi),%r11 adcq 32(%rdi),%r12 adcq 40(%rdi),%r13 adcq 48(%rdi),%r14 adcq 56(%rdi),%r15 adcq $0,%rax movq -8(%rbp),%rcx xorq %rsi,%rsi .byte 102,72,15,126,213 movq %r8,0(%rdi) movq %r9,8(%rdi) .byte 102,73,15,126,217 movq %r10,16(%rdi) movq %r11,24(%rdi) movq %r12,32(%rdi) movq %r13,40(%rdi) movq %r14,48(%rdi) movq %r15,56(%rdi) leaq 64(%rdi),%rdi cmpq %rdx,%rdi jb .L8x_reduction_loop .byte 0xf3,0xc3 .size bn_sqr8x_internal,.-bn_sqr8x_internal .type __bn_post4x_internal,@function .align 32 __bn_post4x_internal: movq 0(%rbp),%r12 leaq (%rdi,%r9,1),%rbx movq %r9,%rcx .byte 102,72,15,126,207 negq %rax .byte 102,72,15,126,206 sarq $3+2,%rcx decq %r12 xorq %r10,%r10 movq 8(%rbp),%r13 movq 16(%rbp),%r14 movq 24(%rbp),%r15 jmp .Lsqr4x_sub_entry .align 16 .Lsqr4x_sub: movq 0(%rbp),%r12 movq 8(%rbp),%r13 movq 16(%rbp),%r14 movq 24(%rbp),%r15 .Lsqr4x_sub_entry: leaq 32(%rbp),%rbp notq %r12 notq %r13 notq %r14 notq %r15 andq %rax,%r12 andq %rax,%r13 andq %rax,%r14 andq %rax,%r15 negq %r10 adcq 0(%rbx),%r12 adcq 8(%rbx),%r13 adcq 16(%rbx),%r14 adcq 24(%rbx),%r15 movq %r12,0(%rdi) leaq 32(%rbx),%rbx movq %r13,8(%rdi) sbbq %r10,%r10 movq %r14,16(%rdi) movq %r15,24(%rdi) leaq 32(%rdi),%rdi incq %rcx jnz .Lsqr4x_sub movq %r9,%r10 negq %r9 .byte 0xf3,0xc3 .size __bn_post4x_internal,.-__bn_post4x_internal .globl bn_from_montgomery .type bn_from_montgomery,@function 
.align 32 bn_from_montgomery: testl $7,%r9d jz bn_from_mont8x xorl %eax,%eax .byte 0xf3,0xc3 .size bn_from_montgomery,.-bn_from_montgomery .type bn_from_mont8x,@function .align 32 bn_from_mont8x: .byte 0x67 movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 shll $3,%r9d leaq (%r9,%r9,2),%r10 negq %r9 movq (%r8),%r8 leaq -320(%rsp,%r9,2),%r11 subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lfrom_sp_alt subq %r11,%rsp leaq -320(%rsp,%r9,2),%rsp jmp .Lfrom_sp_done .align 32 .Lfrom_sp_alt: leaq 4096-320(,%r9,2),%r10 leaq -320(%rsp,%r9,2),%rsp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 subq %r11,%rsp .Lfrom_sp_done: andq $-64,%rsp movq %rax,%r11 subq %rsp,%r11 andq $-4096,%r11 .Lfrom_page_walk: movq (%rsp,%r11,1),%r10 subq $4096,%r11 .byte 0x2e jnc .Lfrom_page_walk movq %r9,%r10 negq %r9 movq %r8,32(%rsp) movq %rax,40(%rsp) .Lfrom_body: movq %r9,%r11 leaq 48(%rsp),%rax pxor %xmm0,%xmm0 jmp .Lmul_by_1 .align 32 .Lmul_by_1: movdqu (%rsi),%xmm1 movdqu 16(%rsi),%xmm2 movdqu 32(%rsi),%xmm3 movdqa %xmm0,(%rax,%r9,1) movdqu 48(%rsi),%xmm4 movdqa %xmm0,16(%rax,%r9,1) .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 movdqa %xmm1,(%rax) movdqa %xmm0,32(%rax,%r9,1) movdqa %xmm2,16(%rax) movdqa %xmm0,48(%rax,%r9,1) movdqa %xmm3,32(%rax) movdqa %xmm4,48(%rax) leaq 64(%rax),%rax subq $64,%r11 jnz .Lmul_by_1 .byte 102,72,15,110,207 .byte 102,72,15,110,209 .byte 0x67 movq %rcx,%rbp .byte 102,73,15,110,218 + movl OPENSSL_ia32cap_P+8(%rip),%r11d + andl $0x80108,%r11d + cmpl $0x80108,%r11d + jne .Lfrom_mont_nox + + leaq (%rax,%r9,1),%rdi + call __bn_sqrx8x_reduction + call __bn_postx4x_internal + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + movq 40(%rsp),%rsi + jmp .Lfrom_mont_zero + +.align 32 +.Lfrom_mont_nox: call __bn_sqr8x_reduction call __bn_post4x_internal pxor %xmm0,%xmm0 leaq 48(%rsp),%rax movq 40(%rsp),%rsi jmp .Lfrom_mont_zero .align 32 .Lfrom_mont_zero: movdqa %xmm0,0(%rax) movdqa %xmm0,16(%rax) movdqa %xmm0,32(%rax) movdqa %xmm0,48(%rax) leaq 64(%rax),%rax subq $32,%r9 jnz .Lfrom_mont_zero movq $1,%rax movq -48(%rsi),%r15 movq -40(%rsi),%r14 movq -32(%rsi),%r13 movq -24(%rsi),%r12 movq -16(%rsi),%rbp movq -8(%rsi),%rbx leaq (%rsi),%rsp .Lfrom_epilogue: .byte 0xf3,0xc3 .size bn_from_mont8x,.-bn_from_mont8x +.type bn_mulx4x_mont_gather5,@function +.align 32 +bn_mulx4x_mont_gather5: +.Lmulx4x_enter: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + movq (%r8),%r8 + + + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lmulx4xsp_alt + subq %r11,%rsp + leaq -320(%rsp,%r9,2),%rsp + jmp .Lmulx4xsp_done + +.Lmulx4xsp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lmulx4xsp_done: + andq $-64,%rsp + movq %rax,%r11 + subq %rsp,%r11 + andq $-4096,%r11 +.Lmulx4x_page_walk: + movq (%rsp,%r11,1),%r10 + subq $4096,%r11 +.byte 0x2e + jnc .Lmulx4x_page_walk + + + + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lmulx4x_body: + call mulx4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lmulx4x_epilogue: + .byte 0xf3,0xc3 +.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 + +.type mulx4x_internal,@function +.align 32 +mulx4x_internal: + movq %r9,8(%rsp) + movq %r9,%r10 + negq %r9 + shlq $5,%r9 + 
negq %r10 + leaq 128(%rdx,%r9,1),%r13 + shrq $5+5,%r9 + movd 8(%rax),%xmm5 + subq $1,%r9 + leaq .Linc(%rip),%rax + movq %r13,16+8(%rsp) + movq %r9,24+8(%rsp) + movq %rdi,56+8(%rsp) + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 88-112(%rsp,%r10,1),%r10 + leaq 128(%rdx),%rdi + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 +.byte 0x67 + movdqa %xmm1,%xmm2 +.byte 0x67 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 +.byte 0x67 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + + pand 64(%rdi),%xmm0 + pand 80(%rdi),%xmm1 + pand 96(%rdi),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%rdi),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%rdi),%xmm4 + movdqa -112(%rdi),%xmm5 + movdqa -96(%rdi),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%rdi),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%rdi),%xmm4 + movdqa -48(%rdi),%xmm5 + movdqa -32(%rdi),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%rdi),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%rdi),%xmm4 + movdqa 16(%rdi),%xmm5 + movdqa 32(%rdi),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%rdi),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + pxor %xmm1,%xmm0 + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + leaq 256(%rdi),%rdi +.byte 102,72,15,126,194 + leaq 64+32+8(%rsp),%rbx + + movq %rdx,%r9 + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r12 + addq %rax,%r11 + mulxq 16(%rsi),%rax,%r13 + adcq %rax,%r12 + adcq $0,%r13 + mulxq 24(%rsi),%rax,%r14 + + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + xorq %rbp,%rbp + movq %r8,%rdx + + movq %rdi,8+8(%rsp) + + leaq 32(%rsi),%rsi + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + movq 24+8(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r12,-16(%rbx) + jmp .Lmulx4x_1st + +.align 32 +.Lmulx4x_1st: + adcxq 
%rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_1st + + movq 8(%rsp),%rax + adcq %rbp,%r15 + leaq (%rsi,%rax,1),%rsi + addq %r15,%r14 + movq 8+8(%rsp),%rdi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + jmp .Lmulx4x_outer + +.align 32 +.Lmulx4x_outer: + leaq 16-256(%rbx),%r10 + pxor %xmm4,%xmm4 +.byte 0x67,0x67 + pxor %xmm5,%xmm5 + movdqa -128(%rdi),%xmm0 + movdqa -112(%rdi),%xmm1 + movdqa -96(%rdi),%xmm2 + pand 256(%r10),%xmm0 + movdqa -80(%rdi),%xmm3 + pand 272(%r10),%xmm1 + por %xmm0,%xmm4 + pand 288(%r10),%xmm2 + por %xmm1,%xmm5 + pand 304(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%rdi),%xmm0 + movdqa -48(%rdi),%xmm1 + movdqa -32(%rdi),%xmm2 + pand 320(%r10),%xmm0 + movdqa -16(%rdi),%xmm3 + pand 336(%r10),%xmm1 + por %xmm0,%xmm4 + pand 352(%r10),%xmm2 + por %xmm1,%xmm5 + pand 368(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%rdi),%xmm0 + movdqa 16(%rdi),%xmm1 + movdqa 32(%rdi),%xmm2 + pand 384(%r10),%xmm0 + movdqa 48(%rdi),%xmm3 + pand 400(%r10),%xmm1 + por %xmm0,%xmm4 + pand 416(%r10),%xmm2 + por %xmm1,%xmm5 + pand 432(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%rdi),%xmm0 + movdqa 80(%rdi),%xmm1 + movdqa 96(%rdi),%xmm2 + pand 448(%r10),%xmm0 + movdqa 112(%rdi),%xmm3 + pand 464(%r10),%xmm1 + por %xmm0,%xmm4 + pand 480(%r10),%xmm2 + por %xmm1,%xmm5 + pand 496(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%rdi),%rdi +.byte 102,72,15,126,194 + + movq %rbp,(%rbx) + leaq 32(%rbx,%rax,1),%rbx + mulxq 0(%rsi),%r8,%r11 + xorq %rbp,%rbp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + mulxq 24(%rsi),%rdx,%r14 + adoxq -16(%rbx),%r12 + adcxq %rdx,%r13 + leaq (%rcx,%rax,1),%rcx + leaq 32(%rsi),%rsi + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + adoxq %rbp,%r14 + + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + + movq %r8,%rdx + xorq %rbp,%rbp + movq %rdi,8+8(%rsp) + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq 24+8(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r12 + movq %r11,-24(%rbx) + adoxq %rbp,%r15 + movq %r12,-16(%rbx) + leaq 32(%rcx),%rcx + jmp .Lmulx4x_inner + +.align 32 +.Lmulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 
8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + movq %r11,-32(%rbx) + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + leaq 32(%rcx),%rcx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_inner + + movq 0+8(%rsp),%rax + adcq %rbp,%r15 + subq 0(%rbx),%rdi + movq 8+8(%rsp),%rdi + movq 16+8(%rsp),%r10 + adcq %r15,%r14 + leaq (%rsi,%rax,1),%rsi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + + cmpq %r10,%rdi + jb .Lmulx4x_outer + + movq -8(%rcx),%r10 + movq %rbp,%r8 + movq (%rcx,%rax,1),%r12 + leaq (%rcx,%rax,1),%rbp + movq %rax,%rcx + leaq (%rbx,%rax,1),%rdi + xorl %eax,%eax + xorq %r15,%r15 + subq %r14,%r10 + adcq %r15,%r15 + orq %r15,%r8 + sarq $3+2,%rcx + subq %r8,%rax + movq 56+8(%rsp),%rdx + decq %r12 + movq 8(%rbp),%r13 + xorq %r8,%r8 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp .Lsqrx4x_sub_entry +.size mulx4x_internal,.-mulx4x_internal +.type bn_powerx5,@function +.align 32 +bn_powerx5: +.Lpowerx5_enter: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + movq (%r8),%r8 + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lpwrx_sp_alt + subq %r11,%rsp + leaq -320(%rsp,%r9,2),%rsp + jmp .Lpwrx_sp_done + +.align 32 +.Lpwrx_sp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lpwrx_sp_done: + andq $-64,%rsp + movq %rax,%r11 + subq %rsp,%r11 + andq $-4096,%r11 +.Lpwrx_page_walk: + movq (%rsp,%r11,1),%r10 + subq $4096,%r11 +.byte 0x2e + jnc .Lpwrx_page_walk + + movq %r9,%r10 + negq %r9 + + + + + + + + + + + + + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lpowerx5_body: + + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + + movq %r10,%r9 + movq %rsi,%rdi +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq 40(%rsp),%rax + + call mulx4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lpowerx5_epilogue: + .byte 0xf3,0xc3 +.size bn_powerx5,.-bn_powerx5 + +.globl bn_sqrx8x_internal +.hidden bn_sqrx8x_internal +.type bn_sqrx8x_internal,@function +.align 32 +bn_sqrx8x_internal: +__bn_sqrx8x_internal: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 48+8(%rsp),%rdi + leaq (%rsi,%r9,1),%rbp + movq %r9,0+8(%rsp) + movq %rbp,8+8(%rsp) + jmp .Lsqr8x_zero_start + +.align 32 +.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +.Lsqrx8x_zero: +.byte 0x3e + movdqa %xmm0,0(%rdi) + movdqa %xmm0,16(%rdi) + movdqa %xmm0,32(%rdi) + movdqa %xmm0,48(%rdi) +.Lsqr8x_zero_start: + movdqa %xmm0,64(%rdi) + movdqa %xmm0,80(%rdi) + movdqa %xmm0,96(%rdi) + movdqa %xmm0,112(%rdi) + leaq 128(%rdi),%rdi + subq $64,%r9 + jnz .Lsqrx8x_zero + + movq 0(%rsi),%rdx + + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + xorq %r13,%r13 + xorq %r14,%r14 + xorq %r15,%r15 + leaq 48+8(%rsp),%rdi + xorq %rbp,%rbp + jmp 
.Lsqrx8x_outer_loop + +.align 32 +.Lsqrx8x_outer_loop: + mulxq 8(%rsi),%r8,%rax + adcxq %r9,%r8 + adoxq %rax,%r10 + mulxq 16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 +.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 + adcxq %r11,%r10 + adoxq %rax,%r12 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 + adcxq %r12,%r11 + adoxq %rax,%r13 + mulxq 40(%rsi),%r12,%rax + adcxq %r13,%r12 + adoxq %rax,%r14 + mulxq 48(%rsi),%r13,%rax + adcxq %r14,%r13 + adoxq %r15,%rax + mulxq 56(%rsi),%r14,%r15 + movq 8(%rsi),%rdx + adcxq %rax,%r14 + adoxq %rbp,%r15 + adcq 64(%rdi),%r15 + movq %r8,8(%rdi) + movq %r9,16(%rdi) + sbbq %rcx,%rcx + xorq %rbp,%rbp + + + mulxq 16(%rsi),%r8,%rbx + mulxq 24(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 32(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %rbx,%r11 +.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 + adcxq %r13,%r11 + adoxq %r14,%r12 +.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 + movq 16(%rsi),%rdx + adcxq %rax,%r12 + adoxq %rbx,%r13 + adcxq %r15,%r13 + adoxq %rbp,%r14 + adcxq %rbp,%r14 + + movq %r8,24(%rdi) + movq %r9,32(%rdi) + + mulxq 24(%rsi),%r8,%rbx + mulxq 32(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 40(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %r13,%r11 +.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 +.byte 0x3e + movq 24(%rsi),%rdx + adcxq %rbx,%r11 + adoxq %rax,%r12 + adcxq %r14,%r12 + movq %r8,40(%rdi) + movq %r9,48(%rdi) + mulxq 32(%rsi),%r8,%rax + adoxq %rbp,%r13 + adcxq %rbp,%r13 + + mulxq 40(%rsi),%r9,%rbx + adcxq %r10,%r8 + adoxq %rax,%r9 + mulxq 48(%rsi),%r10,%rax + adcxq %r11,%r9 + adoxq %r12,%r10 + mulxq 56(%rsi),%r11,%r12 + movq 32(%rsi),%rdx + movq 40(%rsi),%r14 + adcxq %rbx,%r10 + adoxq %rax,%r11 + movq 48(%rsi),%r15 + adcxq %r13,%r11 + adoxq %rbp,%r12 + adcxq %rbp,%r12 + + movq %r8,56(%rdi) + movq %r9,64(%rdi) + + mulxq %r14,%r9,%rax + movq 56(%rsi),%r8 + adcxq %r10,%r9 + mulxq %r15,%r10,%rbx + adoxq %rax,%r10 + adcxq %r11,%r10 + mulxq %r8,%r11,%rax + movq %r14,%rdx + adoxq %rbx,%r11 + adcxq %r12,%r11 + + adcxq %rbp,%rax + + mulxq %r15,%r14,%rbx + mulxq %r8,%r12,%r13 + movq %r15,%rdx + leaq 64(%rsi),%rsi + adcxq %r14,%r11 + adoxq %rbx,%r12 + adcxq %rax,%r12 + adoxq %rbp,%r13 + +.byte 0x67,0x67 + mulxq %r8,%r8,%r14 + adcxq %r8,%r13 + adcxq %rbp,%r14 + + cmpq 8+8(%rsp),%rsi + je .Lsqrx8x_outer_break + + negq %rcx + movq $-8,%rcx + movq %rbp,%r15 + movq 64(%rdi),%r8 + adcxq 72(%rdi),%r9 + adcxq 80(%rdi),%r10 + adcxq 88(%rdi),%r11 + adcq 96(%rdi),%r12 + adcq 104(%rdi),%r13 + adcq 112(%rdi),%r14 + adcq 120(%rdi),%r15 + leaq (%rsi),%rbp + leaq 128(%rdi),%rdi + sbbq %rax,%rax + + movq -64(%rsi),%rdx + movq %rax,16+8(%rsp) + movq %rdi,24+8(%rsp) + + + xorl %eax,%eax + jmp .Lsqrx8x_loop + +.align 32 +.Lsqrx8x_loop: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + movq %rbx,(%rdi,%rcx,8) + movl $0,%ebx + adcxq %rax,%r13 + adoxq %r15,%r14 + +.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 + movq 8(%rsi,%rcx,8),%rdx + adcxq %rax,%r14 + 
adoxq %rbx,%r15 + adcxq %rbx,%r15 + +.byte 0x67 + incq %rcx + jnz .Lsqrx8x_loop + + leaq 64(%rbp),%rbp + movq $-8,%rcx + cmpq 8+8(%rsp),%rbp + je .Lsqrx8x_break + + subq 16+8(%rsp),%rbx +.byte 0x66 + movq -64(%rsi),%rdx + adcxq 0(%rdi),%r8 + adcxq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi +.byte 0x67 + sbbq %rax,%rax + xorl %ebx,%ebx + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_loop + +.align 32 +.Lsqrx8x_break: + subq 16+8(%rsp),%r8 + movq 24+8(%rsp),%rcx + movq 0(%rsi),%rdx + xorl %ebp,%ebp + movq %r8,0(%rdi) + cmpq %rcx,%rdi + je .Lsqrx8x_outer_loop + + movq %r9,8(%rdi) + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + movq 40(%rcx),%r13 + movq %r14,48(%rdi) + movq 48(%rcx),%r14 + movq %r15,56(%rdi) + movq 56(%rcx),%r15 + movq %rcx,%rdi + jmp .Lsqrx8x_outer_loop + +.align 32 +.Lsqrx8x_outer_break: + movq %r9,72(%rdi) +.byte 102,72,15,126,217 + movq %r10,80(%rdi) + movq %r11,88(%rdi) + movq %r12,96(%rdi) + movq %r13,104(%rdi) + movq %r14,112(%rdi) + leaq 48+8(%rsp),%rdi + movq (%rsi,%rcx,1),%rdx + + movq 8(%rdi),%r11 + xorq %r10,%r10 + movq 0+8(%rsp),%r9 + adoxq %r11,%r11 + movq 16(%rdi),%r12 + movq 24(%rdi),%r13 + + +.align 32 +.Lsqrx4x_shift_n_add: + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax +.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 +.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 40(%rdi),%r11 + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + movq 16(%rsi,%rcx,1),%rdx + movq 48(%rdi),%r12 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 56(%rdi),%r13 + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax + movq 24(%rsi,%rcx,1),%rdx + leaq 32(%rcx),%rcx + movq 64(%rdi),%r10 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 72(%rdi),%r11 + movq %rax,32(%rdi) + movq %rbx,40(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + jrcxz .Lsqrx4x_shift_n_add_break +.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 80(%rdi),%r12 + movq 88(%rdi),%r13 + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi + nop + jmp .Lsqrx4x_shift_n_add + +.align 32 +.Lsqrx4x_shift_n_add_break: + adcxq %r13,%rbx + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi +.byte 102,72,15,126,213 +__bn_sqrx8x_reduction: + xorl %eax,%eax + movq 32+8(%rsp),%rbx + movq 48+8(%rsp),%rdx + leaq -64(%rbp,%r9,1),%rcx + + movq %rcx,0+8(%rsp) + movq %rdi,8+8(%rsp) + + leaq 48+8(%rsp),%rdi + jmp .Lsqrx8x_reduction_loop + +.align 32 +.Lsqrx8x_reduction_loop: + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq %rdx,%r8 + imulq %rbx,%rdx + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,24+8(%rsp) + + leaq 64(%rdi),%rdi + xorq %rsi,%rsi + movq $-8,%rcx + jmp .Lsqrx8x_reduce + +.align 32 +.Lsqrx8x_reduce: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rbx,%rax + adoxq %r9,%r8 + + mulxq 8(%rbp),%rbx,%r9 + adcxq %rbx,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rbx,%r10 + adcxq %rbx,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rbx,%r11 + adcxq %rbx,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 + movq %rdx,%rax + movq %r8,%rdx + adcxq %rbx,%r11 + adoxq %r13,%r12 + + mulxq 32+8(%rsp),%rbx,%rdx + movq %rax,%rdx + 
movq %rax,64+48+8(%rsp,%rcx,8) + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq %rbx,%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + adcxq %rsi,%r15 + +.byte 0x67,0x67,0x67 + incq %rcx + jnz .Lsqrx8x_reduce + + movq %rsi,%rax + cmpq 0+8(%rsp),%rbp + jae .Lsqrx8x_no_tail + + movq 48+8(%rsp),%rdx + addq 0(%rdi),%r8 + leaq 64(%rbp),%rbp + movq $-8,%rcx + adcxq 8(%rdi),%r9 + adcxq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_tail + +.align 32 +.Lsqrx8x_tail: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq 72+48+8(%rsp,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + movq %rbx,(%rdi,%rcx,8) + movq %r8,%rbx + adcxq %rsi,%r15 + + incq %rcx + jnz .Lsqrx8x_tail + + cmpq 0+8(%rsp),%rbp + jae .Lsqrx8x_tail_done + + subq 16+8(%rsp),%rsi + movq 48+8(%rsp),%rdx + leaq 64(%rbp),%rbp + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + subq $8,%rcx + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_tail + +.align 32 +.Lsqrx8x_tail_done: + addq 24+8(%rsp),%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + + + movq %rsi,%rax + + subq 16+8(%rsp),%rsi +.Lsqrx8x_no_tail: + adcq 0(%rdi),%r8 +.byte 102,72,15,126,217 + adcq 8(%rdi),%r9 + movq 56(%rbp),%rsi +.byte 102,72,15,126,213 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq %rax,%rax + + movq 32+8(%rsp),%rbx + movq 64(%rdi,%rcx,1),%rdx + + movq %r8,0(%rdi) + leaq 64(%rdi),%r8 + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 64(%rdi,%rcx,1),%rdi + cmpq 8+8(%rsp),%r8 + jb .Lsqrx8x_reduction_loop + .byte 0xf3,0xc3 +.size bn_sqrx8x_internal,.-bn_sqrx8x_internal +.align 32 +__bn_postx4x_internal: + movq 0(%rbp),%r12 + movq %rcx,%r10 + movq %rcx,%r9 + negq %rax + sarq $3+2,%rcx + +.byte 102,72,15,126,202 +.byte 102,72,15,126,206 + decq %r12 + movq 8(%rbp),%r13 + xorq %r8,%r8 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp .Lsqrx4x_sub_entry + +.align 16 +.Lsqrx4x_sub: + movq 0(%rbp),%r12 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 +.Lsqrx4x_sub_entry: + andnq %rax,%r12,%r12 + leaq 32(%rbp),%rbp + andnq %rax,%r13,%r13 + andnq %rax,%r14,%r14 + andnq %rax,%r15,%r15 + + negq %r8 + adcq 0(%rdi),%r12 + adcq 8(%rdi),%r13 + adcq 16(%rdi),%r14 + adcq 24(%rdi),%r15 + movq %r12,0(%rdx) + leaq 32(%rdi),%rdi + movq %r13,8(%rdx) + sbbq %r8,%r8 + movq %r14,16(%rdx) + movq %r15,24(%rdx) + leaq 32(%rdx),%rdx + + incq %rcx + jnz .Lsqrx4x_sub + + negq %r9 + + .byte 0xf3,0xc3 +.size __bn_postx4x_internal,.-__bn_postx4x_internal 
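# The following mont5 helpers support the fixed 5-bit-window exponentiation:
# bn_get_bits5 extracts a 5-bit window of the exponent, bn_scatter5 stores a
# power-table entry at a 256-byte stride, and bn_gather5 reads the table back
# through SSE2 pcmpeqd-generated masks so every table line is touched on each
# lookup and the window index does not leak through the cache.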
.globl bn_get_bits5 .type bn_get_bits5,@function .align 16 bn_get_bits5: leaq 0(%rdi),%r10 leaq 1(%rdi),%r11 movl %esi,%ecx shrl $4,%esi andl $15,%ecx leal -8(%rcx),%eax cmpl $11,%ecx cmovaq %r11,%r10 cmoval %eax,%ecx movzwl (%r10,%rsi,2),%eax shrl %cl,%eax andl $31,%eax .byte 0xf3,0xc3 .size bn_get_bits5,.-bn_get_bits5 .globl bn_scatter5 .type bn_scatter5,@function .align 16 bn_scatter5: cmpl $0,%esi jz .Lscatter_epilogue leaq (%rdx,%rcx,8),%rdx .Lscatter: movq (%rdi),%rax leaq 8(%rdi),%rdi movq %rax,(%rdx) leaq 256(%rdx),%rdx subl $1,%esi jnz .Lscatter .Lscatter_epilogue: .byte 0xf3,0xc3 .size bn_scatter5,.-bn_scatter5 .globl bn_gather5 .type bn_gather5,@function .align 32 bn_gather5: .LSEH_begin_bn_gather5: .byte 0x4c,0x8d,0x14,0x24 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 leaq .Linc(%rip),%rax andq $-16,%rsp movd %ecx,%xmm5 movdqa 0(%rax),%xmm0 movdqa 16(%rax),%xmm1 leaq 128(%rdx),%r11 leaq 128(%rsp),%rax pshufd $0,%xmm5,%xmm5 movdqa %xmm1,%xmm4 movdqa %xmm1,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,-128(%rax) movdqa %xmm4,%xmm0 paddd %xmm2,%xmm3 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,-112(%rax) movdqa %xmm4,%xmm1 paddd %xmm3,%xmm0 pcmpeqd %xmm5,%xmm3 movdqa %xmm2,-96(%rax) movdqa %xmm4,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 movdqa %xmm3,-80(%rax) movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,-64(%rax) movdqa %xmm4,%xmm0 paddd %xmm2,%xmm3 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,-48(%rax) movdqa %xmm4,%xmm1 paddd %xmm3,%xmm0 pcmpeqd %xmm5,%xmm3 movdqa %xmm2,-32(%rax) movdqa %xmm4,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 movdqa %xmm3,-16(%rax) movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,0(%rax) movdqa %xmm4,%xmm0 paddd %xmm2,%xmm3 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,16(%rax) movdqa %xmm4,%xmm1 paddd %xmm3,%xmm0 pcmpeqd %xmm5,%xmm3 movdqa %xmm2,32(%rax) movdqa %xmm4,%xmm2 paddd %xmm0,%xmm1 pcmpeqd %xmm5,%xmm0 movdqa %xmm3,48(%rax) movdqa %xmm4,%xmm3 paddd %xmm1,%xmm2 pcmpeqd %xmm5,%xmm1 movdqa %xmm0,64(%rax) movdqa %xmm4,%xmm0 paddd %xmm2,%xmm3 pcmpeqd %xmm5,%xmm2 movdqa %xmm1,80(%rax) movdqa %xmm4,%xmm1 paddd %xmm3,%xmm0 pcmpeqd %xmm5,%xmm3 movdqa %xmm2,96(%rax) movdqa %xmm4,%xmm2 movdqa %xmm3,112(%rax) jmp .Lgather .align 32 .Lgather: pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 movdqa -128(%r11),%xmm0 movdqa -112(%r11),%xmm1 movdqa -96(%r11),%xmm2 pand -128(%rax),%xmm0 movdqa -80(%r11),%xmm3 pand -112(%rax),%xmm1 por %xmm0,%xmm4 pand -96(%rax),%xmm2 por %xmm1,%xmm5 pand -80(%rax),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 movdqa -64(%r11),%xmm0 movdqa -48(%r11),%xmm1 movdqa -32(%r11),%xmm2 pand -64(%rax),%xmm0 movdqa -16(%r11),%xmm3 pand -48(%rax),%xmm1 por %xmm0,%xmm4 pand -32(%rax),%xmm2 por %xmm1,%xmm5 pand -16(%rax),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 movdqa 0(%r11),%xmm0 movdqa 16(%r11),%xmm1 movdqa 32(%r11),%xmm2 pand 0(%rax),%xmm0 movdqa 48(%r11),%xmm3 pand 16(%rax),%xmm1 por %xmm0,%xmm4 pand 32(%rax),%xmm2 por %xmm1,%xmm5 pand 48(%rax),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 movdqa 64(%r11),%xmm0 movdqa 80(%r11),%xmm1 movdqa 96(%r11),%xmm2 pand 64(%rax),%xmm0 movdqa 112(%r11),%xmm3 pand 80(%rax),%xmm1 por %xmm0,%xmm4 pand 96(%rax),%xmm2 por %xmm1,%xmm5 pand 112(%rax),%xmm3 por %xmm2,%xmm4 por %xmm3,%xmm5 por %xmm5,%xmm4 leaq 256(%r11),%r11 pshufd $0x4e,%xmm4,%xmm0 por %xmm4,%xmm0 movq %xmm0,(%rdi) leaq 8(%rdi),%rdi subl $1,%esi jnz .Lgather leaq (%r10),%rsp .byte 0xf3,0xc3 .LSEH_end_bn_gather5: .size bn_gather5,.-bn_gather5 .align 64 .Linc: .long 0,0, 1,1 .long 2,2, 2,2 .byte 
77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 Index: head/secure/lib/libcrypto/amd64/x86_64cpuid.S =================================================================== --- head/secure/lib/libcrypto/amd64/x86_64cpuid.S (revision 299480) +++ head/secure/lib/libcrypto/amd64/x86_64cpuid.S (revision 299481) @@ -1,260 +1,261 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from x86_64cpuid.pl. .hidden OPENSSL_cpuid_setup .section .init call OPENSSL_cpuid_setup .hidden OPENSSL_ia32cap_P .comm OPENSSL_ia32cap_P,16,4 .text .globl OPENSSL_atomic_add .type OPENSSL_atomic_add,@function .align 16 OPENSSL_atomic_add: movl (%rdi),%eax .Lspin: leaq (%rsi,%rax,1),%r8 .byte 0xf0 cmpxchgl %r8d,(%rdi) jne .Lspin movl %r8d,%eax .byte 0x48,0x98 .byte 0xf3,0xc3 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add .globl OPENSSL_rdtsc .type OPENSSL_rdtsc,@function .align 16 OPENSSL_rdtsc: rdtsc shlq $32,%rdx orq %rdx,%rax .byte 0xf3,0xc3 .size OPENSSL_rdtsc,.-OPENSSL_rdtsc .globl OPENSSL_ia32_cpuid .type OPENSSL_ia32_cpuid,@function .align 16 OPENSSL_ia32_cpuid: movq %rbx,%r8 xorl %eax,%eax movl %eax,8(%rdi) cpuid movl %eax,%r11d xorl %eax,%eax cmpl $0x756e6547,%ebx setne %al movl %eax,%r9d cmpl $0x49656e69,%edx setne %al orl %eax,%r9d cmpl $0x6c65746e,%ecx setne %al orl %eax,%r9d jz .Lintel cmpl $0x68747541,%ebx setne %al movl %eax,%r10d cmpl $0x69746E65,%edx setne %al orl %eax,%r10d cmpl $0x444D4163,%ecx setne %al orl %eax,%r10d jnz .Lintel movl $0x80000000,%eax cpuid cmpl $0x80000001,%eax jb .Lintel movl %eax,%r10d movl $0x80000001,%eax cpuid orl %ecx,%r9d andl $0x00000801,%r9d cmpl $0x80000008,%r10d jb .Lintel movl $0x80000008,%eax cpuid movzbq %cl,%r10 incq %r10 movl $1,%eax cpuid btl $28,%edx jnc .Lgeneric shrl $16,%ebx cmpb %r10b,%bl ja .Lgeneric andl $0xefffffff,%edx jmp .Lgeneric .Lintel: cmpl $4,%r11d movl $-1,%r10d jb .Lnocacheinfo movl $4,%eax movl $0,%ecx cpuid movl %eax,%r10d shrl $14,%r10d andl $0xfff,%r10d cmpl $7,%r11d jb .Lnocacheinfo movl $7,%eax xorl %ecx,%ecx cpuid movl %ebx,8(%rdi) .Lnocacheinfo: movl $1,%eax cpuid andl $0xbfefffff,%edx cmpl $0,%r9d jne .Lnotintel orl $0x40000000,%edx andb $15,%ah cmpb $15,%ah jne .Lnotintel orl $0x00100000,%edx .Lnotintel: btl $28,%edx jnc .Lgeneric andl $0xefffffff,%edx cmpl $0,%r10d je .Lgeneric orl $0x10000000,%edx shrl $16,%ebx cmpb $1,%bl ja .Lgeneric andl $0xefffffff,%edx .Lgeneric: andl $0x00000800,%r9d andl $0xfffff7ff,%ecx orl %ecx,%r9d movl %edx,%r10d btl $27,%r9d jnc .Lclear_avx xorl %ecx,%ecx .byte 0x0f,0x01,0xd0 andl $6,%eax cmpl $6,%eax je .Ldone .Lclear_avx: movl $0xefffe7ff,%eax andl %eax,%r9d andl $0xffffffdf,8(%rdi) .Ldone: shlq $32,%r9 movl %r10d,%eax movq %r8,%rbx orq %r9,%rax .byte 0xf3,0xc3 .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid .globl OPENSSL_cleanse .type OPENSSL_cleanse,@function .align 16 OPENSSL_cleanse: xorq %rax,%rax cmpq $15,%rsi jae .Lot cmpq $0,%rsi je .Lret .Little: movb %al,(%rdi) subq $1,%rsi leaq 1(%rdi),%rdi jnz .Little .Lret: .byte 0xf3,0xc3 .align 16 .Lot: testq $7,%rdi jz .Laligned movb %al,(%rdi) leaq -1(%rsi),%rsi leaq 1(%rdi),%rdi jmp .Lot .Laligned: movq %rax,(%rdi) leaq -8(%rsi),%rsi testq $-8,%rsi leaq 8(%rdi),%rdi jnz .Laligned cmpq $0,%rsi jne .Little .byte 0xf3,0xc3 .size OPENSSL_cleanse,.-OPENSSL_cleanse 
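# OPENSSL_wipe_cpu clears %xmm0-%xmm15 and the call-clobbered general-purpose
# registers and returns the caller's stack pointer; OPENSSL_ia32_rdrand and
# OPENSSL_ia32_rdseed below retry the RDRAND/RDSEED instructions up to eight
# times, returning 0 if no random word could be obtained.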
.globl OPENSSL_wipe_cpu .type OPENSSL_wipe_cpu,@function .align 16 OPENSSL_wipe_cpu: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 pxor %xmm8,%xmm8 pxor %xmm9,%xmm9 pxor %xmm10,%xmm10 pxor %xmm11,%xmm11 pxor %xmm12,%xmm12 pxor %xmm13,%xmm13 pxor %xmm14,%xmm14 pxor %xmm15,%xmm15 xorq %rcx,%rcx xorq %rdx,%rdx xorq %rsi,%rsi xorq %rdi,%rdi xorq %r8,%r8 xorq %r9,%r9 xorq %r10,%r10 xorq %r11,%r11 leaq 8(%rsp),%rax .byte 0xf3,0xc3 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu .globl OPENSSL_ia32_rdrand .type OPENSSL_ia32_rdrand,@function .align 16 OPENSSL_ia32_rdrand: movl $8,%ecx .Loop_rdrand: .byte 72,15,199,240 jc .Lbreak_rdrand loop .Loop_rdrand .Lbreak_rdrand: cmpq $0,%rax cmoveq %rcx,%rax .byte 0xf3,0xc3 .size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand .globl OPENSSL_ia32_rdseed .type OPENSSL_ia32_rdseed,@function .align 16 OPENSSL_ia32_rdseed: movl $8,%ecx .Loop_rdseed: .byte 72,15,199,248 jc .Lbreak_rdseed loop .Loop_rdseed .Lbreak_rdseed: cmpq $0,%rax cmoveq %rcx,%rax .byte 0xf3,0xc3 .size OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed Index: head/secure/lib/libcrypto/i386/aes-586.S =================================================================== --- head/secure/lib/libcrypto/i386/aes-586.S (revision 299480) +++ head/secure/lib/libcrypto/i386/aes-586.S (revision 299481) @@ -1,6492 +1,6493 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from aes-586.pl. #ifdef PIC .file "aes-586.S" .text .type _x86_AES_encrypt_compact,@function .align 16 _x86_AES_encrypt_compact: movl %edi,20(%esp) xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) movl -128(%ebp),%edi movl -96(%ebp),%esi movl -64(%ebp),%edi movl -32(%ebp),%esi movl (%ebp),%edi movl 32(%ebp),%esi movl 64(%ebp),%edi movl 96(%ebp),%esi .align 16 .L000loop: movl %eax,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi shrl $16,%ebx movzbl -128(%ebp,%esi,1),%esi movzbl %ch,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %eax,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi shrl $24,%ecx movzbl -128(%ebp,%esi,1),%esi movzbl %dh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edx andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi andl $255,%edx movzbl -128(%ebp,%edx,1),%edx movzbl %ah,%eax movzbl -128(%ebp,%eax,1),%eax shll $8,%eax xorl %eax,%edx movl 4(%esp),%eax andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx shll $16,%ebx xorl %ebx,%edx movl 8(%esp),%ebx movzbl -128(%ebp,%ecx,1),%ecx shll $24,%ecx xorl %ecx,%edx movl %esi,%ecx movl $2155905152,%ebp andl %ecx,%ebp leal (%ecx,%ecx,1),%edi movl %ebp,%esi shrl $7,%ebp andl $4278124286,%edi subl %ebp,%esi movl %ecx,%ebp andl $454761243,%esi rorl $16,%ebp xorl %edi,%esi movl %ecx,%edi xorl %esi,%ecx rorl $24,%edi xorl %ebp,%esi 
roll $24,%ecx xorl %edi,%esi movl $2155905152,%ebp xorl %esi,%ecx andl %edx,%ebp leal (%edx,%edx,1),%edi movl %ebp,%esi shrl $7,%ebp andl $4278124286,%edi subl %ebp,%esi movl %edx,%ebp andl $454761243,%esi rorl $16,%ebp xorl %edi,%esi movl %edx,%edi xorl %esi,%edx rorl $24,%edi xorl %ebp,%esi roll $24,%edx xorl %edi,%esi movl $2155905152,%ebp xorl %esi,%edx andl %eax,%ebp leal (%eax,%eax,1),%edi movl %ebp,%esi shrl $7,%ebp andl $4278124286,%edi subl %ebp,%esi movl %eax,%ebp andl $454761243,%esi rorl $16,%ebp xorl %edi,%esi movl %eax,%edi xorl %esi,%eax rorl $24,%edi xorl %ebp,%esi roll $24,%eax xorl %edi,%esi movl $2155905152,%ebp xorl %esi,%eax andl %ebx,%ebp leal (%ebx,%ebx,1),%edi movl %ebp,%esi shrl $7,%ebp andl $4278124286,%edi subl %ebp,%esi movl %ebx,%ebp andl $454761243,%esi rorl $16,%ebp xorl %edi,%esi movl %ebx,%edi xorl %esi,%ebx rorl $24,%edi xorl %ebp,%esi roll $24,%ebx xorl %edi,%esi xorl %esi,%ebx movl 20(%esp),%edi movl 28(%esp),%ebp addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx cmpl 24(%esp),%edi movl %edi,20(%esp) jb .L000loop movl %eax,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi shrl $16,%ebx movzbl -128(%ebp,%esi,1),%esi movzbl %ch,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %eax,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi shrl $24,%ecx movzbl -128(%ebp,%esi,1),%esi movzbl %dh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edx andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl 20(%esp),%edi andl $255,%edx movzbl -128(%ebp,%edx,1),%edx movzbl %ah,%eax movzbl -128(%ebp,%eax,1),%eax shll $8,%eax xorl %eax,%edx movl 4(%esp),%eax andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx shll $16,%ebx xorl %ebx,%edx movl 8(%esp),%ebx movzbl -128(%ebp,%ecx,1),%ecx shll $24,%ecx xorl %ecx,%edx movl %esi,%ecx xorl 16(%edi),%eax xorl 20(%edi),%ebx xorl 24(%edi),%ecx xorl 28(%edi),%edx ret .size _x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact .type _sse_AES_encrypt_compact,@function .align 16 _sse_AES_encrypt_compact: pxor (%edi),%mm0 pxor 8(%edi),%mm4 movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) movl $454761243,%eax movl %eax,8(%esp) movl %eax,12(%esp) movl -128(%ebp),%eax movl -96(%ebp),%ebx movl -64(%ebp),%ecx movl -32(%ebp),%edx movl (%ebp),%eax movl 32(%ebp),%ebx movl 64(%ebp),%ecx movl 96(%ebp),%edx .align 16 .L001loop: pshufw $8,%mm0,%mm1 pshufw $13,%mm4,%mm5 movd %mm1,%eax movd %mm5,%ebx movl %edi,20(%esp) movzbl %al,%esi movzbl %ah,%edx pshufw $13,%mm0,%mm2 movzbl -128(%ebp,%esi,1),%ecx movzbl %bl,%edi movzbl -128(%ebp,%edx,1),%edx shrl $16,%eax shll $8,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $16,%esi pshufw $8,%mm4,%mm6 orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %ah,%edi shll $24,%esi shrl $16,%ebx orl %esi,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $8,%esi orl %esi,%ecx movzbl 
-128(%ebp,%edi,1),%esi movzbl %al,%edi shll $24,%esi orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %bl,%edi movd %mm2,%eax movd %ecx,%mm0 movzbl -128(%ebp,%edi,1),%ecx movzbl %ah,%edi shll $16,%ecx movd %mm6,%ebx orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $24,%esi orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %bl,%edi shll $8,%esi shrl $16,%ebx orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %al,%edi shrl $16,%eax movd %ecx,%mm1 movzbl -128(%ebp,%edi,1),%ecx movzbl %ah,%edi shll $16,%ecx andl $255,%eax orl %esi,%ecx punpckldq %mm1,%mm0 movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $24,%esi andl $255,%ebx movzbl -128(%ebp,%eax,1),%eax orl %esi,%ecx shll $16,%eax movzbl -128(%ebp,%edi,1),%esi orl %eax,%edx shll $8,%esi movzbl -128(%ebp,%ebx,1),%ebx orl %esi,%ecx orl %ebx,%edx movl 20(%esp),%edi movd %ecx,%mm4 movd %edx,%mm5 punpckldq %mm5,%mm4 addl $16,%edi cmpl 24(%esp),%edi ja .L002out movq 8(%esp),%mm2 pxor %mm3,%mm3 pxor %mm7,%mm7 movq %mm0,%mm1 movq %mm4,%mm5 pcmpgtb %mm0,%mm3 pcmpgtb %mm4,%mm7 pand %mm2,%mm3 pand %mm2,%mm7 pshufw $177,%mm0,%mm2 pshufw $177,%mm4,%mm6 paddb %mm0,%mm0 paddb %mm4,%mm4 pxor %mm3,%mm0 pxor %mm7,%mm4 pshufw $177,%mm2,%mm3 pshufw $177,%mm6,%mm7 pxor %mm0,%mm1 pxor %mm4,%mm5 pxor %mm2,%mm0 pxor %mm6,%mm4 movq %mm3,%mm2 movq %mm7,%mm6 pslld $8,%mm3 pslld $8,%mm7 psrld $24,%mm2 psrld $24,%mm6 pxor %mm3,%mm0 pxor %mm7,%mm4 pxor %mm2,%mm0 pxor %mm6,%mm4 movq %mm1,%mm3 movq %mm5,%mm7 movq (%edi),%mm2 movq 8(%edi),%mm6 psrld $8,%mm1 psrld $8,%mm5 movl -128(%ebp),%eax pslld $24,%mm3 pslld $24,%mm7 movl -64(%ebp),%ebx pxor %mm1,%mm0 pxor %mm5,%mm4 movl (%ebp),%ecx pxor %mm3,%mm0 pxor %mm7,%mm4 movl 64(%ebp),%edx pxor %mm2,%mm0 pxor %mm6,%mm4 jmp .L001loop .align 16 .L002out: pxor (%edi),%mm0 pxor 8(%edi),%mm4 ret .size _sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact .type _x86_AES_encrypt,@function .align 16 _x86_AES_encrypt: movl %edi,20(%esp) xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) .align 16 .L003loop: movl %eax,%esi andl $255,%esi movl (%ebp,%esi,8),%esi movzbl %bh,%edi xorl 3(%ebp,%edi,8),%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi xorl 2(%ebp,%edi,8),%esi movl %edx,%edi shrl $24,%edi xorl 1(%ebp,%edi,8),%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi shrl $16,%ebx movl (%ebp,%esi,8),%esi movzbl %ch,%edi xorl 3(%ebp,%edi,8),%esi movl %edx,%edi shrl $16,%edi andl $255,%edi xorl 2(%ebp,%edi,8),%esi movl %eax,%edi shrl $24,%edi xorl 1(%ebp,%edi,8),%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi shrl $24,%ecx movl (%ebp,%esi,8),%esi movzbl %dh,%edi xorl 3(%ebp,%edi,8),%esi movl %eax,%edi shrl $16,%edi andl $255,%edx andl $255,%edi xorl 2(%ebp,%edi,8),%esi movzbl %bh,%edi xorl 1(%ebp,%edi,8),%esi movl 20(%esp),%edi movl (%ebp,%edx,8),%edx movzbl %ah,%eax xorl 3(%ebp,%eax,8),%edx movl 4(%esp),%eax andl $255,%ebx xorl 2(%ebp,%ebx,8),%edx movl 8(%esp),%ebx xorl 1(%ebp,%ecx,8),%edx movl %esi,%ecx addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx cmpl 24(%esp),%edi movl %edi,20(%esp) jb .L003loop movl %eax,%esi andl $255,%esi movl 2(%ebp,%esi,8),%esi andl $255,%esi movzbl %bh,%edi movl (%ebp,%edi,8),%edi andl $65280,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movl (%ebp,%edi,8),%edi andl $16711680,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movl 2(%ebp,%edi,8),%edi andl $4278190080,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl 
$255,%esi shrl $16,%ebx movl 2(%ebp,%esi,8),%esi andl $255,%esi movzbl %ch,%edi movl (%ebp,%edi,8),%edi andl $65280,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movl (%ebp,%edi,8),%edi andl $16711680,%edi xorl %edi,%esi movl %eax,%edi shrl $24,%edi movl 2(%ebp,%edi,8),%edi andl $4278190080,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi shrl $24,%ecx movl 2(%ebp,%esi,8),%esi andl $255,%esi movzbl %dh,%edi movl (%ebp,%edi,8),%edi andl $65280,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edx andl $255,%edi movl (%ebp,%edi,8),%edi andl $16711680,%edi xorl %edi,%esi movzbl %bh,%edi movl 2(%ebp,%edi,8),%edi andl $4278190080,%edi xorl %edi,%esi movl 20(%esp),%edi andl $255,%edx movl 2(%ebp,%edx,8),%edx andl $255,%edx movzbl %ah,%eax movl (%ebp,%eax,8),%eax andl $65280,%eax xorl %eax,%edx movl 4(%esp),%eax andl $255,%ebx movl (%ebp,%ebx,8),%ebx andl $16711680,%ebx xorl %ebx,%edx movl 8(%esp),%ebx movl 2(%ebp,%ecx,8),%ecx andl $4278190080,%ecx xorl %ecx,%edx movl %esi,%ecx addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx ret .align 64 .LAES_Te: .long 2774754246,2774754246 .long 2222750968,2222750968 .long 2574743534,2574743534 .long 2373680118,2373680118 .long 234025727,234025727 .long 3177933782,3177933782 .long 2976870366,2976870366 .long 1422247313,1422247313 .long 1345335392,1345335392 .long 50397442,50397442 .long 2842126286,2842126286 .long 2099981142,2099981142 .long 436141799,436141799 .long 1658312629,1658312629 .long 3870010189,3870010189 .long 2591454956,2591454956 .long 1170918031,1170918031 .long 2642575903,2642575903 .long 1086966153,1086966153 .long 2273148410,2273148410 .long 368769775,368769775 .long 3948501426,3948501426 .long 3376891790,3376891790 .long 200339707,200339707 .long 3970805057,3970805057 .long 1742001331,1742001331 .long 4255294047,4255294047 .long 3937382213,3937382213 .long 3214711843,3214711843 .long 4154762323,4154762323 .long 2524082916,2524082916 .long 1539358875,1539358875 .long 3266819957,3266819957 .long 486407649,486407649 .long 2928907069,2928907069 .long 1780885068,1780885068 .long 1513502316,1513502316 .long 1094664062,1094664062 .long 49805301,49805301 .long 1338821763,1338821763 .long 1546925160,1546925160 .long 4104496465,4104496465 .long 887481809,887481809 .long 150073849,150073849 .long 2473685474,2473685474 .long 1943591083,1943591083 .long 1395732834,1395732834 .long 1058346282,1058346282 .long 201589768,201589768 .long 1388824469,1388824469 .long 1696801606,1696801606 .long 1589887901,1589887901 .long 672667696,672667696 .long 2711000631,2711000631 .long 251987210,251987210 .long 3046808111,3046808111 .long 151455502,151455502 .long 907153956,907153956 .long 2608889883,2608889883 .long 1038279391,1038279391 .long 652995533,652995533 .long 1764173646,1764173646 .long 3451040383,3451040383 .long 2675275242,2675275242 .long 453576978,453576978 .long 2659418909,2659418909 .long 1949051992,1949051992 .long 773462580,773462580 .long 756751158,756751158 .long 2993581788,2993581788 .long 3998898868,3998898868 .long 4221608027,4221608027 .long 4132590244,4132590244 .long 1295727478,1295727478 .long 1641469623,1641469623 .long 3467883389,3467883389 .long 2066295122,2066295122 .long 1055122397,1055122397 .long 1898917726,1898917726 .long 2542044179,2542044179 .long 4115878822,4115878822 .long 1758581177,1758581177 .long 0,0 .long 753790401,753790401 .long 1612718144,1612718144 .long 536673507,536673507 .long 3367088505,3367088505 .long 3982187446,3982187446 .long 
3194645204,3194645204 .long 1187761037,1187761037 .long 3653156455,3653156455 .long 1262041458,1262041458 .long 3729410708,3729410708 .long 3561770136,3561770136 .long 3898103984,3898103984 .long 1255133061,1255133061 .long 1808847035,1808847035 .long 720367557,720367557 .long 3853167183,3853167183 .long 385612781,385612781 .long 3309519750,3309519750 .long 3612167578,3612167578 .long 1429418854,1429418854 .long 2491778321,2491778321 .long 3477423498,3477423498 .long 284817897,284817897 .long 100794884,100794884 .long 2172616702,2172616702 .long 4031795360,4031795360 .long 1144798328,1144798328 .long 3131023141,3131023141 .long 3819481163,3819481163 .long 4082192802,4082192802 .long 4272137053,4272137053 .long 3225436288,3225436288 .long 2324664069,2324664069 .long 2912064063,2912064063 .long 3164445985,3164445985 .long 1211644016,1211644016 .long 83228145,83228145 .long 3753688163,3753688163 .long 3249976951,3249976951 .long 1977277103,1977277103 .long 1663115586,1663115586 .long 806359072,806359072 .long 452984805,452984805 .long 250868733,250868733 .long 1842533055,1842533055 .long 1288555905,1288555905 .long 336333848,336333848 .long 890442534,890442534 .long 804056259,804056259 .long 3781124030,3781124030 .long 2727843637,2727843637 .long 3427026056,3427026056 .long 957814574,957814574 .long 1472513171,1472513171 .long 4071073621,4071073621 .long 2189328124,2189328124 .long 1195195770,1195195770 .long 2892260552,2892260552 .long 3881655738,3881655738 .long 723065138,723065138 .long 2507371494,2507371494 .long 2690670784,2690670784 .long 2558624025,2558624025 .long 3511635870,3511635870 .long 2145180835,2145180835 .long 1713513028,1713513028 .long 2116692564,2116692564 .long 2878378043,2878378043 .long 2206763019,2206763019 .long 3393603212,3393603212 .long 703524551,703524551 .long 3552098411,3552098411 .long 1007948840,1007948840 .long 2044649127,2044649127 .long 3797835452,3797835452 .long 487262998,487262998 .long 1994120109,1994120109 .long 1004593371,1004593371 .long 1446130276,1446130276 .long 1312438900,1312438900 .long 503974420,503974420 .long 3679013266,3679013266 .long 168166924,168166924 .long 1814307912,1814307912 .long 3831258296,3831258296 .long 1573044895,1573044895 .long 1859376061,1859376061 .long 4021070915,4021070915 .long 2791465668,2791465668 .long 2828112185,2828112185 .long 2761266481,2761266481 .long 937747667,937747667 .long 2339994098,2339994098 .long 854058965,854058965 .long 1137232011,1137232011 .long 1496790894,1496790894 .long 3077402074,3077402074 .long 2358086913,2358086913 .long 1691735473,1691735473 .long 3528347292,3528347292 .long 3769215305,3769215305 .long 3027004632,3027004632 .long 4199962284,4199962284 .long 133494003,133494003 .long 636152527,636152527 .long 2942657994,2942657994 .long 2390391540,2390391540 .long 3920539207,3920539207 .long 403179536,403179536 .long 3585784431,3585784431 .long 2289596656,2289596656 .long 1864705354,1864705354 .long 1915629148,1915629148 .long 605822008,605822008 .long 4054230615,4054230615 .long 3350508659,3350508659 .long 1371981463,1371981463 .long 602466507,602466507 .long 2094914977,2094914977 .long 2624877800,2624877800 .long 555687742,555687742 .long 3712699286,3712699286 .long 3703422305,3703422305 .long 2257292045,2257292045 .long 2240449039,2240449039 .long 2423288032,2423288032 .long 1111375484,1111375484 .long 3300242801,3300242801 .long 2858837708,2858837708 .long 3628615824,3628615824 .long 84083462,84083462 .long 32962295,32962295 .long 302911004,302911004 .long 2741068226,2741068226 .long 
1597322602,1597322602 .long 4183250862,4183250862 .long 3501832553,3501832553 .long 2441512471,2441512471 .long 1489093017,1489093017 .long 656219450,656219450 .long 3114180135,3114180135 .long 954327513,954327513 .long 335083755,335083755 .long 3013122091,3013122091 .long 856756514,856756514 .long 3144247762,3144247762 .long 1893325225,1893325225 .long 2307821063,2307821063 .long 2811532339,2811532339 .long 3063651117,3063651117 .long 572399164,572399164 .long 2458355477,2458355477 .long 552200649,552200649 .long 1238290055,1238290055 .long 4283782570,4283782570 .long 2015897680,2015897680 .long 2061492133,2061492133 .long 2408352771,2408352771 .long 4171342169,4171342169 .long 2156497161,2156497161 .long 386731290,386731290 .long 3669999461,3669999461 .long 837215959,837215959 .long 3326231172,3326231172 .long 3093850320,3093850320 .long 3275833730,3275833730 .long 2962856233,2962856233 .long 1999449434,1999449434 .long 286199582,286199582 .long 3417354363,3417354363 .long 4233385128,4233385128 .long 3602627437,3602627437 .long 974525996,974525996 .byte 99,124,119,123,242,107,111,197 .byte 48,1,103,43,254,215,171,118 .byte 202,130,201,125,250,89,71,240 .byte 173,212,162,175,156,164,114,192 .byte 183,253,147,38,54,63,247,204 .byte 52,165,229,241,113,216,49,21 .byte 4,199,35,195,24,150,5,154 .byte 7,18,128,226,235,39,178,117 .byte 9,131,44,26,27,110,90,160 .byte 82,59,214,179,41,227,47,132 .byte 83,209,0,237,32,252,177,91 .byte 106,203,190,57,74,76,88,207 .byte 208,239,170,251,67,77,51,133 .byte 69,249,2,127,80,60,159,168 .byte 81,163,64,143,146,157,56,245 .byte 188,182,218,33,16,255,243,210 .byte 205,12,19,236,95,151,68,23 .byte 196,167,126,61,100,93,25,115 .byte 96,129,79,220,34,42,144,136 .byte 70,238,184,20,222,94,11,219 .byte 224,50,58,10,73,6,36,92 .byte 194,211,172,98,145,149,228,121 .byte 231,200,55,109,141,213,78,169 .byte 108,86,244,234,101,122,174,8 .byte 186,120,37,46,28,166,180,198 .byte 232,221,116,31,75,189,139,138 .byte 112,62,181,102,72,3,246,14 .byte 97,53,87,185,134,193,29,158 .byte 225,248,152,17,105,217,142,148 .byte 155,30,135,233,206,85,40,223 .byte 140,161,137,13,191,230,66,104 .byte 65,153,45,15,176,84,187,22 .byte 99,124,119,123,242,107,111,197 .byte 48,1,103,43,254,215,171,118 .byte 202,130,201,125,250,89,71,240 .byte 173,212,162,175,156,164,114,192 .byte 183,253,147,38,54,63,247,204 .byte 52,165,229,241,113,216,49,21 .byte 4,199,35,195,24,150,5,154 .byte 7,18,128,226,235,39,178,117 .byte 9,131,44,26,27,110,90,160 .byte 82,59,214,179,41,227,47,132 .byte 83,209,0,237,32,252,177,91 .byte 106,203,190,57,74,76,88,207 .byte 208,239,170,251,67,77,51,133 .byte 69,249,2,127,80,60,159,168 .byte 81,163,64,143,146,157,56,245 .byte 188,182,218,33,16,255,243,210 .byte 205,12,19,236,95,151,68,23 .byte 196,167,126,61,100,93,25,115 .byte 96,129,79,220,34,42,144,136 .byte 70,238,184,20,222,94,11,219 .byte 224,50,58,10,73,6,36,92 .byte 194,211,172,98,145,149,228,121 .byte 231,200,55,109,141,213,78,169 .byte 108,86,244,234,101,122,174,8 .byte 186,120,37,46,28,166,180,198 .byte 232,221,116,31,75,189,139,138 .byte 112,62,181,102,72,3,246,14 .byte 97,53,87,185,134,193,29,158 .byte 225,248,152,17,105,217,142,148 .byte 155,30,135,233,206,85,40,223 .byte 140,161,137,13,191,230,66,104 .byte 65,153,45,15,176,84,187,22 .byte 99,124,119,123,242,107,111,197 .byte 48,1,103,43,254,215,171,118 .byte 202,130,201,125,250,89,71,240 .byte 173,212,162,175,156,164,114,192 .byte 183,253,147,38,54,63,247,204 .byte 52,165,229,241,113,216,49,21 .byte 4,199,35,195,24,150,5,154 .byte 
7,18,128,226,235,39,178,117 .byte 9,131,44,26,27,110,90,160 .byte 82,59,214,179,41,227,47,132 .byte 83,209,0,237,32,252,177,91 .byte 106,203,190,57,74,76,88,207 .byte 208,239,170,251,67,77,51,133 .byte 69,249,2,127,80,60,159,168 .byte 81,163,64,143,146,157,56,245 .byte 188,182,218,33,16,255,243,210 .byte 205,12,19,236,95,151,68,23 .byte 196,167,126,61,100,93,25,115 .byte 96,129,79,220,34,42,144,136 .byte 70,238,184,20,222,94,11,219 .byte 224,50,58,10,73,6,36,92 .byte 194,211,172,98,145,149,228,121 .byte 231,200,55,109,141,213,78,169 .byte 108,86,244,234,101,122,174,8 .byte 186,120,37,46,28,166,180,198 .byte 232,221,116,31,75,189,139,138 .byte 112,62,181,102,72,3,246,14 .byte 97,53,87,185,134,193,29,158 .byte 225,248,152,17,105,217,142,148 .byte 155,30,135,233,206,85,40,223 .byte 140,161,137,13,191,230,66,104 .byte 65,153,45,15,176,84,187,22 .byte 99,124,119,123,242,107,111,197 .byte 48,1,103,43,254,215,171,118 .byte 202,130,201,125,250,89,71,240 .byte 173,212,162,175,156,164,114,192 .byte 183,253,147,38,54,63,247,204 .byte 52,165,229,241,113,216,49,21 .byte 4,199,35,195,24,150,5,154 .byte 7,18,128,226,235,39,178,117 .byte 9,131,44,26,27,110,90,160 .byte 82,59,214,179,41,227,47,132 .byte 83,209,0,237,32,252,177,91 .byte 106,203,190,57,74,76,88,207 .byte 208,239,170,251,67,77,51,133 .byte 69,249,2,127,80,60,159,168 .byte 81,163,64,143,146,157,56,245 .byte 188,182,218,33,16,255,243,210 .byte 205,12,19,236,95,151,68,23 .byte 196,167,126,61,100,93,25,115 .byte 96,129,79,220,34,42,144,136 .byte 70,238,184,20,222,94,11,219 .byte 224,50,58,10,73,6,36,92 .byte 194,211,172,98,145,149,228,121 .byte 231,200,55,109,141,213,78,169 .byte 108,86,244,234,101,122,174,8 .byte 186,120,37,46,28,166,180,198 .byte 232,221,116,31,75,189,139,138 .byte 112,62,181,102,72,3,246,14 .byte 97,53,87,185,134,193,29,158 .byte 225,248,152,17,105,217,142,148 .byte 155,30,135,233,206,85,40,223 .byte 140,161,137,13,191,230,66,104 .byte 65,153,45,15,176,84,187,22 .long 1,2,4,8 .long 16,32,64,128 .long 27,54,0,0 .long 0,0,0,0 .size _x86_AES_encrypt,.-_x86_AES_encrypt .globl AES_encrypt .type AES_encrypt,@function .align 16 AES_encrypt: .L_AES_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 28(%esp),%edi movl %esp,%eax subl $36,%esp andl $-64,%esp leal -127(%edi),%ebx subl %esp,%ebx negl %ebx andl $960,%ebx subl %ebx,%esp addl $4,%esp movl %eax,28(%esp) call .L004pic_point .L004pic_point: popl %ebp leal OPENSSL_ia32cap_P-.L004pic_point(%ebp),%eax leal .LAES_Te-.L004pic_point(%ebp),%ebp leal 764(%esp),%ebx subl %ebp,%ebx andl $768,%ebx leal 2176(%ebp,%ebx,1),%ebp btl $25,(%eax) jnc .L005x86 movq (%esi),%mm0 movq 8(%esi),%mm4 call _sse_AES_encrypt_compact movl 28(%esp),%esp movl 24(%esp),%esi movq %mm0,(%esi) movq %mm4,8(%esi) emms popl %edi popl %esi popl %ebx popl %ebp ret .align 16 .L005x86: movl %ebp,24(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx call _x86_AES_encrypt_compact movl 28(%esp),%esp movl 24(%esp),%esi movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size AES_encrypt,.-.L_AES_encrypt_begin .type _x86_AES_decrypt_compact,@function .align 16 _x86_AES_decrypt_compact: movl %edi,20(%esp) xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) movl -128(%ebp),%edi movl -96(%ebp),%esi movl -64(%ebp),%edi movl -32(%ebp),%esi movl (%ebp),%edi movl 32(%ebp),%esi movl 64(%ebp),%edi movl 
96(%ebp),%esi .align 16 .L006loop: movl %eax,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %dh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ebx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %ah,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ecx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi andl $255,%edx movzbl -128(%ebp,%edx,1),%edx movzbl %ch,%ecx movzbl -128(%ebp,%ecx,1),%ecx shll $8,%ecx xorl %ecx,%edx movl %esi,%ecx shrl $16,%ebx andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx shll $16,%ebx xorl %ebx,%edx shrl $24,%eax movzbl -128(%ebp,%eax,1),%eax shll $24,%eax xorl %eax,%edx movl $2155905152,%edi andl %ecx,%edi movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%eax subl %edi,%esi andl $4278124286,%eax andl $454761243,%esi xorl %esi,%eax movl $2155905152,%edi andl %eax,%edi movl %edi,%esi shrl $7,%edi leal (%eax,%eax,1),%ebx subl %edi,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %ecx,%eax xorl %esi,%ebx movl $2155905152,%edi andl %ebx,%edi movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ebp subl %edi,%esi andl $4278124286,%ebp andl $454761243,%esi xorl %ecx,%ebx roll $8,%ecx xorl %esi,%ebp xorl %eax,%ecx xorl %ebp,%eax xorl %ebx,%ecx xorl %ebp,%ebx roll $24,%eax xorl %ebp,%ecx roll $16,%ebx xorl %eax,%ecx roll $8,%ebp xorl %ebx,%ecx movl 4(%esp),%eax xorl %ebp,%ecx movl %ecx,12(%esp) movl $2155905152,%edi andl %edx,%edi movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebx subl %edi,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %esi,%ebx movl $2155905152,%edi andl %ebx,%edi movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %edx,%ebx xorl %esi,%ecx movl $2155905152,%edi andl %ecx,%edi movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%ebp subl %edi,%esi andl $4278124286,%ebp andl $454761243,%esi xorl %edx,%ecx roll $8,%edx xorl %esi,%ebp xorl %ebx,%edx xorl %ebp,%ebx xorl %ecx,%edx xorl %ebp,%ecx roll $24,%ebx xorl %ebp,%edx roll $16,%ecx xorl %ebx,%edx roll $8,%ebp xorl %ecx,%edx movl 8(%esp),%ebx xorl %ebp,%edx movl %edx,16(%esp) movl $2155905152,%edi andl %eax,%edi movl %edi,%esi shrl $7,%edi leal (%eax,%eax,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %esi,%ecx movl $2155905152,%edi andl %ecx,%edi movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%edx subl %edi,%esi andl $4278124286,%edx andl $454761243,%esi xorl %eax,%ecx xorl %esi,%edx movl $2155905152,%edi andl %edx,%edi movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebp subl %edi,%esi andl $4278124286,%ebp andl $454761243,%esi xorl %eax,%edx roll $8,%eax xorl %esi,%ebp xorl %ecx,%eax xorl %ebp,%ecx xorl %edx,%eax xorl %ebp,%edx roll $24,%ecx xorl %ebp,%eax roll $16,%edx xorl %ecx,%eax roll $8,%ebp xorl %edx,%eax xorl %ebp,%eax movl $2155905152,%edi andl %ebx,%edi movl %edi,%esi shrl $7,%edi leal 
(%ebx,%ebx,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %esi,%ecx movl $2155905152,%edi andl %ecx,%edi movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%edx subl %edi,%esi andl $4278124286,%edx andl $454761243,%esi xorl %ebx,%ecx xorl %esi,%edx movl $2155905152,%edi andl %edx,%edi movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebp subl %edi,%esi andl $4278124286,%ebp andl $454761243,%esi xorl %ebx,%edx roll $8,%ebx xorl %esi,%ebp xorl %ecx,%ebx xorl %ebp,%ecx xorl %edx,%ebx xorl %ebp,%edx roll $24,%ecx xorl %ebp,%ebx roll $16,%edx xorl %ecx,%ebx roll $8,%ebp xorl %edx,%ebx movl 12(%esp),%ecx xorl %ebp,%ebx movl 16(%esp),%edx movl 20(%esp),%edi movl 28(%esp),%ebp addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx cmpl 24(%esp),%edi movl %edi,20(%esp) jb .L006loop movl %eax,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %dh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ebx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %ah,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ecx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl 20(%esp),%edi andl $255,%edx movzbl -128(%ebp,%edx,1),%edx movzbl %ch,%ecx movzbl -128(%ebp,%ecx,1),%ecx shll $8,%ecx xorl %ecx,%edx movl %esi,%ecx shrl $16,%ebx andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx shll $16,%ebx xorl %ebx,%edx movl 8(%esp),%ebx shrl $24,%eax movzbl -128(%ebp,%eax,1),%eax shll $24,%eax xorl %eax,%edx movl 4(%esp),%eax xorl 16(%edi),%eax xorl 20(%edi),%ebx xorl 24(%edi),%ecx xorl 28(%edi),%edx ret .size _x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact .type _sse_AES_decrypt_compact,@function .align 16 _sse_AES_decrypt_compact: pxor (%edi),%mm0 pxor 8(%edi),%mm4 movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) movl $454761243,%eax movl %eax,8(%esp) movl %eax,12(%esp) movl -128(%ebp),%eax movl -96(%ebp),%ebx movl -64(%ebp),%ecx movl -32(%ebp),%edx movl (%ebp),%eax movl 32(%ebp),%ebx movl 64(%ebp),%ecx movl 96(%ebp),%edx .align 16 .L007loop: pshufw $12,%mm0,%mm1 pshufw $9,%mm4,%mm5 movd %mm1,%eax movd %mm5,%ebx movl %edi,20(%esp) movzbl %al,%esi movzbl %ah,%edx pshufw $6,%mm0,%mm2 movzbl -128(%ebp,%esi,1),%ecx movzbl %bl,%edi movzbl -128(%ebp,%edx,1),%edx shrl $16,%eax shll $8,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $16,%esi pshufw $3,%mm4,%mm6 orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %ah,%edi shll $24,%esi shrl $16,%ebx orl %esi,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $24,%esi orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %al,%edi shll $8,%esi movd %mm2,%eax orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %bl,%edi shll $16,%esi movd %mm6,%ebx movd %ecx,%mm0 movzbl -128(%ebp,%edi,1),%ecx movzbl %al,%edi orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %bl,%edi 
orl %esi,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %ah,%edi shll $16,%esi shrl $16,%eax orl %esi,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shrl $16,%ebx shll $8,%esi movd %edx,%mm1 movzbl -128(%ebp,%edi,1),%edx movzbl %bh,%edi shll $24,%edx andl $255,%ebx orl %esi,%edx punpckldq %mm1,%mm0 movzbl -128(%ebp,%edi,1),%esi movzbl %al,%edi shll $8,%esi movzbl %ah,%eax movzbl -128(%ebp,%ebx,1),%ebx orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi orl %ebx,%edx shll $16,%esi movzbl -128(%ebp,%eax,1),%eax orl %esi,%edx shll $24,%eax orl %eax,%ecx movl 20(%esp),%edi movd %edx,%mm4 movd %ecx,%mm5 punpckldq %mm5,%mm4 addl $16,%edi cmpl 24(%esp),%edi ja .L008out movq %mm0,%mm3 movq %mm4,%mm7 pshufw $228,%mm0,%mm2 pshufw $228,%mm4,%mm6 movq %mm0,%mm1 movq %mm4,%mm5 pshufw $177,%mm0,%mm0 pshufw $177,%mm4,%mm4 pslld $8,%mm2 pslld $8,%mm6 psrld $8,%mm3 psrld $8,%mm7 pxor %mm2,%mm0 pxor %mm6,%mm4 pxor %mm3,%mm0 pxor %mm7,%mm4 pslld $16,%mm2 pslld $16,%mm6 psrld $16,%mm3 psrld $16,%mm7 pxor %mm2,%mm0 pxor %mm6,%mm4 pxor %mm3,%mm0 pxor %mm7,%mm4 movq 8(%esp),%mm3 pxor %mm2,%mm2 pxor %mm6,%mm6 pcmpgtb %mm1,%mm2 pcmpgtb %mm5,%mm6 pand %mm3,%mm2 pand %mm3,%mm6 paddb %mm1,%mm1 paddb %mm5,%mm5 pxor %mm2,%mm1 pxor %mm6,%mm5 movq %mm1,%mm3 movq %mm5,%mm7 movq %mm1,%mm2 movq %mm5,%mm6 pxor %mm1,%mm0 pxor %mm5,%mm4 pslld $24,%mm3 pslld $24,%mm7 psrld $8,%mm2 psrld $8,%mm6 pxor %mm3,%mm0 pxor %mm7,%mm4 pxor %mm2,%mm0 pxor %mm6,%mm4 movq 8(%esp),%mm2 pxor %mm3,%mm3 pxor %mm7,%mm7 pcmpgtb %mm1,%mm3 pcmpgtb %mm5,%mm7 pand %mm2,%mm3 pand %mm2,%mm7 paddb %mm1,%mm1 paddb %mm5,%mm5 pxor %mm3,%mm1 pxor %mm7,%mm5 pshufw $177,%mm1,%mm3 pshufw $177,%mm5,%mm7 pxor %mm1,%mm0 pxor %mm5,%mm4 pxor %mm3,%mm0 pxor %mm7,%mm4 pxor %mm3,%mm3 pxor %mm7,%mm7 pcmpgtb %mm1,%mm3 pcmpgtb %mm5,%mm7 pand %mm2,%mm3 pand %mm2,%mm7 paddb %mm1,%mm1 paddb %mm5,%mm5 pxor %mm3,%mm1 pxor %mm7,%mm5 pxor %mm1,%mm0 pxor %mm5,%mm4 movq %mm1,%mm3 movq %mm5,%mm7 pshufw $177,%mm1,%mm2 pshufw $177,%mm5,%mm6 pxor %mm2,%mm0 pxor %mm6,%mm4 pslld $8,%mm1 pslld $8,%mm5 psrld $8,%mm3 psrld $8,%mm7 movq (%edi),%mm2 movq 8(%edi),%mm6 pxor %mm1,%mm0 pxor %mm5,%mm4 pxor %mm3,%mm0 pxor %mm7,%mm4 movl -128(%ebp),%eax pslld $16,%mm1 pslld $16,%mm5 movl -64(%ebp),%ebx psrld $16,%mm3 psrld $16,%mm7 movl (%ebp),%ecx pxor %mm1,%mm0 pxor %mm5,%mm4 movl 64(%ebp),%edx pxor %mm3,%mm0 pxor %mm7,%mm4 pxor %mm2,%mm0 pxor %mm6,%mm4 jmp .L007loop .align 16 .L008out: pxor (%edi),%mm0 pxor 8(%edi),%mm4 ret .size _sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact .type _x86_AES_decrypt,@function .align 16 _x86_AES_decrypt: movl %edi,20(%esp) xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) .align 16 .L009loop: movl %eax,%esi andl $255,%esi movl (%ebp,%esi,8),%esi movzbl %dh,%edi xorl 3(%ebp,%edi,8),%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi xorl 2(%ebp,%edi,8),%esi movl %ebx,%edi shrl $24,%edi xorl 1(%ebp,%edi,8),%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi movl (%ebp,%esi,8),%esi movzbl %ah,%edi xorl 3(%ebp,%edi,8),%esi movl %edx,%edi shrl $16,%edi andl $255,%edi xorl 2(%ebp,%edi,8),%esi movl %ecx,%edi shrl $24,%edi xorl 1(%ebp,%edi,8),%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi movl (%ebp,%esi,8),%esi movzbl %bh,%edi xorl 3(%ebp,%edi,8),%esi movl %eax,%edi shrl $16,%edi andl $255,%edi xorl 2(%ebp,%edi,8),%esi movl %edx,%edi shrl $24,%edi xorl 1(%ebp,%edi,8),%esi movl 20(%esp),%edi andl $255,%edx movl (%ebp,%edx,8),%edx movzbl %ch,%ecx xorl 
3(%ebp,%ecx,8),%edx movl %esi,%ecx shrl $16,%ebx andl $255,%ebx xorl 2(%ebp,%ebx,8),%edx movl 8(%esp),%ebx shrl $24,%eax xorl 1(%ebp,%eax,8),%edx movl 4(%esp),%eax addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx cmpl 24(%esp),%edi movl %edi,20(%esp) jb .L009loop leal 2176(%ebp),%ebp movl -128(%ebp),%edi movl -96(%ebp),%esi movl -64(%ebp),%edi movl -32(%ebp),%esi movl (%ebp),%edi movl 32(%ebp),%esi movl 64(%ebp),%edi movl 96(%ebp),%esi leal -128(%ebp),%ebp movl %eax,%esi andl $255,%esi movzbl (%ebp,%esi,1),%esi movzbl %dh,%edi movzbl (%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movzbl (%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ebx,%edi shrl $24,%edi movzbl (%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi movzbl (%ebp,%esi,1),%esi movzbl %ah,%edi movzbl (%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movzbl (%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ecx,%edi shrl $24,%edi movzbl (%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi movzbl (%ebp,%esi,1),%esi movzbl %bh,%edi movzbl (%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edi movzbl (%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movzbl (%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl 20(%esp),%edi andl $255,%edx movzbl (%ebp,%edx,1),%edx movzbl %ch,%ecx movzbl (%ebp,%ecx,1),%ecx shll $8,%ecx xorl %ecx,%edx movl %esi,%ecx shrl $16,%ebx andl $255,%ebx movzbl (%ebp,%ebx,1),%ebx shll $16,%ebx xorl %ebx,%edx movl 8(%esp),%ebx shrl $24,%eax movzbl (%ebp,%eax,1),%eax shll $24,%eax xorl %eax,%edx movl 4(%esp),%eax leal -2048(%ebp),%ebp addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx ret .align 64 .LAES_Td: .long 1353184337,1353184337 .long 1399144830,1399144830 .long 3282310938,3282310938 .long 2522752826,2522752826 .long 3412831035,3412831035 .long 4047871263,4047871263 .long 2874735276,2874735276 .long 2466505547,2466505547 .long 1442459680,1442459680 .long 4134368941,4134368941 .long 2440481928,2440481928 .long 625738485,625738485 .long 4242007375,4242007375 .long 3620416197,3620416197 .long 2151953702,2151953702 .long 2409849525,2409849525 .long 1230680542,1230680542 .long 1729870373,1729870373 .long 2551114309,2551114309 .long 3787521629,3787521629 .long 41234371,41234371 .long 317738113,317738113 .long 2744600205,2744600205 .long 3338261355,3338261355 .long 3881799427,3881799427 .long 2510066197,2510066197 .long 3950669247,3950669247 .long 3663286933,3663286933 .long 763608788,763608788 .long 3542185048,3542185048 .long 694804553,694804553 .long 1154009486,1154009486 .long 1787413109,1787413109 .long 2021232372,2021232372 .long 1799248025,1799248025 .long 3715217703,3715217703 .long 3058688446,3058688446 .long 397248752,397248752 .long 1722556617,1722556617 .long 3023752829,3023752829 .long 407560035,407560035 .long 2184256229,2184256229 .long 1613975959,1613975959 .long 1165972322,1165972322 .long 3765920945,3765920945 .long 2226023355,2226023355 .long 480281086,480281086 .long 2485848313,2485848313 .long 1483229296,1483229296 .long 436028815,436028815 .long 2272059028,2272059028 .long 3086515026,3086515026 .long 601060267,601060267 .long 3791801202,3791801202 .long 1468997603,1468997603 .long 715871590,715871590 .long 120122290,120122290 .long 63092015,63092015 .long 2591802758,2591802758 .long 
2768779219,2768779219 .long 4068943920,4068943920 .long 2997206819,2997206819 .long 3127509762,3127509762 .long 1552029421,1552029421 .long 723308426,723308426 .long 2461301159,2461301159 .long 4042393587,4042393587 .long 2715969870,2715969870 .long 3455375973,3455375973 .long 3586000134,3586000134 .long 526529745,526529745 .long 2331944644,2331944644 .long 2639474228,2639474228 .long 2689987490,2689987490 .long 853641733,853641733 .long 1978398372,1978398372 .long 971801355,971801355 .long 2867814464,2867814464 .long 111112542,111112542 .long 1360031421,1360031421 .long 4186579262,4186579262 .long 1023860118,1023860118 .long 2919579357,2919579357 .long 1186850381,1186850381 .long 3045938321,3045938321 .long 90031217,90031217 .long 1876166148,1876166148 .long 4279586912,4279586912 .long 620468249,620468249 .long 2548678102,2548678102 .long 3426959497,3426959497 .long 2006899047,2006899047 .long 3175278768,3175278768 .long 2290845959,2290845959 .long 945494503,945494503 .long 3689859193,3689859193 .long 1191869601,1191869601 .long 3910091388,3910091388 .long 3374220536,3374220536 .long 0,0 .long 2206629897,2206629897 .long 1223502642,1223502642 .long 2893025566,2893025566 .long 1316117100,1316117100 .long 4227796733,4227796733 .long 1446544655,1446544655 .long 517320253,517320253 .long 658058550,658058550 .long 1691946762,1691946762 .long 564550760,564550760 .long 3511966619,3511966619 .long 976107044,976107044 .long 2976320012,2976320012 .long 266819475,266819475 .long 3533106868,3533106868 .long 2660342555,2660342555 .long 1338359936,1338359936 .long 2720062561,2720062561 .long 1766553434,1766553434 .long 370807324,370807324 .long 179999714,179999714 .long 3844776128,3844776128 .long 1138762300,1138762300 .long 488053522,488053522 .long 185403662,185403662 .long 2915535858,2915535858 .long 3114841645,3114841645 .long 3366526484,3366526484 .long 2233069911,2233069911 .long 1275557295,1275557295 .long 3151862254,3151862254 .long 4250959779,4250959779 .long 2670068215,2670068215 .long 3170202204,3170202204 .long 3309004356,3309004356 .long 880737115,880737115 .long 1982415755,1982415755 .long 3703972811,3703972811 .long 1761406390,1761406390 .long 1676797112,1676797112 .long 3403428311,3403428311 .long 277177154,277177154 .long 1076008723,1076008723 .long 538035844,538035844 .long 2099530373,2099530373 .long 4164795346,4164795346 .long 288553390,288553390 .long 1839278535,1839278535 .long 1261411869,1261411869 .long 4080055004,4080055004 .long 3964831245,3964831245 .long 3504587127,3504587127 .long 1813426987,1813426987 .long 2579067049,2579067049 .long 4199060497,4199060497 .long 577038663,577038663 .long 3297574056,3297574056 .long 440397984,440397984 .long 3626794326,3626794326 .long 4019204898,4019204898 .long 3343796615,3343796615 .long 3251714265,3251714265 .long 4272081548,4272081548 .long 906744984,906744984 .long 3481400742,3481400742 .long 685669029,685669029 .long 646887386,646887386 .long 2764025151,2764025151 .long 3835509292,3835509292 .long 227702864,227702864 .long 2613862250,2613862250 .long 1648787028,1648787028 .long 3256061430,3256061430 .long 3904428176,3904428176 .long 1593260334,1593260334 .long 4121936770,4121936770 .long 3196083615,3196083615 .long 2090061929,2090061929 .long 2838353263,2838353263 .long 3004310991,3004310991 .long 999926984,999926984 .long 2809993232,2809993232 .long 1852021992,1852021992 .long 2075868123,2075868123 .long 158869197,158869197 .long 4095236462,4095236462 .long 28809964,28809964 .long 2828685187,2828685187 .long 1701746150,1701746150 
.long 2129067946,2129067946 .long 147831841,147831841 .long 3873969647,3873969647 .long 3650873274,3650873274 .long 3459673930,3459673930 .long 3557400554,3557400554 .long 3598495785,3598495785 .long 2947720241,2947720241 .long 824393514,824393514 .long 815048134,815048134 .long 3227951669,3227951669 .long 935087732,935087732 .long 2798289660,2798289660 .long 2966458592,2966458592 .long 366520115,366520115 .long 1251476721,1251476721 .long 4158319681,4158319681 .long 240176511,240176511 .long 804688151,804688151 .long 2379631990,2379631990 .long 1303441219,1303441219 .long 1414376140,1414376140 .long 3741619940,3741619940 .long 3820343710,3820343710 .long 461924940,461924940 .long 3089050817,3089050817 .long 2136040774,2136040774 .long 82468509,82468509 .long 1563790337,1563790337 .long 1937016826,1937016826 .long 776014843,776014843 .long 1511876531,1511876531 .long 1389550482,1389550482 .long 861278441,861278441 .long 323475053,323475053 .long 2355222426,2355222426 .long 2047648055,2047648055 .long 2383738969,2383738969 .long 2302415851,2302415851 .long 3995576782,3995576782 .long 902390199,902390199 .long 3991215329,3991215329 .long 1018251130,1018251130 .long 1507840668,1507840668 .long 1064563285,1064563285 .long 2043548696,2043548696 .long 3208103795,3208103795 .long 3939366739,3939366739 .long 1537932639,1537932639 .long 342834655,342834655 .long 2262516856,2262516856 .long 2180231114,2180231114 .long 1053059257,1053059257 .long 741614648,741614648 .long 1598071746,1598071746 .long 1925389590,1925389590 .long 203809468,203809468 .long 2336832552,2336832552 .long 1100287487,1100287487 .long 1895934009,1895934009 .long 3736275976,3736275976 .long 2632234200,2632234200 .long 2428589668,2428589668 .long 1636092795,1636092795 .long 1890988757,1890988757 .long 1952214088,1952214088 .long 1113045200,1113045200 .byte 82,9,106,213,48,54,165,56 .byte 191,64,163,158,129,243,215,251 .byte 124,227,57,130,155,47,255,135 .byte 52,142,67,68,196,222,233,203 .byte 84,123,148,50,166,194,35,61 .byte 238,76,149,11,66,250,195,78 .byte 8,46,161,102,40,217,36,178 .byte 118,91,162,73,109,139,209,37 .byte 114,248,246,100,134,104,152,22 .byte 212,164,92,204,93,101,182,146 .byte 108,112,72,80,253,237,185,218 .byte 94,21,70,87,167,141,157,132 .byte 144,216,171,0,140,188,211,10 .byte 247,228,88,5,184,179,69,6 .byte 208,44,30,143,202,63,15,2 .byte 193,175,189,3,1,19,138,107 .byte 58,145,17,65,79,103,220,234 .byte 151,242,207,206,240,180,230,115 .byte 150,172,116,34,231,173,53,133 .byte 226,249,55,232,28,117,223,110 .byte 71,241,26,113,29,41,197,137 .byte 111,183,98,14,170,24,190,27 .byte 252,86,62,75,198,210,121,32 .byte 154,219,192,254,120,205,90,244 .byte 31,221,168,51,136,7,199,49 .byte 177,18,16,89,39,128,236,95 .byte 96,81,127,169,25,181,74,13 .byte 45,229,122,159,147,201,156,239 .byte 160,224,59,77,174,42,245,176 .byte 200,235,187,60,131,83,153,97 .byte 23,43,4,126,186,119,214,38 .byte 225,105,20,99,85,33,12,125 .byte 82,9,106,213,48,54,165,56 .byte 191,64,163,158,129,243,215,251 .byte 124,227,57,130,155,47,255,135 .byte 52,142,67,68,196,222,233,203 .byte 84,123,148,50,166,194,35,61 .byte 238,76,149,11,66,250,195,78 .byte 8,46,161,102,40,217,36,178 .byte 118,91,162,73,109,139,209,37 .byte 114,248,246,100,134,104,152,22 .byte 212,164,92,204,93,101,182,146 .byte 108,112,72,80,253,237,185,218 .byte 94,21,70,87,167,141,157,132 .byte 144,216,171,0,140,188,211,10 .byte 247,228,88,5,184,179,69,6 .byte 208,44,30,143,202,63,15,2 .byte 193,175,189,3,1,19,138,107 .byte 58,145,17,65,79,103,220,234 .byte 
151,242,207,206,240,180,230,115 .byte 150,172,116,34,231,173,53,133 .byte 226,249,55,232,28,117,223,110 .byte 71,241,26,113,29,41,197,137 .byte 111,183,98,14,170,24,190,27 .byte 252,86,62,75,198,210,121,32 .byte 154,219,192,254,120,205,90,244 .byte 31,221,168,51,136,7,199,49 .byte 177,18,16,89,39,128,236,95 .byte 96,81,127,169,25,181,74,13 .byte 45,229,122,159,147,201,156,239 .byte 160,224,59,77,174,42,245,176 .byte 200,235,187,60,131,83,153,97 .byte 23,43,4,126,186,119,214,38 .byte 225,105,20,99,85,33,12,125 .byte 82,9,106,213,48,54,165,56 .byte 191,64,163,158,129,243,215,251 .byte 124,227,57,130,155,47,255,135 .byte 52,142,67,68,196,222,233,203 .byte 84,123,148,50,166,194,35,61 .byte 238,76,149,11,66,250,195,78 .byte 8,46,161,102,40,217,36,178 .byte 118,91,162,73,109,139,209,37 .byte 114,248,246,100,134,104,152,22 .byte 212,164,92,204,93,101,182,146 .byte 108,112,72,80,253,237,185,218 .byte 94,21,70,87,167,141,157,132 .byte 144,216,171,0,140,188,211,10 .byte 247,228,88,5,184,179,69,6 .byte 208,44,30,143,202,63,15,2 .byte 193,175,189,3,1,19,138,107 .byte 58,145,17,65,79,103,220,234 .byte 151,242,207,206,240,180,230,115 .byte 150,172,116,34,231,173,53,133 .byte 226,249,55,232,28,117,223,110 .byte 71,241,26,113,29,41,197,137 .byte 111,183,98,14,170,24,190,27 .byte 252,86,62,75,198,210,121,32 .byte 154,219,192,254,120,205,90,244 .byte 31,221,168,51,136,7,199,49 .byte 177,18,16,89,39,128,236,95 .byte 96,81,127,169,25,181,74,13 .byte 45,229,122,159,147,201,156,239 .byte 160,224,59,77,174,42,245,176 .byte 200,235,187,60,131,83,153,97 .byte 23,43,4,126,186,119,214,38 .byte 225,105,20,99,85,33,12,125 .byte 82,9,106,213,48,54,165,56 .byte 191,64,163,158,129,243,215,251 .byte 124,227,57,130,155,47,255,135 .byte 52,142,67,68,196,222,233,203 .byte 84,123,148,50,166,194,35,61 .byte 238,76,149,11,66,250,195,78 .byte 8,46,161,102,40,217,36,178 .byte 118,91,162,73,109,139,209,37 .byte 114,248,246,100,134,104,152,22 .byte 212,164,92,204,93,101,182,146 .byte 108,112,72,80,253,237,185,218 .byte 94,21,70,87,167,141,157,132 .byte 144,216,171,0,140,188,211,10 .byte 247,228,88,5,184,179,69,6 .byte 208,44,30,143,202,63,15,2 .byte 193,175,189,3,1,19,138,107 .byte 58,145,17,65,79,103,220,234 .byte 151,242,207,206,240,180,230,115 .byte 150,172,116,34,231,173,53,133 .byte 226,249,55,232,28,117,223,110 .byte 71,241,26,113,29,41,197,137 .byte 111,183,98,14,170,24,190,27 .byte 252,86,62,75,198,210,121,32 .byte 154,219,192,254,120,205,90,244 .byte 31,221,168,51,136,7,199,49 .byte 177,18,16,89,39,128,236,95 .byte 96,81,127,169,25,181,74,13 .byte 45,229,122,159,147,201,156,239 .byte 160,224,59,77,174,42,245,176 .byte 200,235,187,60,131,83,153,97 .byte 23,43,4,126,186,119,214,38 .byte 225,105,20,99,85,33,12,125 .size _x86_AES_decrypt,.-_x86_AES_decrypt .globl AES_decrypt .type AES_decrypt,@function .align 16 AES_decrypt: .L_AES_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 28(%esp),%edi movl %esp,%eax subl $36,%esp andl $-64,%esp leal -127(%edi),%ebx subl %esp,%ebx negl %ebx andl $960,%ebx subl %ebx,%esp addl $4,%esp movl %eax,28(%esp) call .L010pic_point .L010pic_point: popl %ebp leal OPENSSL_ia32cap_P-.L010pic_point(%ebp),%eax leal .LAES_Td-.L010pic_point(%ebp),%ebp leal 764(%esp),%ebx subl %ebp,%ebx andl $768,%ebx leal 2176(%ebp,%ebx,1),%ebp btl $25,(%eax) jnc .L011x86 movq (%esi),%mm0 movq 8(%esi),%mm4 call _sse_AES_decrypt_compact movl 28(%esp),%esp movl 24(%esp),%esi movq %mm0,(%esi) movq %mm4,8(%esi) emms popl %edi popl %esi popl %ebx popl %ebp ret .align 16 .L011x86: movl 
%ebp,24(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx call _x86_AES_decrypt_compact movl 28(%esp),%esp movl 24(%esp),%esi movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size AES_decrypt,.-.L_AES_decrypt_begin .globl AES_cbc_encrypt .type AES_cbc_encrypt,@function .align 16 AES_cbc_encrypt: .L_AES_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ecx cmpl $0,%ecx je .L012drop_out call .L013pic_point .L013pic_point: popl %ebp leal OPENSSL_ia32cap_P-.L013pic_point(%ebp),%eax cmpl $0,40(%esp) leal .LAES_Te-.L013pic_point(%ebp),%ebp jne .L014picked_te leal .LAES_Td-.LAES_Te(%ebp),%ebp .L014picked_te: pushfl cld cmpl $512,%ecx jb .L015slow_way testl $15,%ecx jnz .L015slow_way btl $28,(%eax) jc .L015slow_way leal -324(%esp),%esi andl $-64,%esi movl %ebp,%eax leal 2304(%ebp),%ebx movl %esi,%edx andl $4095,%eax andl $4095,%ebx andl $4095,%edx cmpl %ebx,%edx jb .L016tbl_break_out subl %ebx,%edx subl %edx,%esi jmp .L017tbl_ok .align 4 .L016tbl_break_out: subl %eax,%edx andl $4095,%edx addl $384,%edx subl %edx,%esi .align 4 .L017tbl_ok: leal 24(%esp),%edx xchgl %esi,%esp addl $4,%esp movl %ebp,24(%esp) movl %esi,28(%esp) movl (%edx),%eax movl 4(%edx),%ebx movl 12(%edx),%edi movl 16(%edx),%esi movl 20(%edx),%edx movl %eax,32(%esp) movl %ebx,36(%esp) movl %ecx,40(%esp) movl %edi,44(%esp) movl %esi,48(%esp) movl $0,316(%esp) movl %edi,%ebx movl $61,%ecx subl %ebp,%ebx movl %edi,%esi andl $4095,%ebx leal 76(%esp),%edi cmpl $2304,%ebx jb .L018do_copy cmpl $3852,%ebx jb .L019skip_copy .align 4 .L018do_copy: movl %edi,44(%esp) .long 2784229001 .L019skip_copy: movl $16,%edi .align 4 .L020prefetch_tbl: movl (%ebp),%eax movl 32(%ebp),%ebx movl 64(%ebp),%ecx movl 96(%ebp),%esi leal 128(%ebp),%ebp subl $1,%edi jnz .L020prefetch_tbl subl $2048,%ebp movl 32(%esp),%esi movl 48(%esp),%edi cmpl $0,%edx je .L021fast_decrypt movl (%edi),%eax movl 4(%edi),%ebx .align 16 .L022fast_enc_loop: movl 8(%edi),%ecx movl 12(%edi),%edx xorl (%esi),%eax xorl 4(%esi),%ebx xorl 8(%esi),%ecx xorl 12(%esi),%edx movl 44(%esp),%edi call _x86_AES_encrypt movl 32(%esp),%esi movl 36(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) leal 16(%esi),%esi movl 40(%esp),%ecx movl %esi,32(%esp) leal 16(%edi),%edx movl %edx,36(%esp) subl $16,%ecx movl %ecx,40(%esp) jnz .L022fast_enc_loop movl 48(%esp),%esi movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) cmpl $0,316(%esp) movl 44(%esp),%edi je .L023skip_ezero movl $60,%ecx xorl %eax,%eax .align 4 .long 2884892297 .L023skip_ezero: movl 28(%esp),%esp popfl .L012drop_out: popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L021fast_decrypt: cmpl 36(%esp),%esi je .L024fast_dec_in_place movl %edi,52(%esp) .align 4 .align 16 .L025fast_dec_loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl 44(%esp),%edi call _x86_AES_decrypt movl 52(%esp),%edi movl 40(%esp),%esi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 36(%esp),%edi movl 32(%esp),%esi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 40(%esp),%ecx movl %esi,52(%esp) leal 16(%esi),%esi movl %esi,32(%esp) leal 16(%edi),%edi movl %edi,36(%esp) subl $16,%ecx movl %ecx,40(%esp) jnz .L025fast_dec_loop movl 52(%esp),%edi movl 48(%esp),%esi movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,(%esi) movl 
%ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) jmp .L026fast_dec_out .align 16 .L024fast_dec_in_place: .L027fast_dec_in_place_loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx leal 60(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 44(%esp),%edi call _x86_AES_decrypt movl 48(%esp),%edi movl 36(%esp),%esi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) leal 16(%esi),%esi movl %esi,36(%esp) leal 60(%esp),%esi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 32(%esp),%esi movl 40(%esp),%ecx leal 16(%esi),%esi movl %esi,32(%esp) subl $16,%ecx movl %ecx,40(%esp) jnz .L027fast_dec_in_place_loop .align 4 .L026fast_dec_out: cmpl $0,316(%esp) movl 44(%esp),%edi je .L028skip_dzero movl $60,%ecx xorl %eax,%eax .align 4 .long 2884892297 .L028skip_dzero: movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L015slow_way: movl (%eax),%eax movl 36(%esp),%edi leal -80(%esp),%esi andl $-64,%esi leal -143(%edi),%ebx subl %esi,%ebx negl %ebx andl $960,%ebx subl %ebx,%esi leal 768(%esi),%ebx subl %ebp,%ebx andl $768,%ebx leal 2176(%ebp,%ebx,1),%ebp leal 24(%esp),%edx xchgl %esi,%esp addl $4,%esp movl %ebp,24(%esp) movl %esi,28(%esp) movl %eax,52(%esp) movl (%edx),%eax movl 4(%edx),%ebx movl 16(%edx),%esi movl 20(%edx),%edx movl %eax,32(%esp) movl %ebx,36(%esp) movl %ecx,40(%esp) movl %edi,44(%esp) movl %esi,48(%esp) movl %esi,%edi movl %eax,%esi cmpl $0,%edx je .L029slow_decrypt cmpl $16,%ecx movl %ebx,%edx jb .L030slow_enc_tail btl $25,52(%esp) jnc .L031slow_enc_x86 movq (%edi),%mm0 movq 8(%edi),%mm4 .align 16 .L032slow_enc_loop_sse: pxor (%esi),%mm0 pxor 8(%esi),%mm4 movl 44(%esp),%edi call _sse_AES_encrypt_compact movl 32(%esp),%esi movl 36(%esp),%edi movl 40(%esp),%ecx movq %mm0,(%edi) movq %mm4,8(%edi) leal 16(%esi),%esi movl %esi,32(%esp) leal 16(%edi),%edx movl %edx,36(%esp) subl $16,%ecx cmpl $16,%ecx movl %ecx,40(%esp) jae .L032slow_enc_loop_sse testl $15,%ecx jnz .L030slow_enc_tail movl 48(%esp),%esi movq %mm0,(%esi) movq %mm4,8(%esi) emms movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L031slow_enc_x86: movl (%edi),%eax movl 4(%edi),%ebx .align 4 .L033slow_enc_loop_x86: movl 8(%edi),%ecx movl 12(%edi),%edx xorl (%esi),%eax xorl 4(%esi),%ebx xorl 8(%esi),%ecx xorl 12(%esi),%edx movl 44(%esp),%edi call _x86_AES_encrypt_compact movl 32(%esp),%esi movl 36(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 40(%esp),%ecx leal 16(%esi),%esi movl %esi,32(%esp) leal 16(%edi),%edx movl %edx,36(%esp) subl $16,%ecx cmpl $16,%ecx movl %ecx,40(%esp) jae .L033slow_enc_loop_x86 testl $15,%ecx jnz .L030slow_enc_tail movl 48(%esp),%esi movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L030slow_enc_tail: emms movl %edx,%edi movl $16,%ebx subl %ecx,%ebx cmpl %esi,%edi je .L034enc_in_place .align 4 .long 2767451785 jmp .L035enc_skip_in_place .L034enc_in_place: leal (%edi,%ecx,1),%edi .L035enc_skip_in_place: movl %ebx,%ecx xorl %eax,%eax .align 4 .long 2868115081 movl 48(%esp),%edi movl %edx,%esi movl (%edi),%eax movl 4(%edi),%ebx movl $16,40(%esp) jmp .L033slow_enc_loop_x86 .align 16 .L029slow_decrypt: btl 
$25,52(%esp) jnc .L036slow_dec_loop_x86 .align 4 .L037slow_dec_loop_sse: movq (%esi),%mm0 movq 8(%esi),%mm4 movl 44(%esp),%edi call _sse_AES_decrypt_compact movl 32(%esp),%esi leal 60(%esp),%eax movl 36(%esp),%ebx movl 40(%esp),%ecx movl 48(%esp),%edi movq (%esi),%mm1 movq 8(%esi),%mm5 pxor (%edi),%mm0 pxor 8(%edi),%mm4 movq %mm1,(%edi) movq %mm5,8(%edi) subl $16,%ecx jc .L038slow_dec_partial_sse movq %mm0,(%ebx) movq %mm4,8(%ebx) leal 16(%ebx),%ebx movl %ebx,36(%esp) leal 16(%esi),%esi movl %esi,32(%esp) movl %ecx,40(%esp) jnz .L037slow_dec_loop_sse emms movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L038slow_dec_partial_sse: movq %mm0,(%eax) movq %mm4,8(%eax) emms addl $16,%ecx movl %ebx,%edi movl %eax,%esi .align 4 .long 2767451785 movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L036slow_dec_loop_x86: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx leal 60(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 44(%esp),%edi call _x86_AES_decrypt_compact movl 48(%esp),%edi movl 40(%esp),%esi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx subl $16,%esi jc .L039slow_dec_partial_x86 movl %esi,40(%esp) movl 36(%esp),%esi movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) leal 16(%esi),%esi movl %esi,36(%esp) leal 60(%esp),%esi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 32(%esp),%esi leal 16(%esi),%esi movl %esi,32(%esp) jnz .L036slow_dec_loop_x86 movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L039slow_dec_partial_x86: leal 60(%esp),%esi movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) movl 32(%esp),%esi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 40(%esp),%ecx movl 36(%esp),%edi leal 60(%esp),%esi .align 4 .long 2767451785 movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret .size AES_cbc_encrypt,.-.L_AES_cbc_encrypt_begin .type _x86_AES_set_encrypt_key,@function .align 16 _x86_AES_set_encrypt_key: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 24(%esp),%esi movl 32(%esp),%edi testl $-1,%esi jz .L040badpointer testl $-1,%edi jz .L040badpointer call .L041pic_point .L041pic_point: popl %ebp leal .LAES_Te-.L041pic_point(%ebp),%ebp leal 2176(%ebp),%ebp movl -128(%ebp),%eax movl -96(%ebp),%ebx movl -64(%ebp),%ecx movl -32(%ebp),%edx movl (%ebp),%eax movl 32(%ebp),%ebx movl 64(%ebp),%ecx movl 96(%ebp),%edx movl 28(%esp),%ecx cmpl $128,%ecx je .L04210rounds cmpl $192,%ecx je .L04312rounds cmpl $256,%ecx je .L04414rounds movl $-2,%eax jmp .L045exit .L04210rounds: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) xorl %ecx,%ecx jmp .L04610shortcut .align 4 .L04710loop: movl (%edi),%eax movl 12(%edi),%edx .L04610shortcut: movzbl %dl,%esi movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $24,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shrl $16,%edx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $8,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shll $16,%ebx xorl %ebx,%eax xorl 896(%ebp,%ecx,4),%eax movl %eax,16(%edi) xorl 4(%edi),%eax movl %eax,20(%edi) xorl 8(%edi),%eax movl %eax,24(%edi) xorl 
12(%edi),%eax movl %eax,28(%edi) incl %ecx addl $16,%edi cmpl $10,%ecx jl .L04710loop movl $10,80(%edi) xorl %eax,%eax jmp .L045exit .L04312rounds: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 16(%esi),%ecx movl 20(%esi),%edx movl %ecx,16(%edi) movl %edx,20(%edi) xorl %ecx,%ecx jmp .L04812shortcut .align 4 .L04912loop: movl (%edi),%eax movl 20(%edi),%edx .L04812shortcut: movzbl %dl,%esi movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $24,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shrl $16,%edx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $8,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shll $16,%ebx xorl %ebx,%eax xorl 896(%ebp,%ecx,4),%eax movl %eax,24(%edi) xorl 4(%edi),%eax movl %eax,28(%edi) xorl 8(%edi),%eax movl %eax,32(%edi) xorl 12(%edi),%eax movl %eax,36(%edi) cmpl $7,%ecx je .L05012break incl %ecx xorl 16(%edi),%eax movl %eax,40(%edi) xorl 20(%edi),%eax movl %eax,44(%edi) addl $24,%edi jmp .L04912loop .L05012break: movl $12,72(%edi) xorl %eax,%eax jmp .L045exit .L04414rounds: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 16(%esi),%eax movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edx movl %eax,16(%edi) movl %ebx,20(%edi) movl %ecx,24(%edi) movl %edx,28(%edi) xorl %ecx,%ecx jmp .L05114shortcut .align 4 .L05214loop: movl 28(%edi),%edx .L05114shortcut: movl (%edi),%eax movzbl %dl,%esi movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $24,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shrl $16,%edx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $8,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shll $16,%ebx xorl %ebx,%eax xorl 896(%ebp,%ecx,4),%eax movl %eax,32(%edi) xorl 4(%edi),%eax movl %eax,36(%edi) xorl 8(%edi),%eax movl %eax,40(%edi) xorl 12(%edi),%eax movl %eax,44(%edi) cmpl $6,%ecx je .L05314break incl %ecx movl %eax,%edx movl 16(%edi),%eax movzbl %dl,%esi movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shrl $16,%edx shll $8,%ebx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $16,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shll $24,%ebx xorl %ebx,%eax movl %eax,48(%edi) xorl 20(%edi),%eax movl %eax,52(%edi) xorl 24(%edi),%eax movl %eax,56(%edi) xorl 28(%edi),%eax movl %eax,60(%edi) addl $32,%edi jmp .L05214loop .L05314break: movl $14,48(%edi) xorl %eax,%eax jmp .L045exit .L040badpointer: movl $-1,%eax .L045exit: popl %edi popl %esi popl %ebx popl %ebp ret .size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key .globl private_AES_set_encrypt_key .type private_AES_set_encrypt_key,@function .align 16 private_AES_set_encrypt_key: .L_private_AES_set_encrypt_key_begin: call _x86_AES_set_encrypt_key ret .size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin .globl private_AES_set_decrypt_key .type private_AES_set_decrypt_key,@function .align 16 private_AES_set_decrypt_key: .L_private_AES_set_decrypt_key_begin: call _x86_AES_set_encrypt_key cmpl $0,%eax je .L054proceed ret .L054proceed: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%esi movl 240(%esi),%ecx leal (,%ecx,4),%ecx leal (%esi,%ecx,4),%edi .align 4 .L055invert: movl (%esi),%eax movl 4(%esi),%ebx movl (%edi),%ecx movl 4(%edi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,(%esi) movl %edx,4(%esi) movl 
8(%esi),%eax movl 12(%esi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,8(%edi) movl %ebx,12(%edi) movl %ecx,8(%esi) movl %edx,12(%esi) addl $16,%esi subl $16,%edi cmpl %edi,%esi jne .L055invert movl 28(%esp),%edi movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,28(%esp) movl 16(%edi),%eax .align 4 .L056permute: addl $16,%edi movl $2155905152,%ebp andl %eax,%ebp leal (%eax,%eax,1),%ebx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %esi,%ebx movl $2155905152,%ebp andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %eax,%ebx xorl %esi,%ecx movl $2155905152,%ebp andl %ecx,%ebp leal (%ecx,%ecx,1),%edx movl %ebp,%esi shrl $7,%ebp xorl %eax,%ecx subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi roll $8,%eax xorl %esi,%edx movl 4(%edi),%ebp xorl %ebx,%eax xorl %edx,%ebx xorl %ecx,%eax roll $24,%ebx xorl %edx,%ecx xorl %edx,%eax roll $16,%ecx xorl %ebx,%eax roll $8,%edx xorl %ecx,%eax movl %ebp,%ebx xorl %edx,%eax movl %eax,(%edi) movl $2155905152,%ebp andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %esi,%ecx movl $2155905152,%ebp andl %ecx,%ebp leal (%ecx,%ecx,1),%edx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi xorl %ebx,%ecx xorl %esi,%edx movl $2155905152,%ebp andl %edx,%ebp leal (%edx,%edx,1),%eax movl %ebp,%esi shrl $7,%ebp xorl %ebx,%edx subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi roll $8,%ebx xorl %esi,%eax movl 8(%edi),%ebp xorl %ecx,%ebx xorl %eax,%ecx xorl %edx,%ebx roll $24,%ecx xorl %eax,%edx xorl %eax,%ebx roll $16,%edx xorl %ecx,%ebx roll $8,%eax xorl %edx,%ebx movl %ebp,%ecx xorl %eax,%ebx movl %ebx,4(%edi) movl $2155905152,%ebp andl %ecx,%ebp leal (%ecx,%ecx,1),%edx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi xorl %esi,%edx movl $2155905152,%ebp andl %edx,%ebp leal (%edx,%edx,1),%eax movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi xorl %ecx,%edx xorl %esi,%eax movl $2155905152,%ebp andl %eax,%ebp leal (%eax,%eax,1),%ebx movl %ebp,%esi shrl $7,%ebp xorl %ecx,%eax subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi roll $8,%ecx xorl %esi,%ebx movl 12(%edi),%ebp xorl %edx,%ecx xorl %ebx,%edx xorl %eax,%ecx roll $24,%edx xorl %ebx,%eax xorl %ebx,%ecx roll $16,%eax xorl %edx,%ecx roll $8,%ebx xorl %eax,%ecx movl %ebp,%edx xorl %ebx,%ecx movl %ecx,8(%edi) movl $2155905152,%ebp andl %edx,%ebp leal (%edx,%edx,1),%eax movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi xorl %esi,%eax movl $2155905152,%ebp andl %eax,%ebp leal (%eax,%eax,1),%ebx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %edx,%eax xorl %esi,%ebx movl $2155905152,%ebp andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx movl %ebp,%esi shrl $7,%ebp xorl %edx,%ebx subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi roll $8,%edx xorl %esi,%ecx movl 16(%edi),%ebp xorl %eax,%edx xorl %ecx,%eax xorl %ebx,%edx roll $24,%eax xorl %ecx,%ebx xorl %ecx,%edx roll $16,%ebx xorl %eax,%edx roll $8,%ecx xorl %ebx,%edx movl %ebp,%eax xorl %ecx,%edx movl %edx,12(%edi) cmpl 28(%esp),%edi jb .L056permute xorl %eax,%eax popl %edi popl %esi popl %ebx popl %ebp ret .size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin .byte 
65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .comm OPENSSL_ia32cap_P,16,4 #else .file "aes-586.S" .text .type _x86_AES_encrypt_compact,@function .align 16 _x86_AES_encrypt_compact: movl %edi,20(%esp) xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) movl -128(%ebp),%edi movl -96(%ebp),%esi movl -64(%ebp),%edi movl -32(%ebp),%esi movl (%ebp),%edi movl 32(%ebp),%esi movl 64(%ebp),%edi movl 96(%ebp),%esi .align 16 .L000loop: movl %eax,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi shrl $16,%ebx movzbl -128(%ebp,%esi,1),%esi movzbl %ch,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %eax,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi shrl $24,%ecx movzbl -128(%ebp,%esi,1),%esi movzbl %dh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edx andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi andl $255,%edx movzbl -128(%ebp,%edx,1),%edx movzbl %ah,%eax movzbl -128(%ebp,%eax,1),%eax shll $8,%eax xorl %eax,%edx movl 4(%esp),%eax andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx shll $16,%ebx xorl %ebx,%edx movl 8(%esp),%ebx movzbl -128(%ebp,%ecx,1),%ecx shll $24,%ecx xorl %ecx,%edx movl %esi,%ecx movl $2155905152,%ebp andl %ecx,%ebp leal (%ecx,%ecx,1),%edi movl %ebp,%esi shrl $7,%ebp andl $4278124286,%edi subl %ebp,%esi movl %ecx,%ebp andl $454761243,%esi rorl $16,%ebp xorl %edi,%esi movl %ecx,%edi xorl %esi,%ecx rorl $24,%edi xorl %ebp,%esi roll $24,%ecx xorl %edi,%esi movl $2155905152,%ebp xorl %esi,%ecx andl %edx,%ebp leal (%edx,%edx,1),%edi movl %ebp,%esi shrl $7,%ebp andl $4278124286,%edi subl %ebp,%esi movl %edx,%ebp andl $454761243,%esi rorl $16,%ebp xorl %edi,%esi movl %edx,%edi xorl %esi,%edx rorl $24,%edi xorl %ebp,%esi roll $24,%edx xorl %edi,%esi movl $2155905152,%ebp xorl %esi,%edx andl %eax,%ebp leal (%eax,%eax,1),%edi movl %ebp,%esi shrl $7,%ebp andl $4278124286,%edi subl %ebp,%esi movl %eax,%ebp andl $454761243,%esi rorl $16,%ebp xorl %edi,%esi movl %eax,%edi xorl %esi,%eax rorl $24,%edi xorl %ebp,%esi roll $24,%eax xorl %edi,%esi movl $2155905152,%ebp xorl %esi,%eax andl %ebx,%ebp leal (%ebx,%ebx,1),%edi movl %ebp,%esi shrl $7,%ebp andl $4278124286,%edi subl %ebp,%esi movl %ebx,%ebp andl $454761243,%esi rorl $16,%ebp xorl %edi,%esi movl %ebx,%edi xorl %esi,%ebx rorl $24,%edi xorl %ebp,%esi roll $24,%ebx xorl %edi,%esi xorl %esi,%ebx movl 20(%esp),%edi movl 28(%esp),%ebp addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx cmpl 24(%esp),%edi movl %edi,20(%esp) jb .L000loop movl %eax,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movzbl 
-128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi shrl $16,%ebx movzbl -128(%ebp,%esi,1),%esi movzbl %ch,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %eax,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi shrl $24,%ecx movzbl -128(%ebp,%esi,1),%esi movzbl %dh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edx andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl 20(%esp),%edi andl $255,%edx movzbl -128(%ebp,%edx,1),%edx movzbl %ah,%eax movzbl -128(%ebp,%eax,1),%eax shll $8,%eax xorl %eax,%edx movl 4(%esp),%eax andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx shll $16,%ebx xorl %ebx,%edx movl 8(%esp),%ebx movzbl -128(%ebp,%ecx,1),%ecx shll $24,%ecx xorl %ecx,%edx movl %esi,%ecx xorl 16(%edi),%eax xorl 20(%edi),%ebx xorl 24(%edi),%ecx xorl 28(%edi),%edx ret .size _x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact .type _sse_AES_encrypt_compact,@function .align 16 _sse_AES_encrypt_compact: pxor (%edi),%mm0 pxor 8(%edi),%mm4 movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) movl $454761243,%eax movl %eax,8(%esp) movl %eax,12(%esp) movl -128(%ebp),%eax movl -96(%ebp),%ebx movl -64(%ebp),%ecx movl -32(%ebp),%edx movl (%ebp),%eax movl 32(%ebp),%ebx movl 64(%ebp),%ecx movl 96(%ebp),%edx .align 16 .L001loop: pshufw $8,%mm0,%mm1 pshufw $13,%mm4,%mm5 movd %mm1,%eax movd %mm5,%ebx movl %edi,20(%esp) movzbl %al,%esi movzbl %ah,%edx pshufw $13,%mm0,%mm2 movzbl -128(%ebp,%esi,1),%ecx movzbl %bl,%edi movzbl -128(%ebp,%edx,1),%edx shrl $16,%eax shll $8,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $16,%esi pshufw $8,%mm4,%mm6 orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %ah,%edi shll $24,%esi shrl $16,%ebx orl %esi,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $8,%esi orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %al,%edi shll $24,%esi orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %bl,%edi movd %mm2,%eax movd %ecx,%mm0 movzbl -128(%ebp,%edi,1),%ecx movzbl %ah,%edi shll $16,%ecx movd %mm6,%ebx orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $24,%esi orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %bl,%edi shll $8,%esi shrl $16,%ebx orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %al,%edi shrl $16,%eax movd %ecx,%mm1 movzbl -128(%ebp,%edi,1),%ecx movzbl %ah,%edi shll $16,%ecx andl $255,%eax orl %esi,%ecx punpckldq %mm1,%mm0 movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $24,%esi andl $255,%ebx movzbl -128(%ebp,%eax,1),%eax orl %esi,%ecx shll $16,%eax movzbl -128(%ebp,%edi,1),%esi orl %eax,%edx shll $8,%esi movzbl -128(%ebp,%ebx,1),%ebx orl %esi,%ecx orl %ebx,%edx movl 20(%esp),%edi movd %ecx,%mm4 movd %edx,%mm5 punpckldq %mm5,%mm4 addl $16,%edi cmpl 24(%esp),%edi ja .L002out movq 8(%esp),%mm2 pxor %mm3,%mm3 pxor %mm7,%mm7 movq %mm0,%mm1 movq %mm4,%mm5 pcmpgtb %mm0,%mm3 pcmpgtb %mm4,%mm7 pand %mm2,%mm3 pand %mm2,%mm7 pshufw $177,%mm0,%mm2 pshufw $177,%mm4,%mm6 paddb %mm0,%mm0 paddb %mm4,%mm4 pxor %mm3,%mm0 pxor %mm7,%mm4 pshufw $177,%mm2,%mm3 pshufw $177,%mm6,%mm7 pxor %mm0,%mm1 pxor %mm4,%mm5 pxor %mm2,%mm0 pxor %mm6,%mm4 
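# MixColumns in the MMX/SSE path: the pcmpgtb/pand/paddb/pxor sequence just
# above doubles every state byte in GF(2^8) (xtime), using the 0x1b1b1b1b
# reduction mask loaded from 8(%esp). The pslld/psrld/pxor steps that follow
# rotate the doubled and original words and fold them together before the
# next round key at (%edi) is xored in.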
movq %mm3,%mm2 movq %mm7,%mm6 pslld $8,%mm3 pslld $8,%mm7 psrld $24,%mm2 psrld $24,%mm6 pxor %mm3,%mm0 pxor %mm7,%mm4 pxor %mm2,%mm0 pxor %mm6,%mm4 movq %mm1,%mm3 movq %mm5,%mm7 movq (%edi),%mm2 movq 8(%edi),%mm6 psrld $8,%mm1 psrld $8,%mm5 movl -128(%ebp),%eax pslld $24,%mm3 pslld $24,%mm7 movl -64(%ebp),%ebx pxor %mm1,%mm0 pxor %mm5,%mm4 movl (%ebp),%ecx pxor %mm3,%mm0 pxor %mm7,%mm4 movl 64(%ebp),%edx pxor %mm2,%mm0 pxor %mm6,%mm4 jmp .L001loop .align 16 .L002out: pxor (%edi),%mm0 pxor 8(%edi),%mm4 ret .size _sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact .type _x86_AES_encrypt,@function .align 16 _x86_AES_encrypt: movl %edi,20(%esp) xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) .align 16 .L003loop: movl %eax,%esi andl $255,%esi movl (%ebp,%esi,8),%esi movzbl %bh,%edi xorl 3(%ebp,%edi,8),%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi xorl 2(%ebp,%edi,8),%esi movl %edx,%edi shrl $24,%edi xorl 1(%ebp,%edi,8),%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi shrl $16,%ebx movl (%ebp,%esi,8),%esi movzbl %ch,%edi xorl 3(%ebp,%edi,8),%esi movl %edx,%edi shrl $16,%edi andl $255,%edi xorl 2(%ebp,%edi,8),%esi movl %eax,%edi shrl $24,%edi xorl 1(%ebp,%edi,8),%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi shrl $24,%ecx movl (%ebp,%esi,8),%esi movzbl %dh,%edi xorl 3(%ebp,%edi,8),%esi movl %eax,%edi shrl $16,%edi andl $255,%edx andl $255,%edi xorl 2(%ebp,%edi,8),%esi movzbl %bh,%edi xorl 1(%ebp,%edi,8),%esi movl 20(%esp),%edi movl (%ebp,%edx,8),%edx movzbl %ah,%eax xorl 3(%ebp,%eax,8),%edx movl 4(%esp),%eax andl $255,%ebx xorl 2(%ebp,%ebx,8),%edx movl 8(%esp),%ebx xorl 1(%ebp,%ecx,8),%edx movl %esi,%ecx addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx cmpl 24(%esp),%edi movl %edi,20(%esp) jb .L003loop movl %eax,%esi andl $255,%esi movl 2(%ebp,%esi,8),%esi andl $255,%esi movzbl %bh,%edi movl (%ebp,%edi,8),%edi andl $65280,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movl (%ebp,%edi,8),%edi andl $16711680,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movl 2(%ebp,%edi,8),%edi andl $4278190080,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi shrl $16,%ebx movl 2(%ebp,%esi,8),%esi andl $255,%esi movzbl %ch,%edi movl (%ebp,%edi,8),%edi andl $65280,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movl (%ebp,%edi,8),%edi andl $16711680,%edi xorl %edi,%esi movl %eax,%edi shrl $24,%edi movl 2(%ebp,%edi,8),%edi andl $4278190080,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi shrl $24,%ecx movl 2(%ebp,%esi,8),%esi andl $255,%esi movzbl %dh,%edi movl (%ebp,%edi,8),%edi andl $65280,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edx andl $255,%edi movl (%ebp,%edi,8),%edi andl $16711680,%edi xorl %edi,%esi movzbl %bh,%edi movl 2(%ebp,%edi,8),%edi andl $4278190080,%edi xorl %edi,%esi movl 20(%esp),%edi andl $255,%edx movl 2(%ebp,%edx,8),%edx andl $255,%edx movzbl %ah,%eax movl (%ebp,%eax,8),%eax andl $65280,%eax xorl %eax,%edx movl 4(%esp),%eax andl $255,%ebx movl (%ebp,%ebx,8),%ebx andl $16711680,%ebx xorl %ebx,%edx movl 8(%esp),%ebx movl 2(%ebp,%ecx,8),%ecx andl $4278190080,%ecx xorl %ecx,%edx movl %esi,%ecx addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx ret .align 64 .LAES_Te: .long 2774754246,2774754246 .long 2222750968,2222750968 .long 2574743534,2574743534 .long 2373680118,2373680118 .long 234025727,234025727 .long 
3177933782,3177933782 .long 2976870366,2976870366 .long 1422247313,1422247313 .long 1345335392,1345335392 .long 50397442,50397442 .long 2842126286,2842126286 .long 2099981142,2099981142 .long 436141799,436141799 .long 1658312629,1658312629 .long 3870010189,3870010189 .long 2591454956,2591454956 .long 1170918031,1170918031 .long 2642575903,2642575903 .long 1086966153,1086966153 .long 2273148410,2273148410 .long 368769775,368769775 .long 3948501426,3948501426 .long 3376891790,3376891790 .long 200339707,200339707 .long 3970805057,3970805057 .long 1742001331,1742001331 .long 4255294047,4255294047 .long 3937382213,3937382213 .long 3214711843,3214711843 .long 4154762323,4154762323 .long 2524082916,2524082916 .long 1539358875,1539358875 .long 3266819957,3266819957 .long 486407649,486407649 .long 2928907069,2928907069 .long 1780885068,1780885068 .long 1513502316,1513502316 .long 1094664062,1094664062 .long 49805301,49805301 .long 1338821763,1338821763 .long 1546925160,1546925160 .long 4104496465,4104496465 .long 887481809,887481809 .long 150073849,150073849 .long 2473685474,2473685474 .long 1943591083,1943591083 .long 1395732834,1395732834 .long 1058346282,1058346282 .long 201589768,201589768 .long 1388824469,1388824469 .long 1696801606,1696801606 .long 1589887901,1589887901 .long 672667696,672667696 .long 2711000631,2711000631 .long 251987210,251987210 .long 3046808111,3046808111 .long 151455502,151455502 .long 907153956,907153956 .long 2608889883,2608889883 .long 1038279391,1038279391 .long 652995533,652995533 .long 1764173646,1764173646 .long 3451040383,3451040383 .long 2675275242,2675275242 .long 453576978,453576978 .long 2659418909,2659418909 .long 1949051992,1949051992 .long 773462580,773462580 .long 756751158,756751158 .long 2993581788,2993581788 .long 3998898868,3998898868 .long 4221608027,4221608027 .long 4132590244,4132590244 .long 1295727478,1295727478 .long 1641469623,1641469623 .long 3467883389,3467883389 .long 2066295122,2066295122 .long 1055122397,1055122397 .long 1898917726,1898917726 .long 2542044179,2542044179 .long 4115878822,4115878822 .long 1758581177,1758581177 .long 0,0 .long 753790401,753790401 .long 1612718144,1612718144 .long 536673507,536673507 .long 3367088505,3367088505 .long 3982187446,3982187446 .long 3194645204,3194645204 .long 1187761037,1187761037 .long 3653156455,3653156455 .long 1262041458,1262041458 .long 3729410708,3729410708 .long 3561770136,3561770136 .long 3898103984,3898103984 .long 1255133061,1255133061 .long 1808847035,1808847035 .long 720367557,720367557 .long 3853167183,3853167183 .long 385612781,385612781 .long 3309519750,3309519750 .long 3612167578,3612167578 .long 1429418854,1429418854 .long 2491778321,2491778321 .long 3477423498,3477423498 .long 284817897,284817897 .long 100794884,100794884 .long 2172616702,2172616702 .long 4031795360,4031795360 .long 1144798328,1144798328 .long 3131023141,3131023141 .long 3819481163,3819481163 .long 4082192802,4082192802 .long 4272137053,4272137053 .long 3225436288,3225436288 .long 2324664069,2324664069 .long 2912064063,2912064063 .long 3164445985,3164445985 .long 1211644016,1211644016 .long 83228145,83228145 .long 3753688163,3753688163 .long 3249976951,3249976951 .long 1977277103,1977277103 .long 1663115586,1663115586 .long 806359072,806359072 .long 452984805,452984805 .long 250868733,250868733 .long 1842533055,1842533055 .long 1288555905,1288555905 .long 336333848,336333848 .long 890442534,890442534 .long 804056259,804056259 .long 3781124030,3781124030 .long 2727843637,2727843637 .long 3427026056,3427026056 
.long 957814574,957814574 .long 1472513171,1472513171 .long 4071073621,4071073621 .long 2189328124,2189328124 .long 1195195770,1195195770 .long 2892260552,2892260552 .long 3881655738,3881655738 .long 723065138,723065138 .long 2507371494,2507371494 .long 2690670784,2690670784 .long 2558624025,2558624025 .long 3511635870,3511635870 .long 2145180835,2145180835 .long 1713513028,1713513028 .long 2116692564,2116692564 .long 2878378043,2878378043 .long 2206763019,2206763019 .long 3393603212,3393603212 .long 703524551,703524551 .long 3552098411,3552098411 .long 1007948840,1007948840 .long 2044649127,2044649127 .long 3797835452,3797835452 .long 487262998,487262998 .long 1994120109,1994120109 .long 1004593371,1004593371 .long 1446130276,1446130276 .long 1312438900,1312438900 .long 503974420,503974420 .long 3679013266,3679013266 .long 168166924,168166924 .long 1814307912,1814307912 .long 3831258296,3831258296 .long 1573044895,1573044895 .long 1859376061,1859376061 .long 4021070915,4021070915 .long 2791465668,2791465668 .long 2828112185,2828112185 .long 2761266481,2761266481 .long 937747667,937747667 .long 2339994098,2339994098 .long 854058965,854058965 .long 1137232011,1137232011 .long 1496790894,1496790894 .long 3077402074,3077402074 .long 2358086913,2358086913 .long 1691735473,1691735473 .long 3528347292,3528347292 .long 3769215305,3769215305 .long 3027004632,3027004632 .long 4199962284,4199962284 .long 133494003,133494003 .long 636152527,636152527 .long 2942657994,2942657994 .long 2390391540,2390391540 .long 3920539207,3920539207 .long 403179536,403179536 .long 3585784431,3585784431 .long 2289596656,2289596656 .long 1864705354,1864705354 .long 1915629148,1915629148 .long 605822008,605822008 .long 4054230615,4054230615 .long 3350508659,3350508659 .long 1371981463,1371981463 .long 602466507,602466507 .long 2094914977,2094914977 .long 2624877800,2624877800 .long 555687742,555687742 .long 3712699286,3712699286 .long 3703422305,3703422305 .long 2257292045,2257292045 .long 2240449039,2240449039 .long 2423288032,2423288032 .long 1111375484,1111375484 .long 3300242801,3300242801 .long 2858837708,2858837708 .long 3628615824,3628615824 .long 84083462,84083462 .long 32962295,32962295 .long 302911004,302911004 .long 2741068226,2741068226 .long 1597322602,1597322602 .long 4183250862,4183250862 .long 3501832553,3501832553 .long 2441512471,2441512471 .long 1489093017,1489093017 .long 656219450,656219450 .long 3114180135,3114180135 .long 954327513,954327513 .long 335083755,335083755 .long 3013122091,3013122091 .long 856756514,856756514 .long 3144247762,3144247762 .long 1893325225,1893325225 .long 2307821063,2307821063 .long 2811532339,2811532339 .long 3063651117,3063651117 .long 572399164,572399164 .long 2458355477,2458355477 .long 552200649,552200649 .long 1238290055,1238290055 .long 4283782570,4283782570 .long 2015897680,2015897680 .long 2061492133,2061492133 .long 2408352771,2408352771 .long 4171342169,4171342169 .long 2156497161,2156497161 .long 386731290,386731290 .long 3669999461,3669999461 .long 837215959,837215959 .long 3326231172,3326231172 .long 3093850320,3093850320 .long 3275833730,3275833730 .long 2962856233,2962856233 .long 1999449434,1999449434 .long 286199582,286199582 .long 3417354363,3417354363 .long 4233385128,4233385128 .long 3602627437,3602627437 .long 974525996,974525996 .byte 99,124,119,123,242,107,111,197 .byte 48,1,103,43,254,215,171,118 .byte 202,130,201,125,250,89,71,240 .byte 173,212,162,175,156,164,114,192 .byte 183,253,147,38,54,63,247,204 .byte 52,165,229,241,113,216,49,21 .byte 
4,199,35,195,24,150,5,154 .byte 7,18,128,226,235,39,178,117 .byte 9,131,44,26,27,110,90,160 .byte 82,59,214,179,41,227,47,132 .byte 83,209,0,237,32,252,177,91 .byte 106,203,190,57,74,76,88,207 .byte 208,239,170,251,67,77,51,133 .byte 69,249,2,127,80,60,159,168 .byte 81,163,64,143,146,157,56,245 .byte 188,182,218,33,16,255,243,210 .byte 205,12,19,236,95,151,68,23 .byte 196,167,126,61,100,93,25,115 .byte 96,129,79,220,34,42,144,136 .byte 70,238,184,20,222,94,11,219 .byte 224,50,58,10,73,6,36,92 .byte 194,211,172,98,145,149,228,121 .byte 231,200,55,109,141,213,78,169 .byte 108,86,244,234,101,122,174,8 .byte 186,120,37,46,28,166,180,198 .byte 232,221,116,31,75,189,139,138 .byte 112,62,181,102,72,3,246,14 .byte 97,53,87,185,134,193,29,158 .byte 225,248,152,17,105,217,142,148 .byte 155,30,135,233,206,85,40,223 .byte 140,161,137,13,191,230,66,104 .byte 65,153,45,15,176,84,187,22 .byte 99,124,119,123,242,107,111,197 .byte 48,1,103,43,254,215,171,118 .byte 202,130,201,125,250,89,71,240 .byte 173,212,162,175,156,164,114,192 .byte 183,253,147,38,54,63,247,204 .byte 52,165,229,241,113,216,49,21 .byte 4,199,35,195,24,150,5,154 .byte 7,18,128,226,235,39,178,117 .byte 9,131,44,26,27,110,90,160 .byte 82,59,214,179,41,227,47,132 .byte 83,209,0,237,32,252,177,91 .byte 106,203,190,57,74,76,88,207 .byte 208,239,170,251,67,77,51,133 .byte 69,249,2,127,80,60,159,168 .byte 81,163,64,143,146,157,56,245 .byte 188,182,218,33,16,255,243,210 .byte 205,12,19,236,95,151,68,23 .byte 196,167,126,61,100,93,25,115 .byte 96,129,79,220,34,42,144,136 .byte 70,238,184,20,222,94,11,219 .byte 224,50,58,10,73,6,36,92 .byte 194,211,172,98,145,149,228,121 .byte 231,200,55,109,141,213,78,169 .byte 108,86,244,234,101,122,174,8 .byte 186,120,37,46,28,166,180,198 .byte 232,221,116,31,75,189,139,138 .byte 112,62,181,102,72,3,246,14 .byte 97,53,87,185,134,193,29,158 .byte 225,248,152,17,105,217,142,148 .byte 155,30,135,233,206,85,40,223 .byte 140,161,137,13,191,230,66,104 .byte 65,153,45,15,176,84,187,22 .byte 99,124,119,123,242,107,111,197 .byte 48,1,103,43,254,215,171,118 .byte 202,130,201,125,250,89,71,240 .byte 173,212,162,175,156,164,114,192 .byte 183,253,147,38,54,63,247,204 .byte 52,165,229,241,113,216,49,21 .byte 4,199,35,195,24,150,5,154 .byte 7,18,128,226,235,39,178,117 .byte 9,131,44,26,27,110,90,160 .byte 82,59,214,179,41,227,47,132 .byte 83,209,0,237,32,252,177,91 .byte 106,203,190,57,74,76,88,207 .byte 208,239,170,251,67,77,51,133 .byte 69,249,2,127,80,60,159,168 .byte 81,163,64,143,146,157,56,245 .byte 188,182,218,33,16,255,243,210 .byte 205,12,19,236,95,151,68,23 .byte 196,167,126,61,100,93,25,115 .byte 96,129,79,220,34,42,144,136 .byte 70,238,184,20,222,94,11,219 .byte 224,50,58,10,73,6,36,92 .byte 194,211,172,98,145,149,228,121 .byte 231,200,55,109,141,213,78,169 .byte 108,86,244,234,101,122,174,8 .byte 186,120,37,46,28,166,180,198 .byte 232,221,116,31,75,189,139,138 .byte 112,62,181,102,72,3,246,14 .byte 97,53,87,185,134,193,29,158 .byte 225,248,152,17,105,217,142,148 .byte 155,30,135,233,206,85,40,223 .byte 140,161,137,13,191,230,66,104 .byte 65,153,45,15,176,84,187,22 .byte 99,124,119,123,242,107,111,197 .byte 48,1,103,43,254,215,171,118 .byte 202,130,201,125,250,89,71,240 .byte 173,212,162,175,156,164,114,192 .byte 183,253,147,38,54,63,247,204 .byte 52,165,229,241,113,216,49,21 .byte 4,199,35,195,24,150,5,154 .byte 7,18,128,226,235,39,178,117 .byte 9,131,44,26,27,110,90,160 .byte 82,59,214,179,41,227,47,132 .byte 83,209,0,237,32,252,177,91 .byte 106,203,190,57,74,76,88,207 .byte 208,239,170,251,67,77,51,133 .byte 
69,249,2,127,80,60,159,168 .byte 81,163,64,143,146,157,56,245 .byte 188,182,218,33,16,255,243,210 .byte 205,12,19,236,95,151,68,23 .byte 196,167,126,61,100,93,25,115 .byte 96,129,79,220,34,42,144,136 .byte 70,238,184,20,222,94,11,219 .byte 224,50,58,10,73,6,36,92 .byte 194,211,172,98,145,149,228,121 .byte 231,200,55,109,141,213,78,169 .byte 108,86,244,234,101,122,174,8 .byte 186,120,37,46,28,166,180,198 .byte 232,221,116,31,75,189,139,138 .byte 112,62,181,102,72,3,246,14 .byte 97,53,87,185,134,193,29,158 .byte 225,248,152,17,105,217,142,148 .byte 155,30,135,233,206,85,40,223 .byte 140,161,137,13,191,230,66,104 .byte 65,153,45,15,176,84,187,22 .long 1,2,4,8 .long 16,32,64,128 .long 27,54,0,0 .long 0,0,0,0 .size _x86_AES_encrypt,.-_x86_AES_encrypt .globl AES_encrypt .type AES_encrypt,@function .align 16 AES_encrypt: .L_AES_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 28(%esp),%edi movl %esp,%eax subl $36,%esp andl $-64,%esp leal -127(%edi),%ebx subl %esp,%ebx negl %ebx andl $960,%ebx subl %ebx,%esp addl $4,%esp movl %eax,28(%esp) call .L004pic_point .L004pic_point: popl %ebp leal OPENSSL_ia32cap_P,%eax leal .LAES_Te-.L004pic_point(%ebp),%ebp leal 764(%esp),%ebx subl %ebp,%ebx andl $768,%ebx leal 2176(%ebp,%ebx,1),%ebp btl $25,(%eax) jnc .L005x86 movq (%esi),%mm0 movq 8(%esi),%mm4 call _sse_AES_encrypt_compact movl 28(%esp),%esp movl 24(%esp),%esi movq %mm0,(%esi) movq %mm4,8(%esi) emms popl %edi popl %esi popl %ebx popl %ebp ret .align 16 .L005x86: movl %ebp,24(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx call _x86_AES_encrypt_compact movl 28(%esp),%esp movl 24(%esp),%esi movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size AES_encrypt,.-.L_AES_encrypt_begin .type _x86_AES_decrypt_compact,@function .align 16 _x86_AES_decrypt_compact: movl %edi,20(%esp) xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) movl -128(%ebp),%edi movl -96(%ebp),%esi movl -64(%ebp),%edi movl -32(%ebp),%esi movl (%ebp),%edi movl 32(%ebp),%esi movl 64(%ebp),%edi movl 96(%ebp),%esi .align 16 .L006loop: movl %eax,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %dh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ebx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %ah,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ecx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi andl $255,%edx movzbl -128(%ebp,%edx,1),%edx movzbl %ch,%ecx movzbl -128(%ebp,%ecx,1),%ecx shll $8,%ecx xorl %ecx,%edx movl %esi,%ecx shrl $16,%ebx andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx shll $16,%ebx xorl %ebx,%edx shrl $24,%eax movzbl -128(%ebp,%eax,1),%eax shll $24,%eax xorl 
%eax,%edx movl $2155905152,%edi andl %ecx,%edi movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%eax subl %edi,%esi andl $4278124286,%eax andl $454761243,%esi xorl %esi,%eax movl $2155905152,%edi andl %eax,%edi movl %edi,%esi shrl $7,%edi leal (%eax,%eax,1),%ebx subl %edi,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %ecx,%eax xorl %esi,%ebx movl $2155905152,%edi andl %ebx,%edi movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ebp subl %edi,%esi andl $4278124286,%ebp andl $454761243,%esi xorl %ecx,%ebx roll $8,%ecx xorl %esi,%ebp xorl %eax,%ecx xorl %ebp,%eax xorl %ebx,%ecx xorl %ebp,%ebx roll $24,%eax xorl %ebp,%ecx roll $16,%ebx xorl %eax,%ecx roll $8,%ebp xorl %ebx,%ecx movl 4(%esp),%eax xorl %ebp,%ecx movl %ecx,12(%esp) movl $2155905152,%edi andl %edx,%edi movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebx subl %edi,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %esi,%ebx movl $2155905152,%edi andl %ebx,%edi movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %edx,%ebx xorl %esi,%ecx movl $2155905152,%edi andl %ecx,%edi movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%ebp subl %edi,%esi andl $4278124286,%ebp andl $454761243,%esi xorl %edx,%ecx roll $8,%edx xorl %esi,%ebp xorl %ebx,%edx xorl %ebp,%ebx xorl %ecx,%edx xorl %ebp,%ecx roll $24,%ebx xorl %ebp,%edx roll $16,%ecx xorl %ebx,%edx roll $8,%ebp xorl %ecx,%edx movl 8(%esp),%ebx xorl %ebp,%edx movl %edx,16(%esp) movl $2155905152,%edi andl %eax,%edi movl %edi,%esi shrl $7,%edi leal (%eax,%eax,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %esi,%ecx movl $2155905152,%edi andl %ecx,%edi movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%edx subl %edi,%esi andl $4278124286,%edx andl $454761243,%esi xorl %eax,%ecx xorl %esi,%edx movl $2155905152,%edi andl %edx,%edi movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebp subl %edi,%esi andl $4278124286,%ebp andl $454761243,%esi xorl %eax,%edx roll $8,%eax xorl %esi,%ebp xorl %ecx,%eax xorl %ebp,%ecx xorl %edx,%eax xorl %ebp,%edx roll $24,%ecx xorl %ebp,%eax roll $16,%edx xorl %ecx,%eax roll $8,%ebp xorl %edx,%eax xorl %ebp,%eax movl $2155905152,%edi andl %ebx,%edi movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %esi,%ecx movl $2155905152,%edi andl %ecx,%edi movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%edx subl %edi,%esi andl $4278124286,%edx andl $454761243,%esi xorl %ebx,%ecx xorl %esi,%edx movl $2155905152,%edi andl %edx,%edi movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebp subl %edi,%esi andl $4278124286,%ebp andl $454761243,%esi xorl %ebx,%edx roll $8,%ebx xorl %esi,%ebp xorl %ecx,%ebx xorl %ebp,%ecx xorl %edx,%ebx xorl %ebp,%edx roll $24,%ecx xorl %ebp,%ebx roll $16,%edx xorl %ecx,%ebx roll $8,%ebp xorl %edx,%ebx movl 12(%esp),%ecx xorl %ebp,%ebx movl 16(%esp),%edx movl 20(%esp),%edi movl 28(%esp),%ebp addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx cmpl 24(%esp),%edi movl %edi,20(%esp) jb .L006loop movl %eax,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %dh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ebx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %ah,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %edx,%edi shrl 
$16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ecx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi movzbl -128(%ebp,%esi,1),%esi movzbl %bh,%edi movzbl -128(%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edi movzbl -128(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movzbl -128(%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl 20(%esp),%edi andl $255,%edx movzbl -128(%ebp,%edx,1),%edx movzbl %ch,%ecx movzbl -128(%ebp,%ecx,1),%ecx shll $8,%ecx xorl %ecx,%edx movl %esi,%ecx shrl $16,%ebx andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx shll $16,%ebx xorl %ebx,%edx movl 8(%esp),%ebx shrl $24,%eax movzbl -128(%ebp,%eax,1),%eax shll $24,%eax xorl %eax,%edx movl 4(%esp),%eax xorl 16(%edi),%eax xorl 20(%edi),%ebx xorl 24(%edi),%ecx xorl 28(%edi),%edx ret .size _x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact .type _sse_AES_decrypt_compact,@function .align 16 _sse_AES_decrypt_compact: pxor (%edi),%mm0 pxor 8(%edi),%mm4 movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) movl $454761243,%eax movl %eax,8(%esp) movl %eax,12(%esp) movl -128(%ebp),%eax movl -96(%ebp),%ebx movl -64(%ebp),%ecx movl -32(%ebp),%edx movl (%ebp),%eax movl 32(%ebp),%ebx movl 64(%ebp),%ecx movl 96(%ebp),%edx .align 16 .L007loop: pshufw $12,%mm0,%mm1 pshufw $9,%mm4,%mm5 movd %mm1,%eax movd %mm5,%ebx movl %edi,20(%esp) movzbl %al,%esi movzbl %ah,%edx pshufw $6,%mm0,%mm2 movzbl -128(%ebp,%esi,1),%ecx movzbl %bl,%edi movzbl -128(%ebp,%edx,1),%edx shrl $16,%eax shll $8,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $16,%esi pshufw $3,%mm4,%mm6 orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %ah,%edi shll $24,%esi shrl $16,%ebx orl %esi,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shll $24,%esi orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %al,%edi shll $8,%esi movd %mm2,%eax orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %bl,%edi shll $16,%esi movd %mm6,%ebx movd %ecx,%mm0 movzbl -128(%ebp,%edi,1),%ecx movzbl %al,%edi orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi movzbl %bl,%edi orl %esi,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %ah,%edi shll $16,%esi shrl $16,%eax orl %esi,%edx movzbl -128(%ebp,%edi,1),%esi movzbl %bh,%edi shrl $16,%ebx shll $8,%esi movd %edx,%mm1 movzbl -128(%ebp,%edi,1),%edx movzbl %bh,%edi shll $24,%edx andl $255,%ebx orl %esi,%edx punpckldq %mm1,%mm0 movzbl -128(%ebp,%edi,1),%esi movzbl %al,%edi shll $8,%esi movzbl %ah,%eax movzbl -128(%ebp,%ebx,1),%ebx orl %esi,%ecx movzbl -128(%ebp,%edi,1),%esi orl %ebx,%edx shll $16,%esi movzbl -128(%ebp,%eax,1),%eax orl %esi,%edx shll $24,%eax orl %eax,%ecx movl 20(%esp),%edi movd %edx,%mm4 movd %ecx,%mm5 punpckldq %mm5,%mm4 addl $16,%edi cmpl 24(%esp),%edi ja .L008out movq %mm0,%mm3 movq %mm4,%mm7 pshufw $228,%mm0,%mm2 pshufw $228,%mm4,%mm6 movq %mm0,%mm1 movq %mm4,%mm5 pshufw $177,%mm0,%mm0 pshufw $177,%mm4,%mm4 pslld $8,%mm2 pslld $8,%mm6 psrld $8,%mm3 psrld $8,%mm7 pxor %mm2,%mm0 pxor %mm6,%mm4 pxor %mm3,%mm0 pxor %mm7,%mm4 pslld $16,%mm2 pslld $16,%mm6 psrld $16,%mm3 psrld $16,%mm7 pxor %mm2,%mm0 pxor %mm6,%mm4 pxor %mm3,%mm0 pxor %mm7,%mm4 movq 8(%esp),%mm3 pxor %mm2,%mm2 pxor %mm6,%mm6 pcmpgtb %mm1,%mm2 pcmpgtb %mm5,%mm6 pand %mm3,%mm2 pand %mm3,%mm6 paddb %mm1,%mm1 paddb %mm5,%mm5 pxor %mm2,%mm1 pxor %mm6,%mm5 movq %mm1,%mm3 movq %mm5,%mm7 movq %mm1,%mm2 movq %mm5,%mm6 pxor %mm1,%mm0 pxor %mm5,%mm4 pslld $24,%mm3 pslld 
$24,%mm7 psrld $8,%mm2 psrld $8,%mm6 pxor %mm3,%mm0 pxor %mm7,%mm4 pxor %mm2,%mm0 pxor %mm6,%mm4 movq 8(%esp),%mm2 pxor %mm3,%mm3 pxor %mm7,%mm7 pcmpgtb %mm1,%mm3 pcmpgtb %mm5,%mm7 pand %mm2,%mm3 pand %mm2,%mm7 paddb %mm1,%mm1 paddb %mm5,%mm5 pxor %mm3,%mm1 pxor %mm7,%mm5 pshufw $177,%mm1,%mm3 pshufw $177,%mm5,%mm7 pxor %mm1,%mm0 pxor %mm5,%mm4 pxor %mm3,%mm0 pxor %mm7,%mm4 pxor %mm3,%mm3 pxor %mm7,%mm7 pcmpgtb %mm1,%mm3 pcmpgtb %mm5,%mm7 pand %mm2,%mm3 pand %mm2,%mm7 paddb %mm1,%mm1 paddb %mm5,%mm5 pxor %mm3,%mm1 pxor %mm7,%mm5 pxor %mm1,%mm0 pxor %mm5,%mm4 movq %mm1,%mm3 movq %mm5,%mm7 pshufw $177,%mm1,%mm2 pshufw $177,%mm5,%mm6 pxor %mm2,%mm0 pxor %mm6,%mm4 pslld $8,%mm1 pslld $8,%mm5 psrld $8,%mm3 psrld $8,%mm7 movq (%edi),%mm2 movq 8(%edi),%mm6 pxor %mm1,%mm0 pxor %mm5,%mm4 pxor %mm3,%mm0 pxor %mm7,%mm4 movl -128(%ebp),%eax pslld $16,%mm1 pslld $16,%mm5 movl -64(%ebp),%ebx psrld $16,%mm3 psrld $16,%mm7 movl (%ebp),%ecx pxor %mm1,%mm0 pxor %mm5,%mm4 movl 64(%ebp),%edx pxor %mm3,%mm0 pxor %mm7,%mm4 pxor %mm2,%mm0 pxor %mm6,%mm4 jmp .L007loop .align 16 .L008out: pxor (%edi),%mm0 pxor 8(%edi),%mm4 ret .size _sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact .type _x86_AES_decrypt,@function .align 16 _x86_AES_decrypt: movl %edi,20(%esp) xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,24(%esp) .align 16 .L009loop: movl %eax,%esi andl $255,%esi movl (%ebp,%esi,8),%esi movzbl %dh,%edi xorl 3(%ebp,%edi,8),%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi xorl 2(%ebp,%edi,8),%esi movl %ebx,%edi shrl $24,%edi xorl 1(%ebp,%edi,8),%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi movl (%ebp,%esi,8),%esi movzbl %ah,%edi xorl 3(%ebp,%edi,8),%esi movl %edx,%edi shrl $16,%edi andl $255,%edi xorl 2(%ebp,%edi,8),%esi movl %ecx,%edi shrl $24,%edi xorl 1(%ebp,%edi,8),%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi movl (%ebp,%esi,8),%esi movzbl %bh,%edi xorl 3(%ebp,%edi,8),%esi movl %eax,%edi shrl $16,%edi andl $255,%edi xorl 2(%ebp,%edi,8),%esi movl %edx,%edi shrl $24,%edi xorl 1(%ebp,%edi,8),%esi movl 20(%esp),%edi andl $255,%edx movl (%ebp,%edx,8),%edx movzbl %ch,%ecx xorl 3(%ebp,%ecx,8),%edx movl %esi,%ecx shrl $16,%ebx andl $255,%ebx xorl 2(%ebp,%ebx,8),%edx movl 8(%esp),%ebx shrl $24,%eax xorl 1(%ebp,%eax,8),%edx movl 4(%esp),%eax addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx cmpl 24(%esp),%edi movl %edi,20(%esp) jb .L009loop leal 2176(%ebp),%ebp movl -128(%ebp),%edi movl -96(%ebp),%esi movl -64(%ebp),%edi movl -32(%ebp),%esi movl (%ebp),%edi movl 32(%ebp),%esi movl 64(%ebp),%edi movl 96(%ebp),%esi leal -128(%ebp),%ebp movl %eax,%esi andl $255,%esi movzbl (%ebp,%esi,1),%esi movzbl %dh,%edi movzbl (%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %ecx,%edi shrl $16,%edi andl $255,%edi movzbl (%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ebx,%edi shrl $24,%edi movzbl (%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,4(%esp) movl %ebx,%esi andl $255,%esi movzbl (%ebp,%esi,1),%esi movzbl %ah,%edi movzbl (%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %edx,%edi shrl $16,%edi andl $255,%edi movzbl (%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %ecx,%edi shrl $24,%edi movzbl (%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl %esi,8(%esp) movl %ecx,%esi andl $255,%esi movzbl (%ebp,%esi,1),%esi movzbl %bh,%edi movzbl (%ebp,%edi,1),%edi shll $8,%edi xorl %edi,%esi movl %eax,%edi shrl $16,%edi andl $255,%edi movzbl 
(%ebp,%edi,1),%edi shll $16,%edi xorl %edi,%esi movl %edx,%edi shrl $24,%edi movzbl (%ebp,%edi,1),%edi shll $24,%edi xorl %edi,%esi movl 20(%esp),%edi andl $255,%edx movzbl (%ebp,%edx,1),%edx movzbl %ch,%ecx movzbl (%ebp,%ecx,1),%ecx shll $8,%ecx xorl %ecx,%edx movl %esi,%ecx shrl $16,%ebx andl $255,%ebx movzbl (%ebp,%ebx,1),%ebx shll $16,%ebx xorl %ebx,%edx movl 8(%esp),%ebx shrl $24,%eax movzbl (%ebp,%eax,1),%eax shll $24,%eax xorl %eax,%edx movl 4(%esp),%eax leal -2048(%ebp),%ebp addl $16,%edi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx ret .align 64 .LAES_Td: .long 1353184337,1353184337 .long 1399144830,1399144830 .long 3282310938,3282310938 .long 2522752826,2522752826 .long 3412831035,3412831035 .long 4047871263,4047871263 .long 2874735276,2874735276 .long 2466505547,2466505547 .long 1442459680,1442459680 .long 4134368941,4134368941 .long 2440481928,2440481928 .long 625738485,625738485 .long 4242007375,4242007375 .long 3620416197,3620416197 .long 2151953702,2151953702 .long 2409849525,2409849525 .long 1230680542,1230680542 .long 1729870373,1729870373 .long 2551114309,2551114309 .long 3787521629,3787521629 .long 41234371,41234371 .long 317738113,317738113 .long 2744600205,2744600205 .long 3338261355,3338261355 .long 3881799427,3881799427 .long 2510066197,2510066197 .long 3950669247,3950669247 .long 3663286933,3663286933 .long 763608788,763608788 .long 3542185048,3542185048 .long 694804553,694804553 .long 1154009486,1154009486 .long 1787413109,1787413109 .long 2021232372,2021232372 .long 1799248025,1799248025 .long 3715217703,3715217703 .long 3058688446,3058688446 .long 397248752,397248752 .long 1722556617,1722556617 .long 3023752829,3023752829 .long 407560035,407560035 .long 2184256229,2184256229 .long 1613975959,1613975959 .long 1165972322,1165972322 .long 3765920945,3765920945 .long 2226023355,2226023355 .long 480281086,480281086 .long 2485848313,2485848313 .long 1483229296,1483229296 .long 436028815,436028815 .long 2272059028,2272059028 .long 3086515026,3086515026 .long 601060267,601060267 .long 3791801202,3791801202 .long 1468997603,1468997603 .long 715871590,715871590 .long 120122290,120122290 .long 63092015,63092015 .long 2591802758,2591802758 .long 2768779219,2768779219 .long 4068943920,4068943920 .long 2997206819,2997206819 .long 3127509762,3127509762 .long 1552029421,1552029421 .long 723308426,723308426 .long 2461301159,2461301159 .long 4042393587,4042393587 .long 2715969870,2715969870 .long 3455375973,3455375973 .long 3586000134,3586000134 .long 526529745,526529745 .long 2331944644,2331944644 .long 2639474228,2639474228 .long 2689987490,2689987490 .long 853641733,853641733 .long 1978398372,1978398372 .long 971801355,971801355 .long 2867814464,2867814464 .long 111112542,111112542 .long 1360031421,1360031421 .long 4186579262,4186579262 .long 1023860118,1023860118 .long 2919579357,2919579357 .long 1186850381,1186850381 .long 3045938321,3045938321 .long 90031217,90031217 .long 1876166148,1876166148 .long 4279586912,4279586912 .long 620468249,620468249 .long 2548678102,2548678102 .long 3426959497,3426959497 .long 2006899047,2006899047 .long 3175278768,3175278768 .long 2290845959,2290845959 .long 945494503,945494503 .long 3689859193,3689859193 .long 1191869601,1191869601 .long 3910091388,3910091388 .long 3374220536,3374220536 .long 0,0 .long 2206629897,2206629897 .long 1223502642,1223502642 .long 2893025566,2893025566 .long 1316117100,1316117100 .long 4227796733,4227796733 .long 1446544655,1446544655 .long 517320253,517320253 .long 658058550,658058550 
.long 1691946762,1691946762 .long 564550760,564550760 .long 3511966619,3511966619 .long 976107044,976107044 .long 2976320012,2976320012 .long 266819475,266819475 .long 3533106868,3533106868 .long 2660342555,2660342555 .long 1338359936,1338359936 .long 2720062561,2720062561 .long 1766553434,1766553434 .long 370807324,370807324 .long 179999714,179999714 .long 3844776128,3844776128 .long 1138762300,1138762300 .long 488053522,488053522 .long 185403662,185403662 .long 2915535858,2915535858 .long 3114841645,3114841645 .long 3366526484,3366526484 .long 2233069911,2233069911 .long 1275557295,1275557295 .long 3151862254,3151862254 .long 4250959779,4250959779 .long 2670068215,2670068215 .long 3170202204,3170202204 .long 3309004356,3309004356 .long 880737115,880737115 .long 1982415755,1982415755 .long 3703972811,3703972811 .long 1761406390,1761406390 .long 1676797112,1676797112 .long 3403428311,3403428311 .long 277177154,277177154 .long 1076008723,1076008723 .long 538035844,538035844 .long 2099530373,2099530373 .long 4164795346,4164795346 .long 288553390,288553390 .long 1839278535,1839278535 .long 1261411869,1261411869 .long 4080055004,4080055004 .long 3964831245,3964831245 .long 3504587127,3504587127 .long 1813426987,1813426987 .long 2579067049,2579067049 .long 4199060497,4199060497 .long 577038663,577038663 .long 3297574056,3297574056 .long 440397984,440397984 .long 3626794326,3626794326 .long 4019204898,4019204898 .long 3343796615,3343796615 .long 3251714265,3251714265 .long 4272081548,4272081548 .long 906744984,906744984 .long 3481400742,3481400742 .long 685669029,685669029 .long 646887386,646887386 .long 2764025151,2764025151 .long 3835509292,3835509292 .long 227702864,227702864 .long 2613862250,2613862250 .long 1648787028,1648787028 .long 3256061430,3256061430 .long 3904428176,3904428176 .long 1593260334,1593260334 .long 4121936770,4121936770 .long 3196083615,3196083615 .long 2090061929,2090061929 .long 2838353263,2838353263 .long 3004310991,3004310991 .long 999926984,999926984 .long 2809993232,2809993232 .long 1852021992,1852021992 .long 2075868123,2075868123 .long 158869197,158869197 .long 4095236462,4095236462 .long 28809964,28809964 .long 2828685187,2828685187 .long 1701746150,1701746150 .long 2129067946,2129067946 .long 147831841,147831841 .long 3873969647,3873969647 .long 3650873274,3650873274 .long 3459673930,3459673930 .long 3557400554,3557400554 .long 3598495785,3598495785 .long 2947720241,2947720241 .long 824393514,824393514 .long 815048134,815048134 .long 3227951669,3227951669 .long 935087732,935087732 .long 2798289660,2798289660 .long 2966458592,2966458592 .long 366520115,366520115 .long 1251476721,1251476721 .long 4158319681,4158319681 .long 240176511,240176511 .long 804688151,804688151 .long 2379631990,2379631990 .long 1303441219,1303441219 .long 1414376140,1414376140 .long 3741619940,3741619940 .long 3820343710,3820343710 .long 461924940,461924940 .long 3089050817,3089050817 .long 2136040774,2136040774 .long 82468509,82468509 .long 1563790337,1563790337 .long 1937016826,1937016826 .long 776014843,776014843 .long 1511876531,1511876531 .long 1389550482,1389550482 .long 861278441,861278441 .long 323475053,323475053 .long 2355222426,2355222426 .long 2047648055,2047648055 .long 2383738969,2383738969 .long 2302415851,2302415851 .long 3995576782,3995576782 .long 902390199,902390199 .long 3991215329,3991215329 .long 1018251130,1018251130 .long 1507840668,1507840668 .long 1064563285,1064563285 .long 2043548696,2043548696 .long 3208103795,3208103795 .long 3939366739,3939366739 .long 
1537932639,1537932639 .long 342834655,342834655 .long 2262516856,2262516856 .long 2180231114,2180231114 .long 1053059257,1053059257 .long 741614648,741614648 .long 1598071746,1598071746 .long 1925389590,1925389590 .long 203809468,203809468 .long 2336832552,2336832552 .long 1100287487,1100287487 .long 1895934009,1895934009 .long 3736275976,3736275976 .long 2632234200,2632234200 .long 2428589668,2428589668 .long 1636092795,1636092795 .long 1890988757,1890988757 .long 1952214088,1952214088 .long 1113045200,1113045200 .byte 82,9,106,213,48,54,165,56 .byte 191,64,163,158,129,243,215,251 .byte 124,227,57,130,155,47,255,135 .byte 52,142,67,68,196,222,233,203 .byte 84,123,148,50,166,194,35,61 .byte 238,76,149,11,66,250,195,78 .byte 8,46,161,102,40,217,36,178 .byte 118,91,162,73,109,139,209,37 .byte 114,248,246,100,134,104,152,22 .byte 212,164,92,204,93,101,182,146 .byte 108,112,72,80,253,237,185,218 .byte 94,21,70,87,167,141,157,132 .byte 144,216,171,0,140,188,211,10 .byte 247,228,88,5,184,179,69,6 .byte 208,44,30,143,202,63,15,2 .byte 193,175,189,3,1,19,138,107 .byte 58,145,17,65,79,103,220,234 .byte 151,242,207,206,240,180,230,115 .byte 150,172,116,34,231,173,53,133 .byte 226,249,55,232,28,117,223,110 .byte 71,241,26,113,29,41,197,137 .byte 111,183,98,14,170,24,190,27 .byte 252,86,62,75,198,210,121,32 .byte 154,219,192,254,120,205,90,244 .byte 31,221,168,51,136,7,199,49 .byte 177,18,16,89,39,128,236,95 .byte 96,81,127,169,25,181,74,13 .byte 45,229,122,159,147,201,156,239 .byte 160,224,59,77,174,42,245,176 .byte 200,235,187,60,131,83,153,97 .byte 23,43,4,126,186,119,214,38 .byte 225,105,20,99,85,33,12,125 .byte 82,9,106,213,48,54,165,56 .byte 191,64,163,158,129,243,215,251 .byte 124,227,57,130,155,47,255,135 .byte 52,142,67,68,196,222,233,203 .byte 84,123,148,50,166,194,35,61 .byte 238,76,149,11,66,250,195,78 .byte 8,46,161,102,40,217,36,178 .byte 118,91,162,73,109,139,209,37 .byte 114,248,246,100,134,104,152,22 .byte 212,164,92,204,93,101,182,146 .byte 108,112,72,80,253,237,185,218 .byte 94,21,70,87,167,141,157,132 .byte 144,216,171,0,140,188,211,10 .byte 247,228,88,5,184,179,69,6 .byte 208,44,30,143,202,63,15,2 .byte 193,175,189,3,1,19,138,107 .byte 58,145,17,65,79,103,220,234 .byte 151,242,207,206,240,180,230,115 .byte 150,172,116,34,231,173,53,133 .byte 226,249,55,232,28,117,223,110 .byte 71,241,26,113,29,41,197,137 .byte 111,183,98,14,170,24,190,27 .byte 252,86,62,75,198,210,121,32 .byte 154,219,192,254,120,205,90,244 .byte 31,221,168,51,136,7,199,49 .byte 177,18,16,89,39,128,236,95 .byte 96,81,127,169,25,181,74,13 .byte 45,229,122,159,147,201,156,239 .byte 160,224,59,77,174,42,245,176 .byte 200,235,187,60,131,83,153,97 .byte 23,43,4,126,186,119,214,38 .byte 225,105,20,99,85,33,12,125 .byte 82,9,106,213,48,54,165,56 .byte 191,64,163,158,129,243,215,251 .byte 124,227,57,130,155,47,255,135 .byte 52,142,67,68,196,222,233,203 .byte 84,123,148,50,166,194,35,61 .byte 238,76,149,11,66,250,195,78 .byte 8,46,161,102,40,217,36,178 .byte 118,91,162,73,109,139,209,37 .byte 114,248,246,100,134,104,152,22 .byte 212,164,92,204,93,101,182,146 .byte 108,112,72,80,253,237,185,218 .byte 94,21,70,87,167,141,157,132 .byte 144,216,171,0,140,188,211,10 .byte 247,228,88,5,184,179,69,6 .byte 208,44,30,143,202,63,15,2 .byte 193,175,189,3,1,19,138,107 .byte 58,145,17,65,79,103,220,234 .byte 151,242,207,206,240,180,230,115 .byte 150,172,116,34,231,173,53,133 .byte 226,249,55,232,28,117,223,110 .byte 71,241,26,113,29,41,197,137 .byte 111,183,98,14,170,24,190,27 .byte 252,86,62,75,198,210,121,32 .byte 
154,219,192,254,120,205,90,244 .byte 31,221,168,51,136,7,199,49 .byte 177,18,16,89,39,128,236,95 .byte 96,81,127,169,25,181,74,13 .byte 45,229,122,159,147,201,156,239 .byte 160,224,59,77,174,42,245,176 .byte 200,235,187,60,131,83,153,97 .byte 23,43,4,126,186,119,214,38 .byte 225,105,20,99,85,33,12,125 .byte 82,9,106,213,48,54,165,56 .byte 191,64,163,158,129,243,215,251 .byte 124,227,57,130,155,47,255,135 .byte 52,142,67,68,196,222,233,203 .byte 84,123,148,50,166,194,35,61 .byte 238,76,149,11,66,250,195,78 .byte 8,46,161,102,40,217,36,178 .byte 118,91,162,73,109,139,209,37 .byte 114,248,246,100,134,104,152,22 .byte 212,164,92,204,93,101,182,146 .byte 108,112,72,80,253,237,185,218 .byte 94,21,70,87,167,141,157,132 .byte 144,216,171,0,140,188,211,10 .byte 247,228,88,5,184,179,69,6 .byte 208,44,30,143,202,63,15,2 .byte 193,175,189,3,1,19,138,107 .byte 58,145,17,65,79,103,220,234 .byte 151,242,207,206,240,180,230,115 .byte 150,172,116,34,231,173,53,133 .byte 226,249,55,232,28,117,223,110 .byte 71,241,26,113,29,41,197,137 .byte 111,183,98,14,170,24,190,27 .byte 252,86,62,75,198,210,121,32 .byte 154,219,192,254,120,205,90,244 .byte 31,221,168,51,136,7,199,49 .byte 177,18,16,89,39,128,236,95 .byte 96,81,127,169,25,181,74,13 .byte 45,229,122,159,147,201,156,239 .byte 160,224,59,77,174,42,245,176 .byte 200,235,187,60,131,83,153,97 .byte 23,43,4,126,186,119,214,38 .byte 225,105,20,99,85,33,12,125 .size _x86_AES_decrypt,.-_x86_AES_decrypt .globl AES_decrypt .type AES_decrypt,@function .align 16 AES_decrypt: .L_AES_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 28(%esp),%edi movl %esp,%eax subl $36,%esp andl $-64,%esp leal -127(%edi),%ebx subl %esp,%ebx negl %ebx andl $960,%ebx subl %ebx,%esp addl $4,%esp movl %eax,28(%esp) call .L010pic_point .L010pic_point: popl %ebp leal OPENSSL_ia32cap_P,%eax leal .LAES_Td-.L010pic_point(%ebp),%ebp leal 764(%esp),%ebx subl %ebp,%ebx andl $768,%ebx leal 2176(%ebp,%ebx,1),%ebp btl $25,(%eax) jnc .L011x86 movq (%esi),%mm0 movq 8(%esi),%mm4 call _sse_AES_decrypt_compact movl 28(%esp),%esp movl 24(%esp),%esi movq %mm0,(%esi) movq %mm4,8(%esi) emms popl %edi popl %esi popl %ebx popl %ebp ret .align 16 .L011x86: movl %ebp,24(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx call _x86_AES_decrypt_compact movl 28(%esp),%esp movl 24(%esp),%esi movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size AES_decrypt,.-.L_AES_decrypt_begin .globl AES_cbc_encrypt .type AES_cbc_encrypt,@function .align 16 AES_cbc_encrypt: .L_AES_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ecx cmpl $0,%ecx je .L012drop_out call .L013pic_point .L013pic_point: popl %ebp leal OPENSSL_ia32cap_P,%eax cmpl $0,40(%esp) leal .LAES_Te-.L013pic_point(%ebp),%ebp jne .L014picked_te leal .LAES_Td-.LAES_Te(%ebp),%ebp .L014picked_te: pushfl cld cmpl $512,%ecx jb .L015slow_way testl $15,%ecx jnz .L015slow_way btl $28,(%eax) jc .L015slow_way leal -324(%esp),%esi andl $-64,%esi movl %ebp,%eax leal 2304(%ebp),%ebx movl %esi,%edx andl $4095,%eax andl $4095,%ebx andl $4095,%edx cmpl %ebx,%edx jb .L016tbl_break_out subl %ebx,%edx subl %edx,%esi jmp .L017tbl_ok .align 4 .L016tbl_break_out: subl %eax,%edx andl $4095,%edx addl $384,%edx subl %edx,%esi .align 4 .L017tbl_ok: leal 24(%esp),%edx xchgl %esi,%esp addl $4,%esp movl %ebp,24(%esp) movl %esi,28(%esp) movl (%edx),%eax movl 4(%edx),%ebx movl 12(%edx),%edi movl 16(%edx),%esi movl 20(%edx),%edx movl 
%eax,32(%esp) movl %ebx,36(%esp) movl %ecx,40(%esp) movl %edi,44(%esp) movl %esi,48(%esp) movl $0,316(%esp) movl %edi,%ebx movl $61,%ecx subl %ebp,%ebx movl %edi,%esi andl $4095,%ebx leal 76(%esp),%edi cmpl $2304,%ebx jb .L018do_copy cmpl $3852,%ebx jb .L019skip_copy .align 4 .L018do_copy: movl %edi,44(%esp) .long 2784229001 .L019skip_copy: movl $16,%edi .align 4 .L020prefetch_tbl: movl (%ebp),%eax movl 32(%ebp),%ebx movl 64(%ebp),%ecx movl 96(%ebp),%esi leal 128(%ebp),%ebp subl $1,%edi jnz .L020prefetch_tbl subl $2048,%ebp movl 32(%esp),%esi movl 48(%esp),%edi cmpl $0,%edx je .L021fast_decrypt movl (%edi),%eax movl 4(%edi),%ebx .align 16 .L022fast_enc_loop: movl 8(%edi),%ecx movl 12(%edi),%edx xorl (%esi),%eax xorl 4(%esi),%ebx xorl 8(%esi),%ecx xorl 12(%esi),%edx movl 44(%esp),%edi call _x86_AES_encrypt movl 32(%esp),%esi movl 36(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) leal 16(%esi),%esi movl 40(%esp),%ecx movl %esi,32(%esp) leal 16(%edi),%edx movl %edx,36(%esp) subl $16,%ecx movl %ecx,40(%esp) jnz .L022fast_enc_loop movl 48(%esp),%esi movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) cmpl $0,316(%esp) movl 44(%esp),%edi je .L023skip_ezero movl $60,%ecx xorl %eax,%eax .align 4 .long 2884892297 .L023skip_ezero: movl 28(%esp),%esp popfl .L012drop_out: popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L021fast_decrypt: cmpl 36(%esp),%esi je .L024fast_dec_in_place movl %edi,52(%esp) .align 4 .align 16 .L025fast_dec_loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl 44(%esp),%edi call _x86_AES_decrypt movl 52(%esp),%edi movl 40(%esp),%esi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 36(%esp),%edi movl 32(%esp),%esi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 40(%esp),%ecx movl %esi,52(%esp) leal 16(%esi),%esi movl %esi,32(%esp) leal 16(%edi),%edi movl %edi,36(%esp) subl $16,%ecx movl %ecx,40(%esp) jnz .L025fast_dec_loop movl 52(%esp),%edi movl 48(%esp),%esi movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) jmp .L026fast_dec_out .align 16 .L024fast_dec_in_place: .L027fast_dec_in_place_loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx leal 60(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 44(%esp),%edi call _x86_AES_decrypt movl 48(%esp),%edi movl 36(%esp),%esi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) leal 16(%esi),%esi movl %esi,36(%esp) leal 60(%esp),%esi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 32(%esp),%esi movl 40(%esp),%ecx leal 16(%esi),%esi movl %esi,32(%esp) subl $16,%ecx movl %ecx,40(%esp) jnz .L027fast_dec_in_place_loop .align 4 .L026fast_dec_out: cmpl $0,316(%esp) movl 44(%esp),%edi je .L028skip_dzero movl $60,%ecx xorl %eax,%eax .align 4 .long 2884892297 .L028skip_dzero: movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L015slow_way: movl (%eax),%eax movl 36(%esp),%edi leal -80(%esp),%esi andl $-64,%esi leal -143(%edi),%ebx subl %esi,%ebx negl %ebx andl $960,%ebx subl %ebx,%esi leal 768(%esi),%ebx subl %ebp,%ebx andl $768,%ebx leal 2176(%ebp,%ebx,1),%ebp leal 
24(%esp),%edx xchgl %esi,%esp addl $4,%esp movl %ebp,24(%esp) movl %esi,28(%esp) movl %eax,52(%esp) movl (%edx),%eax movl 4(%edx),%ebx movl 16(%edx),%esi movl 20(%edx),%edx movl %eax,32(%esp) movl %ebx,36(%esp) movl %ecx,40(%esp) movl %edi,44(%esp) movl %esi,48(%esp) movl %esi,%edi movl %eax,%esi cmpl $0,%edx je .L029slow_decrypt cmpl $16,%ecx movl %ebx,%edx jb .L030slow_enc_tail btl $25,52(%esp) jnc .L031slow_enc_x86 movq (%edi),%mm0 movq 8(%edi),%mm4 .align 16 .L032slow_enc_loop_sse: pxor (%esi),%mm0 pxor 8(%esi),%mm4 movl 44(%esp),%edi call _sse_AES_encrypt_compact movl 32(%esp),%esi movl 36(%esp),%edi movl 40(%esp),%ecx movq %mm0,(%edi) movq %mm4,8(%edi) leal 16(%esi),%esi movl %esi,32(%esp) leal 16(%edi),%edx movl %edx,36(%esp) subl $16,%ecx cmpl $16,%ecx movl %ecx,40(%esp) jae .L032slow_enc_loop_sse testl $15,%ecx jnz .L030slow_enc_tail movl 48(%esp),%esi movq %mm0,(%esi) movq %mm4,8(%esi) emms movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L031slow_enc_x86: movl (%edi),%eax movl 4(%edi),%ebx .align 4 .L033slow_enc_loop_x86: movl 8(%edi),%ecx movl 12(%edi),%edx xorl (%esi),%eax xorl 4(%esi),%ebx xorl 8(%esi),%ecx xorl 12(%esi),%edx movl 44(%esp),%edi call _x86_AES_encrypt_compact movl 32(%esp),%esi movl 36(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 40(%esp),%ecx leal 16(%esi),%esi movl %esi,32(%esp) leal 16(%edi),%edx movl %edx,36(%esp) subl $16,%ecx cmpl $16,%ecx movl %ecx,40(%esp) jae .L033slow_enc_loop_x86 testl $15,%ecx jnz .L030slow_enc_tail movl 48(%esp),%esi movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L030slow_enc_tail: emms movl %edx,%edi movl $16,%ebx subl %ecx,%ebx cmpl %esi,%edi je .L034enc_in_place .align 4 .long 2767451785 jmp .L035enc_skip_in_place .L034enc_in_place: leal (%edi,%ecx,1),%edi .L035enc_skip_in_place: movl %ebx,%ecx xorl %eax,%eax .align 4 .long 2868115081 movl 48(%esp),%edi movl %edx,%esi movl (%edi),%eax movl 4(%edi),%ebx movl $16,40(%esp) jmp .L033slow_enc_loop_x86 .align 16 .L029slow_decrypt: btl $25,52(%esp) jnc .L036slow_dec_loop_x86 .align 4 .L037slow_dec_loop_sse: movq (%esi),%mm0 movq 8(%esi),%mm4 movl 44(%esp),%edi call _sse_AES_decrypt_compact movl 32(%esp),%esi leal 60(%esp),%eax movl 36(%esp),%ebx movl 40(%esp),%ecx movl 48(%esp),%edi movq (%esi),%mm1 movq 8(%esi),%mm5 pxor (%edi),%mm0 pxor 8(%edi),%mm4 movq %mm1,(%edi) movq %mm5,8(%edi) subl $16,%ecx jc .L038slow_dec_partial_sse movq %mm0,(%ebx) movq %mm4,8(%ebx) leal 16(%ebx),%ebx movl %ebx,36(%esp) leal 16(%esi),%esi movl %esi,32(%esp) movl %ecx,40(%esp) jnz .L037slow_dec_loop_sse emms movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L038slow_dec_partial_sse: movq %mm0,(%eax) movq %mm4,8(%eax) emms addl $16,%ecx movl %ebx,%edi movl %eax,%esi .align 4 .long 2767451785 movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L036slow_dec_loop_x86: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx leal 60(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 44(%esp),%edi call _x86_AES_decrypt_compact movl 48(%esp),%edi movl 40(%esp),%esi xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx subl $16,%esi jc .L039slow_dec_partial_x86 movl %esi,40(%esp) movl 36(%esp),%esi movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) 
movl %edx,12(%esi) leal 16(%esi),%esi movl %esi,36(%esp) leal 60(%esp),%esi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 32(%esp),%esi leal 16(%esi),%esi movl %esi,32(%esp) jnz .L036slow_dec_loop_x86 movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 16 .L039slow_dec_partial_x86: leal 60(%esp),%esi movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) movl 32(%esp),%esi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 40(%esp),%ecx movl 36(%esp),%edi leal 60(%esp),%esi .align 4 .long 2767451785 movl 28(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret .size AES_cbc_encrypt,.-.L_AES_cbc_encrypt_begin .type _x86_AES_set_encrypt_key,@function .align 16 _x86_AES_set_encrypt_key: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 24(%esp),%esi movl 32(%esp),%edi testl $-1,%esi jz .L040badpointer testl $-1,%edi jz .L040badpointer call .L041pic_point .L041pic_point: popl %ebp leal .LAES_Te-.L041pic_point(%ebp),%ebp leal 2176(%ebp),%ebp movl -128(%ebp),%eax movl -96(%ebp),%ebx movl -64(%ebp),%ecx movl -32(%ebp),%edx movl (%ebp),%eax movl 32(%ebp),%ebx movl 64(%ebp),%ecx movl 96(%ebp),%edx movl 28(%esp),%ecx cmpl $128,%ecx je .L04210rounds cmpl $192,%ecx je .L04312rounds cmpl $256,%ecx je .L04414rounds movl $-2,%eax jmp .L045exit .L04210rounds: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) xorl %ecx,%ecx jmp .L04610shortcut .align 4 .L04710loop: movl (%edi),%eax movl 12(%edi),%edx .L04610shortcut: movzbl %dl,%esi movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $24,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shrl $16,%edx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $8,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shll $16,%ebx xorl %ebx,%eax xorl 896(%ebp,%ecx,4),%eax movl %eax,16(%edi) xorl 4(%edi),%eax movl %eax,20(%edi) xorl 8(%edi),%eax movl %eax,24(%edi) xorl 12(%edi),%eax movl %eax,28(%edi) incl %ecx addl $16,%edi cmpl $10,%ecx jl .L04710loop movl $10,80(%edi) xorl %eax,%eax jmp .L045exit .L04312rounds: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 16(%esi),%ecx movl 20(%esi),%edx movl %ecx,16(%edi) movl %edx,20(%edi) xorl %ecx,%ecx jmp .L04812shortcut .align 4 .L04912loop: movl (%edi),%eax movl 20(%edi),%edx .L04812shortcut: movzbl %dl,%esi movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $24,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shrl $16,%edx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $8,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shll $16,%ebx xorl %ebx,%eax xorl 896(%ebp,%ecx,4),%eax movl %eax,24(%edi) xorl 4(%edi),%eax movl %eax,28(%edi) xorl 8(%edi),%eax movl %eax,32(%edi) xorl 12(%edi),%eax movl %eax,36(%edi) cmpl $7,%ecx je .L05012break incl %ecx xorl 16(%edi),%eax movl %eax,40(%edi) xorl 20(%edi),%eax movl %eax,44(%edi) addl $24,%edi jmp .L04912loop .L05012break: movl $12,72(%edi) xorl %eax,%eax jmp .L045exit .L04414rounds: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 16(%esi),%eax movl 20(%esi),%ebx movl 24(%esi),%ecx 
movl 28(%esi),%edx movl %eax,16(%edi) movl %ebx,20(%edi) movl %ecx,24(%edi) movl %edx,28(%edi) xorl %ecx,%ecx jmp .L05114shortcut .align 4 .L05214loop: movl 28(%edi),%edx .L05114shortcut: movl (%edi),%eax movzbl %dl,%esi movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $24,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shrl $16,%edx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $8,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shll $16,%ebx xorl %ebx,%eax xorl 896(%ebp,%ecx,4),%eax movl %eax,32(%edi) xorl 4(%edi),%eax movl %eax,36(%edi) xorl 8(%edi),%eax movl %eax,40(%edi) xorl 12(%edi),%eax movl %eax,44(%edi) cmpl $6,%ecx je .L05314break incl %ecx movl %eax,%edx movl 16(%edi),%eax movzbl %dl,%esi movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shrl $16,%edx shll $8,%ebx movzbl %dl,%esi xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx movzbl %dh,%esi shll $16,%ebx xorl %ebx,%eax movzbl -128(%ebp,%esi,1),%ebx shll $24,%ebx xorl %ebx,%eax movl %eax,48(%edi) xorl 20(%edi),%eax movl %eax,52(%edi) xorl 24(%edi),%eax movl %eax,56(%edi) xorl 28(%edi),%eax movl %eax,60(%edi) addl $32,%edi jmp .L05214loop .L05314break: movl $14,48(%edi) xorl %eax,%eax jmp .L045exit .L040badpointer: movl $-1,%eax .L045exit: popl %edi popl %esi popl %ebx popl %ebp ret .size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key .globl private_AES_set_encrypt_key .type private_AES_set_encrypt_key,@function .align 16 private_AES_set_encrypt_key: .L_private_AES_set_encrypt_key_begin: call _x86_AES_set_encrypt_key ret .size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin .globl private_AES_set_decrypt_key .type private_AES_set_decrypt_key,@function .align 16 private_AES_set_decrypt_key: .L_private_AES_set_decrypt_key_begin: call _x86_AES_set_encrypt_key cmpl $0,%eax je .L054proceed ret .L054proceed: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%esi movl 240(%esi),%ecx leal (,%ecx,4),%ecx leal (%esi,%ecx,4),%edi .align 4 .L055invert: movl (%esi),%eax movl 4(%esi),%ebx movl (%edi),%ecx movl 4(%edi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,(%esi) movl %edx,4(%esi) movl 8(%esi),%eax movl 12(%esi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,8(%edi) movl %ebx,12(%edi) movl %ecx,8(%esi) movl %edx,12(%esi) addl $16,%esi subl $16,%edi cmpl %edi,%esi jne .L055invert movl 28(%esp),%edi movl 240(%edi),%esi leal -2(%esi,%esi,1),%esi leal (%edi,%esi,8),%esi movl %esi,28(%esp) movl 16(%edi),%eax .align 4 .L056permute: addl $16,%edi movl $2155905152,%ebp andl %eax,%ebp leal (%eax,%eax,1),%ebx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %esi,%ebx movl $2155905152,%ebp andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %eax,%ebx xorl %esi,%ecx movl $2155905152,%ebp andl %ecx,%ebp leal (%ecx,%ecx,1),%edx movl %ebp,%esi shrl $7,%ebp xorl %eax,%ecx subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi roll $8,%eax xorl %esi,%edx movl 4(%edi),%ebp xorl %ebx,%eax xorl %edx,%ebx xorl %ecx,%eax roll $24,%ebx xorl %edx,%ecx xorl %edx,%eax roll $16,%ecx xorl %ebx,%eax roll $8,%edx xorl %ecx,%eax movl %ebp,%ebx xorl %edx,%eax movl %eax,(%edi) movl $2155905152,%ebp andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %esi,%ecx movl $2155905152,%ebp andl %ecx,%ebp leal (%ecx,%ecx,1),%edx movl %ebp,%esi shrl $7,%ebp subl 
%ebp,%esi andl $4278124286,%edx andl $454761243,%esi xorl %ebx,%ecx xorl %esi,%edx movl $2155905152,%ebp andl %edx,%ebp leal (%edx,%edx,1),%eax movl %ebp,%esi shrl $7,%ebp xorl %ebx,%edx subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi roll $8,%ebx xorl %esi,%eax movl 8(%edi),%ebp xorl %ecx,%ebx xorl %eax,%ecx xorl %edx,%ebx roll $24,%ecx xorl %eax,%edx xorl %eax,%ebx roll $16,%edx xorl %ecx,%ebx roll $8,%eax xorl %edx,%ebx movl %ebp,%ecx xorl %eax,%ebx movl %ebx,4(%edi) movl $2155905152,%ebp andl %ecx,%ebp leal (%ecx,%ecx,1),%edx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi xorl %esi,%edx movl $2155905152,%ebp andl %edx,%ebp leal (%edx,%edx,1),%eax movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi xorl %ecx,%edx xorl %esi,%eax movl $2155905152,%ebp andl %eax,%ebp leal (%eax,%eax,1),%ebx movl %ebp,%esi shrl $7,%ebp xorl %ecx,%eax subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi roll $8,%ecx xorl %esi,%ebx movl 12(%edi),%ebp xorl %edx,%ecx xorl %ebx,%edx xorl %eax,%ecx roll $24,%edx xorl %ebx,%eax xorl %ebx,%ecx roll $16,%eax xorl %edx,%ecx roll $8,%ebx xorl %eax,%ecx movl %ebp,%edx xorl %ebx,%ecx movl %ecx,8(%edi) movl $2155905152,%ebp andl %edx,%ebp leal (%edx,%edx,1),%eax movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi xorl %esi,%eax movl $2155905152,%ebp andl %eax,%ebp leal (%eax,%eax,1),%ebx movl %ebp,%esi shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %edx,%eax xorl %esi,%ebx movl $2155905152,%ebp andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx movl %ebp,%esi shrl $7,%ebp xorl %edx,%ebx subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi roll $8,%edx xorl %esi,%ecx movl 16(%edi),%ebp xorl %eax,%edx xorl %ecx,%eax xorl %ebx,%edx roll $24,%eax xorl %ecx,%ebx xorl %ecx,%edx roll $16,%ebx xorl %eax,%edx roll $8,%ecx xorl %ebx,%edx movl %ebp,%eax xorl %ecx,%edx movl %edx,12(%edi) cmpl 28(%esp),%edi jb .L056permute xorl %eax,%eax popl %edi popl %esi popl %ebx popl %ebp ret .size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin .byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .comm OPENSSL_ia32cap_P,16,4 #endif Index: head/secure/lib/libcrypto/i386/aesni-x86.S =================================================================== --- head/secure/lib/libcrypto/i386/aesni-x86.S (revision 299480) +++ head/secure/lib/libcrypto/i386/aesni-x86.S (revision 299481) @@ -1,4900 +1,4901 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from aesni-x86.pl. 
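#
# Orientation note (editorial, not part of the generated output): in the body
# below, the AES-NI and SSSE3 instructions are emitted as raw opcode bytes so
# the file still assembles with toolchains that lack the mnemonics.  For
# example, .byte 102,15,56,220,209 is aesenc %xmm1,%xmm2; a fourth byte of 221
# is aesenclast, 222 is aesdec, 223 is aesdeclast, and .byte 102,15,56,0,...
# is pshufb.  For orientation only, the single-block aesni_encrypt routine
# that opens the file follows the usual xor / aesenc ... / aesenclast round
# pattern, sketched here with C intrinsics (the names aes_encrypt_block, rk
# and nrounds are illustrative and not part of OpenSSL's interface; the
# assembly keeps its own round-counter bookkeeping):
#
#   #include <stdint.h>
#   #include <emmintrin.h>
#   #include <wmmintrin.h>   /* AES-NI intrinsics; compile with -maes */
#
#   /* Encrypt one 16-byte block with an expanded key of nrounds+1 round
#    * keys (nrounds is 10, 12 or 14): whitening xor with rk[0], then
#    * nrounds-1 aesenc rounds, then one final aesenclast. */
#   static void aes_encrypt_block(const uint8_t in[16], uint8_t out[16],
#                                 const __m128i *rk, int nrounds)
#   {
#       __m128i b = _mm_loadu_si128((const __m128i *)in);
#       b = _mm_xor_si128(b, rk[0]);               /* xorps with round key 0  */
#       for (int i = 1; i < nrounds; i++)
#           b = _mm_aesenc_si128(b, rk[i]);        /* .byte 102,15,56,220,... */
#       b = _mm_aesenclast_si128(b, rk[nrounds]);  /* .byte 102,15,56,221,... */
#       _mm_storeu_si128((__m128i *)out, b);
#   }
#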
#ifdef PIC .file "aesni-x86.S" .text .globl aesni_encrypt .type aesni_encrypt,@function .align 16 aesni_encrypt: .L_aesni_encrypt_begin: movl 4(%esp),%eax movl 12(%esp),%edx movups (%eax),%xmm2 movl 240(%edx),%ecx movl 8(%esp),%eax movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L000enc1_loop_1: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L000enc1_loop_1 .byte 102,15,56,221,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movups %xmm2,(%eax) pxor %xmm2,%xmm2 ret .size aesni_encrypt,.-.L_aesni_encrypt_begin .globl aesni_decrypt .type aesni_decrypt,@function .align 16 aesni_decrypt: .L_aesni_decrypt_begin: movl 4(%esp),%eax movl 12(%esp),%edx movups (%eax),%xmm2 movl 240(%edx),%ecx movl 8(%esp),%eax movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L001dec1_loop_2: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L001dec1_loop_2 .byte 102,15,56,223,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movups %xmm2,(%eax) pxor %xmm2,%xmm2 ret .size aesni_decrypt,.-.L_aesni_decrypt_begin .type _aesni_encrypt2,@function .align 16 _aesni_encrypt2: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx addl $16,%ecx .L002enc2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 jnz .L002enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,221,208 .byte 102,15,56,221,216 ret .size _aesni_encrypt2,.-_aesni_encrypt2 .type _aesni_decrypt2,@function .align 16 _aesni_decrypt2: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx addl $16,%ecx .L003dec2_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 movups -16(%edx,%ecx,1),%xmm0 jnz .L003dec2_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,223,208 .byte 102,15,56,223,216 ret .size _aesni_decrypt2,.-_aesni_decrypt2 .type _aesni_encrypt3,@function .align 16 _aesni_encrypt3: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx addl $16,%ecx .L004enc3_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 movups -16(%edx,%ecx,1),%xmm0 jnz .L004enc3_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 ret .size _aesni_encrypt3,.-_aesni_encrypt3 .type _aesni_decrypt3,@function .align 16 _aesni_decrypt3: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx addl $16,%ecx .L005dec3_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 movups -16(%edx,%ecx,1),%xmm0 jnz .L005dec3_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 102,15,56,223,224 ret .size 
_aesni_decrypt3,.-_aesni_decrypt3 .type _aesni_encrypt4,@function .align 16 _aesni_encrypt4: movups (%edx),%xmm0 movups 16(%edx),%xmm1 shll $4,%ecx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx .byte 15,31,64,0 addl $16,%ecx .L006enc4_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups -16(%edx,%ecx,1),%xmm0 jnz .L006enc4_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 102,15,56,221,232 ret .size _aesni_encrypt4,.-_aesni_encrypt4 .type _aesni_decrypt4,@function .align 16 _aesni_decrypt4: movups (%edx),%xmm0 movups 16(%edx),%xmm1 shll $4,%ecx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx .byte 15,31,64,0 addl $16,%ecx .L007dec4_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups -16(%edx,%ecx,1),%xmm0 jnz .L007dec4_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 102,15,56,223,224 .byte 102,15,56,223,232 ret .size _aesni_decrypt4,.-_aesni_decrypt4 .type _aesni_encrypt6,@function .align 16 _aesni_encrypt6: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 .byte 102,15,56,220,209 pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 .byte 102,15,56,220,217 leal 32(%edx,%ecx,1),%edx negl %ecx .byte 102,15,56,220,225 pxor %xmm0,%xmm7 movups (%edx,%ecx,1),%xmm0 addl $16,%ecx jmp .L008_aesni_encrypt6_inner .align 16 .L009enc6_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .L008_aesni_encrypt6_inner: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .L_aesni_encrypt6_enter: movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -16(%edx,%ecx,1),%xmm0 jnz .L009enc6_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 102,15,56,221,232 .byte 102,15,56,221,240 .byte 102,15,56,221,248 ret .size _aesni_encrypt6,.-_aesni_encrypt6 .type _aesni_decrypt6,@function .align 16 _aesni_decrypt6: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 .byte 102,15,56,222,209 pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 .byte 102,15,56,222,217 leal 32(%edx,%ecx,1),%edx negl %ecx .byte 102,15,56,222,225 pxor %xmm0,%xmm7 movups (%edx,%ecx,1),%xmm0 addl $16,%ecx jmp .L010_aesni_decrypt6_inner .align 16 .L011dec6_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .L010_aesni_decrypt6_inner: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .L_aesni_decrypt6_enter: movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 
102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups -16(%edx,%ecx,1),%xmm0 jnz .L011dec6_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 102,15,56,223,224 .byte 102,15,56,223,232 .byte 102,15,56,223,240 .byte 102,15,56,223,248 ret .size _aesni_decrypt6,.-_aesni_decrypt6 .globl aesni_ecb_encrypt .type aesni_ecb_encrypt,@function .align 16 aesni_ecb_encrypt: .L_aesni_ecb_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl 36(%esp),%ebx andl $-16,%eax jz .L012ecb_ret movl 240(%edx),%ecx testl %ebx,%ebx jz .L013ecb_decrypt movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax jb .L014ecb_enc_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 movdqu 48(%esi),%xmm5 movdqu 64(%esi),%xmm6 movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax jmp .L015ecb_enc_loop6_enter .align 16 .L016ecb_enc_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) movdqu 16(%esi),%xmm3 movups %xmm4,32(%edi) movdqu 32(%esi),%xmm4 movups %xmm5,48(%edi) movdqu 48(%esi),%xmm5 movups %xmm6,64(%edi) movdqu 64(%esi),%xmm6 movups %xmm7,80(%edi) leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi .L015ecb_enc_loop6_enter: call _aesni_encrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax jnc .L016ecb_enc_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax jz .L012ecb_ret .L014ecb_enc_tail: movups (%esi),%xmm2 cmpl $32,%eax jb .L017ecb_enc_one movups 16(%esi),%xmm3 je .L018ecb_enc_two movups 32(%esi),%xmm4 cmpl $64,%eax jb .L019ecb_enc_three movups 48(%esi),%xmm5 je .L020ecb_enc_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_encrypt6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) jmp .L012ecb_ret .align 16 .L017ecb_enc_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L021enc1_loop_3: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L021enc1_loop_3 .byte 102,15,56,221,209 movups %xmm2,(%edi) jmp .L012ecb_ret .align 16 .L018ecb_enc_two: call _aesni_encrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) jmp .L012ecb_ret .align 16 .L019ecb_enc_three: call _aesni_encrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) jmp .L012ecb_ret .align 16 .L020ecb_enc_four: call _aesni_encrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) jmp .L012ecb_ret .align 16 .L013ecb_decrypt: movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax jb .L022ecb_dec_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 movdqu 48(%esi),%xmm5 movdqu 64(%esi),%xmm6 movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax jmp .L023ecb_dec_loop6_enter .align 16 .L024ecb_dec_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) movdqu 16(%esi),%xmm3 movups %xmm4,32(%edi) movdqu 32(%esi),%xmm4 movups %xmm5,48(%edi) movdqu 48(%esi),%xmm5 movups %xmm6,64(%edi) movdqu 64(%esi),%xmm6 movups %xmm7,80(%edi) leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi .L023ecb_dec_loop6_enter: call _aesni_decrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax jnc .L024ecb_dec_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups 
%xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax jz .L012ecb_ret .L022ecb_dec_tail: movups (%esi),%xmm2 cmpl $32,%eax jb .L025ecb_dec_one movups 16(%esi),%xmm3 je .L026ecb_dec_two movups 32(%esi),%xmm4 cmpl $64,%eax jb .L027ecb_dec_three movups 48(%esi),%xmm5 je .L028ecb_dec_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_decrypt6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) jmp .L012ecb_ret .align 16 .L025ecb_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L029dec1_loop_4: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L029dec1_loop_4 .byte 102,15,56,223,209 movups %xmm2,(%edi) jmp .L012ecb_ret .align 16 .L026ecb_dec_two: call _aesni_decrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) jmp .L012ecb_ret .align 16 .L027ecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) jmp .L012ecb_ret .align 16 .L028ecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) .L012ecb_ret: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin .globl aesni_ccm64_encrypt_blocks .type aesni_ccm64_encrypt_blocks,@function .align 16 aesni_ccm64_encrypt_blocks: .L_aesni_ccm64_encrypt_blocks_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl 36(%esp),%ebx movl 40(%esp),%ecx movl %esp,%ebp subl $60,%esp andl $-16,%esp movl %ebp,48(%esp) movdqu (%ebx),%xmm7 movdqu (%ecx),%xmm3 movl 240(%edx),%ecx movl $202182159,(%esp) movl $134810123,4(%esp) movl $67438087,8(%esp) movl $66051,12(%esp) movl $1,%ebx xorl %ebp,%ebp movl %ebx,16(%esp) movl %ebp,20(%esp) movl %ebp,24(%esp) movl %ebp,28(%esp) shll $4,%ecx movl $16,%ebx leal (%edx),%ebp movdqa (%esp),%xmm5 movdqa %xmm7,%xmm2 leal 32(%edx,%ecx,1),%edx subl %ecx,%ebx .byte 102,15,56,0,253 .L030ccm64_enc_outer: movups (%ebp),%xmm0 movl %ebx,%ecx movups (%esi),%xmm6 xorps %xmm0,%xmm2 movups 16(%ebp),%xmm1 xorps %xmm6,%xmm0 xorps %xmm0,%xmm3 movups 32(%ebp),%xmm0 .L031ccm64_enc2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 jnz .L031ccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 paddq 16(%esp),%xmm7 decl %eax .byte 102,15,56,221,208 .byte 102,15,56,221,216 leal 16(%esi),%esi xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) .byte 102,15,56,0,213 leal 16(%edi),%edi jnz .L030ccm64_enc_outer movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin .globl aesni_ccm64_decrypt_blocks .type aesni_ccm64_decrypt_blocks,@function .align 16 aesni_ccm64_decrypt_blocks: .L_aesni_ccm64_decrypt_blocks_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl 36(%esp),%ebx movl 40(%esp),%ecx movl %esp,%ebp subl $60,%esp andl $-16,%esp movl %ebp,48(%esp) 
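# In the CCM64 setup that follows, %xmm7 holds the counter block loaded
# through the IV argument and %xmm3 the running CBC-MAC value, which is
# written back through the last argument on exit; the constants stored at
# (%esp) (0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203) form the pshufb
# byte-swap mask applied to the counter, and the quadword 1 kept at 16(%esp)
# is the per-block paddq increment.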
movdqu (%ebx),%xmm7 movdqu (%ecx),%xmm3 movl 240(%edx),%ecx movl $202182159,(%esp) movl $134810123,4(%esp) movl $67438087,8(%esp) movl $66051,12(%esp) movl $1,%ebx xorl %ebp,%ebp movl %ebx,16(%esp) movl %ebp,20(%esp) movl %ebp,24(%esp) movl %ebp,28(%esp) movdqa (%esp),%xmm5 movdqa %xmm7,%xmm2 movl %edx,%ebp movl %ecx,%ebx .byte 102,15,56,0,253 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L032enc1_loop_5: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L032enc1_loop_5 .byte 102,15,56,221,209 shll $4,%ebx movl $16,%ecx movups (%esi),%xmm6 paddq 16(%esp),%xmm7 leal 16(%esi),%esi subl %ebx,%ecx leal 32(%ebp,%ebx,1),%edx movl %ecx,%ebx jmp .L033ccm64_dec_outer .align 16 .L033ccm64_dec_outer: xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) leal 16(%edi),%edi .byte 102,15,56,0,213 subl $1,%eax jz .L034ccm64_dec_break movups (%ebp),%xmm0 movl %ebx,%ecx movups 16(%ebp),%xmm1 xorps %xmm0,%xmm6 xorps %xmm0,%xmm2 xorps %xmm6,%xmm3 movups 32(%ebp),%xmm0 .L035ccm64_dec2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 jnz .L035ccm64_dec2_loop movups (%esi),%xmm6 paddq 16(%esp),%xmm7 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,221,208 .byte 102,15,56,221,216 leal 16(%esi),%esi jmp .L033ccm64_dec_outer .align 16 .L034ccm64_dec_break: movl 240(%ebp),%ecx movl %ebp,%edx movups (%edx),%xmm0 movups 16(%edx),%xmm1 xorps %xmm0,%xmm6 leal 32(%edx),%edx xorps %xmm6,%xmm3 .L036enc1_loop_6: .byte 102,15,56,220,217 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L036enc1_loop_6 .byte 102,15,56,221,217 movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin .globl aesni_ctr32_encrypt_blocks .type aesni_ctr32_encrypt_blocks,@function .align 16 aesni_ctr32_encrypt_blocks: .L_aesni_ctr32_encrypt_blocks_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl 36(%esp),%ebx movl %esp,%ebp subl $88,%esp andl $-16,%esp movl %ebp,80(%esp) cmpl $1,%eax je .L037ctr32_one_shortcut movdqu (%ebx),%xmm7 movl $202182159,(%esp) movl $134810123,4(%esp) movl $67438087,8(%esp) movl $66051,12(%esp) movl $6,%ecx xorl %ebp,%ebp movl %ecx,16(%esp) movl %ecx,20(%esp) movl %ecx,24(%esp) movl %ebp,28(%esp) .byte 102,15,58,22,251,3 .byte 102,15,58,34,253,3 movl 240(%edx),%ecx bswap %ebx pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movdqa (%esp),%xmm2 .byte 102,15,58,34,195,0 leal 3(%ebx),%ebp .byte 102,15,58,34,205,0 incl %ebx .byte 102,15,58,34,195,1 incl %ebp .byte 102,15,58,34,205,1 incl %ebx .byte 102,15,58,34,195,2 incl %ebp .byte 102,15,58,34,205,2 movdqa %xmm0,48(%esp) .byte 102,15,56,0,194 movdqu (%edx),%xmm6 movdqa %xmm1,64(%esp) .byte 102,15,56,0,202 pshufd $192,%xmm0,%xmm2 pshufd $128,%xmm0,%xmm3 cmpl $6,%eax jb .L038ctr32_tail pxor %xmm6,%xmm7 shll $4,%ecx movl $16,%ebx movdqa %xmm7,32(%esp) movl %edx,%ebp subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx subl $6,%eax jmp .L039ctr32_loop6 .align 16 .L039ctr32_loop6: pshufd $64,%xmm0,%xmm4 movdqa 32(%esp),%xmm0 pshufd $192,%xmm1,%xmm5 pxor %xmm0,%xmm2 pshufd $128,%xmm1,%xmm6 pxor %xmm0,%xmm3 pshufd $64,%xmm1,%xmm7 movups 16(%ebp),%xmm1 pxor %xmm0,%xmm4 pxor 
%xmm0,%xmm5 .byte 102,15,56,220,209 pxor %xmm0,%xmm6 pxor %xmm0,%xmm7 .byte 102,15,56,220,217 movups 32(%ebp),%xmm0 movl %ebx,%ecx .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 call .L_aesni_encrypt6_enter movups (%esi),%xmm1 movups 16(%esi),%xmm0 xorps %xmm1,%xmm2 movups 32(%esi),%xmm1 xorps %xmm0,%xmm3 movups %xmm2,(%edi) movdqa 16(%esp),%xmm0 xorps %xmm1,%xmm4 movdqa 64(%esp),%xmm1 movups %xmm3,16(%edi) movups %xmm4,32(%edi) paddd %xmm0,%xmm1 paddd 48(%esp),%xmm0 movdqa (%esp),%xmm2 movups 48(%esi),%xmm3 movups 64(%esi),%xmm4 xorps %xmm3,%xmm5 movups 80(%esi),%xmm3 leal 96(%esi),%esi movdqa %xmm0,48(%esp) .byte 102,15,56,0,194 xorps %xmm4,%xmm6 movups %xmm5,48(%edi) xorps %xmm3,%xmm7 movdqa %xmm1,64(%esp) .byte 102,15,56,0,202 movups %xmm6,64(%edi) pshufd $192,%xmm0,%xmm2 movups %xmm7,80(%edi) leal 96(%edi),%edi pshufd $128,%xmm0,%xmm3 subl $6,%eax jnc .L039ctr32_loop6 addl $6,%eax jz .L040ctr32_ret movdqu (%ebp),%xmm7 movl %ebp,%edx pxor 32(%esp),%xmm7 movl 240(%ebp),%ecx .L038ctr32_tail: por %xmm7,%xmm2 cmpl $2,%eax jb .L041ctr32_one pshufd $64,%xmm0,%xmm4 por %xmm7,%xmm3 je .L042ctr32_two pshufd $192,%xmm1,%xmm5 por %xmm7,%xmm4 cmpl $4,%eax jb .L043ctr32_three pshufd $128,%xmm1,%xmm6 por %xmm7,%xmm5 je .L044ctr32_four por %xmm7,%xmm6 call _aesni_encrypt6 movups (%esi),%xmm1 movups 16(%esi),%xmm0 xorps %xmm1,%xmm2 movups 32(%esi),%xmm1 xorps %xmm0,%xmm3 movups 48(%esi),%xmm0 xorps %xmm1,%xmm4 movups 64(%esi),%xmm1 xorps %xmm0,%xmm5 movups %xmm2,(%edi) xorps %xmm1,%xmm6 movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) jmp .L040ctr32_ret .align 16 .L037ctr32_one_shortcut: movups (%ebx),%xmm2 movl 240(%edx),%ecx .L041ctr32_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L045enc1_loop_7: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L045enc1_loop_7 .byte 102,15,56,221,209 movups (%esi),%xmm6 xorps %xmm2,%xmm6 movups %xmm6,(%edi) jmp .L040ctr32_ret .align 16 .L042ctr32_two: call _aesni_encrypt2 movups (%esi),%xmm5 movups 16(%esi),%xmm6 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) jmp .L040ctr32_ret .align 16 .L043ctr32_three: call _aesni_encrypt3 movups (%esi),%xmm5 movups 16(%esi),%xmm6 xorps %xmm5,%xmm2 movups 32(%esi),%xmm7 xorps %xmm6,%xmm3 movups %xmm2,(%edi) xorps %xmm7,%xmm4 movups %xmm3,16(%edi) movups %xmm4,32(%edi) jmp .L040ctr32_ret .align 16 .L044ctr32_four: call _aesni_encrypt4 movups (%esi),%xmm6 movups 16(%esi),%xmm7 movups 32(%esi),%xmm1 xorps %xmm6,%xmm2 movups 48(%esi),%xmm0 xorps %xmm7,%xmm3 movups %xmm2,(%edi) xorps %xmm1,%xmm4 movups %xmm3,16(%edi) xorps %xmm0,%xmm5 movups %xmm4,32(%edi) movups %xmm5,48(%edi) .L040ctr32_ret: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 movdqa %xmm0,32(%esp) pxor %xmm5,%xmm5 movdqa %xmm0,48(%esp) pxor %xmm6,%xmm6 movdqa %xmm0,64(%esp) pxor %xmm7,%xmm7 movl 80(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin .globl aesni_xts_encrypt .type aesni_xts_encrypt,@function .align 16 aesni_xts_encrypt: .L_aesni_xts_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 36(%esp),%edx movl 40(%esp),%esi movl 240(%edx),%ecx movups (%esi),%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L046enc1_loop_8: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L046enc1_loop_8 .byte 
102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl %esp,%ebp subl $120,%esp movl 240(%edx),%ecx andl $-16,%esp movl $135,96(%esp) movl $0,100(%esp) movl $1,104(%esp) movl $0,108(%esp) movl %eax,112(%esp) movl %ebp,116(%esp) movdqa %xmm2,%xmm1 pxor %xmm0,%xmm0 movdqa 96(%esp),%xmm3 pcmpgtd %xmm1,%xmm0 andl $-16,%eax movl %edx,%ebp movl %ecx,%ebx subl $96,%eax jc .L047xts_enc_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx jmp .L048xts_enc_loop6 .align 16 .L048xts_enc_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,16(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,32(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,48(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm7 movdqa %xmm1,64(%esp) paddq %xmm1,%xmm1 movups (%ebp),%xmm0 pand %xmm3,%xmm7 movups (%esi),%xmm2 pxor %xmm1,%xmm7 movl %ebx,%ecx movdqu 16(%esi),%xmm3 xorps %xmm0,%xmm2 movdqu 32(%esi),%xmm4 pxor %xmm0,%xmm3 movdqu 48(%esi),%xmm5 pxor %xmm0,%xmm4 movdqu 64(%esi),%xmm6 pxor %xmm0,%xmm5 movdqu 80(%esi),%xmm1 pxor %xmm0,%xmm6 leal 96(%esi),%esi pxor (%esp),%xmm2 movdqa %xmm7,80(%esp) pxor %xmm1,%xmm7 movups 16(%ebp),%xmm1 pxor 16(%esp),%xmm3 pxor 32(%esp),%xmm4 .byte 102,15,56,220,209 pxor 48(%esp),%xmm5 pxor 64(%esp),%xmm6 .byte 102,15,56,220,217 pxor %xmm0,%xmm7 movups 32(%ebp),%xmm0 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 call .L_aesni_encrypt6_enter movdqa 80(%esp),%xmm1 pxor %xmm0,%xmm0 xorps (%esp),%xmm2 pcmpgtd %xmm1,%xmm0 xorps 16(%esp),%xmm3 movups %xmm2,(%edi) xorps 32(%esp),%xmm4 movups %xmm3,16(%edi) xorps 48(%esp),%xmm5 movups %xmm4,32(%edi) xorps 64(%esp),%xmm6 movups %xmm5,48(%edi) xorps %xmm1,%xmm7 movups %xmm6,64(%edi) pshufd $19,%xmm0,%xmm2 movups %xmm7,80(%edi) leal 96(%edi),%edi movdqa 96(%esp),%xmm3 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax jnc .L048xts_enc_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx .L047xts_enc_short: addl $96,%eax jz .L049xts_enc_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax jb .L050xts_enc_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 je .L051xts_enc_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax jb .L052xts_enc_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) je .L053xts_enc_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm7 pxor %xmm1,%xmm7 movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 pxor (%esp),%xmm2 movdqu 48(%esi),%xmm5 pxor 16(%esp),%xmm3 movdqu 64(%esi),%xmm6 pxor 32(%esp),%xmm4 leal 80(%esi),%esi pxor 48(%esp),%xmm5 movdqa %xmm7,64(%esp) pxor %xmm7,%xmm6 call _aesni_encrypt6 movaps 64(%esp),%xmm1 xorps (%esp),%xmm2 xorps 16(%esp),%xmm3 xorps 32(%esp),%xmm4 movups %xmm2,(%edi) xorps 48(%esp),%xmm5 movups %xmm3,16(%edi) xorps %xmm1,%xmm6 movups %xmm4,32(%edi) movups 
%xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi jmp .L054xts_enc_done .align 16 .L050xts_enc_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L055enc1_loop_9: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L055enc1_loop_9 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 jmp .L054xts_enc_done .align 16 .L051xts_enc_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 leal 32(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 call _aesni_encrypt2 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 jmp .L054xts_enc_done .align 16 .L052xts_enc_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 movups 32(%esi),%xmm4 leal 48(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 xorps %xmm7,%xmm4 call _aesni_encrypt3 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 xorps %xmm7,%xmm4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 jmp .L054xts_enc_done .align 16 .L053xts_enc_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 movups 32(%esi),%xmm4 xorps (%esp),%xmm2 movups 48(%esi),%xmm5 leal 64(%esi),%esi xorps 16(%esp),%xmm3 xorps %xmm7,%xmm4 xorps %xmm6,%xmm5 call _aesni_encrypt4 xorps (%esp),%xmm2 xorps 16(%esp),%xmm3 xorps %xmm7,%xmm4 movups %xmm2,(%edi) xorps %xmm6,%xmm5 movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 jmp .L054xts_enc_done .align 16 .L049xts_enc_done6x: movl 112(%esp),%eax andl $15,%eax jz .L056xts_enc_ret movdqa %xmm1,%xmm5 movl %eax,112(%esp) jmp .L057xts_enc_steal .align 16 .L054xts_enc_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax jz .L056xts_enc_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm5 paddq %xmm1,%xmm1 pand 96(%esp),%xmm5 pxor %xmm1,%xmm5 .L057xts_enc_steal: movzbl (%esi),%ecx movzbl -16(%edi),%edx leal 1(%esi),%esi movb %cl,-16(%edi) movb %dl,(%edi) leal 1(%edi),%edi subl $1,%eax jnz .L057xts_enc_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx movups -16(%edi),%xmm2 xorps %xmm5,%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L058enc1_loop_10: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L058enc1_loop_10 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,-16(%edi) .L056xts_enc_ret: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 movdqa %xmm0,(%esp) pxor %xmm3,%xmm3 movdqa %xmm0,16(%esp) pxor %xmm4,%xmm4 movdqa %xmm0,32(%esp) pxor %xmm5,%xmm5 movdqa %xmm0,48(%esp) pxor %xmm6,%xmm6 movdqa %xmm0,64(%esp) pxor %xmm7,%xmm7 movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin .globl aesni_xts_decrypt .type aesni_xts_decrypt,@function .align 16 aesni_xts_decrypt: .L_aesni_xts_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 36(%esp),%edx movl 40(%esp),%esi movl 240(%edx),%ecx movups (%esi),%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L059enc1_loop_11: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L059enc1_loop_11 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl %esp,%ebp subl $120,%esp andl $-16,%esp xorl %ebx,%ebx testl $15,%eax setnz %bl shll $4,%ebx subl 
%ebx,%eax movl $135,96(%esp) movl $0,100(%esp) movl $1,104(%esp) movl $0,108(%esp) movl %eax,112(%esp) movl %ebp,116(%esp) movl 240(%edx),%ecx movl %edx,%ebp movl %ecx,%ebx movdqa %xmm2,%xmm1 pxor %xmm0,%xmm0 movdqa 96(%esp),%xmm3 pcmpgtd %xmm1,%xmm0 andl $-16,%eax subl $96,%eax jc .L060xts_dec_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx jmp .L061xts_dec_loop6 .align 16 .L061xts_dec_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,16(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,32(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,48(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm7 movdqa %xmm1,64(%esp) paddq %xmm1,%xmm1 movups (%ebp),%xmm0 pand %xmm3,%xmm7 movups (%esi),%xmm2 pxor %xmm1,%xmm7 movl %ebx,%ecx movdqu 16(%esi),%xmm3 xorps %xmm0,%xmm2 movdqu 32(%esi),%xmm4 pxor %xmm0,%xmm3 movdqu 48(%esi),%xmm5 pxor %xmm0,%xmm4 movdqu 64(%esi),%xmm6 pxor %xmm0,%xmm5 movdqu 80(%esi),%xmm1 pxor %xmm0,%xmm6 leal 96(%esi),%esi pxor (%esp),%xmm2 movdqa %xmm7,80(%esp) pxor %xmm1,%xmm7 movups 16(%ebp),%xmm1 pxor 16(%esp),%xmm3 pxor 32(%esp),%xmm4 .byte 102,15,56,222,209 pxor 48(%esp),%xmm5 pxor 64(%esp),%xmm6 .byte 102,15,56,222,217 pxor %xmm0,%xmm7 movups 32(%ebp),%xmm0 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 call .L_aesni_decrypt6_enter movdqa 80(%esp),%xmm1 pxor %xmm0,%xmm0 xorps (%esp),%xmm2 pcmpgtd %xmm1,%xmm0 xorps 16(%esp),%xmm3 movups %xmm2,(%edi) xorps 32(%esp),%xmm4 movups %xmm3,16(%edi) xorps 48(%esp),%xmm5 movups %xmm4,32(%edi) xorps 64(%esp),%xmm6 movups %xmm5,48(%edi) xorps %xmm1,%xmm7 movups %xmm6,64(%edi) pshufd $19,%xmm0,%xmm2 movups %xmm7,80(%edi) leal 96(%edi),%edi movdqa 96(%esp),%xmm3 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax jnc .L061xts_dec_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx .L060xts_dec_short: addl $96,%eax jz .L062xts_dec_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax jb .L063xts_dec_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 je .L064xts_dec_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax jb .L065xts_dec_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) je .L066xts_dec_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm7 pxor %xmm1,%xmm7 movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 pxor (%esp),%xmm2 movdqu 48(%esi),%xmm5 pxor 16(%esp),%xmm3 movdqu 64(%esi),%xmm6 pxor 32(%esp),%xmm4 leal 80(%esi),%esi pxor 48(%esp),%xmm5 movdqa %xmm7,64(%esp) pxor %xmm7,%xmm6 call _aesni_decrypt6 movaps 64(%esp),%xmm1 xorps (%esp),%xmm2 xorps 16(%esp),%xmm3 xorps 32(%esp),%xmm4 movups %xmm2,(%edi) xorps 48(%esp),%xmm5 movups %xmm3,16(%edi) xorps %xmm1,%xmm6 movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi jmp .L067xts_dec_done .align 16 .L063xts_dec_one: movups (%esi),%xmm2 
leal 16(%esi),%esi xorps %xmm5,%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L068dec1_loop_12: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L068dec1_loop_12 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 jmp .L067xts_dec_done .align 16 .L064xts_dec_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 leal 32(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 call _aesni_decrypt2 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 jmp .L067xts_dec_done .align 16 .L065xts_dec_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 movups 32(%esi),%xmm4 leal 48(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 xorps %xmm7,%xmm4 call _aesni_decrypt3 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 xorps %xmm7,%xmm4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 jmp .L067xts_dec_done .align 16 .L066xts_dec_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 movups 32(%esi),%xmm4 xorps (%esp),%xmm2 movups 48(%esi),%xmm5 leal 64(%esi),%esi xorps 16(%esp),%xmm3 xorps %xmm7,%xmm4 xorps %xmm6,%xmm5 call _aesni_decrypt4 xorps (%esp),%xmm2 xorps 16(%esp),%xmm3 xorps %xmm7,%xmm4 movups %xmm2,(%edi) xorps %xmm6,%xmm5 movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 jmp .L067xts_dec_done .align 16 .L062xts_dec_done6x: movl 112(%esp),%eax andl $15,%eax jz .L069xts_dec_ret movl %eax,112(%esp) jmp .L070xts_dec_only_one_more .align 16 .L067xts_dec_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax jz .L069xts_dec_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa 96(%esp),%xmm3 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 .L070xts_dec_only_one_more: pshufd $19,%xmm0,%xmm5 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 pand %xmm3,%xmm5 pxor %xmm1,%xmm5 movl %ebp,%edx movl %ebx,%ecx movups (%esi),%xmm2 xorps %xmm5,%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L071dec1_loop_13: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L071dec1_loop_13 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) .L072xts_dec_steal: movzbl 16(%esi),%ecx movzbl (%edi),%edx leal 1(%esi),%esi movb %cl,(%edi) movb %dl,16(%edi) leal 1(%edi),%edi subl $1,%eax jnz .L072xts_dec_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx movups (%edi),%xmm2 xorps %xmm6,%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L073dec1_loop_14: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L073dec1_loop_14 .byte 102,15,56,223,209 xorps %xmm6,%xmm2 movups %xmm2,(%edi) .L069xts_dec_ret: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 movdqa %xmm0,(%esp) pxor %xmm3,%xmm3 movdqa %xmm0,16(%esp) pxor %xmm4,%xmm4 movdqa %xmm0,32(%esp) pxor %xmm5,%xmm5 movdqa %xmm0,48(%esp) pxor %xmm6,%xmm6 movdqa %xmm0,64(%esp) pxor %xmm7,%xmm7 movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin .globl aesni_cbc_encrypt .type aesni_cbc_encrypt,@function .align 16 aesni_cbc_encrypt: .L_aesni_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl %esp,%ebx movl 24(%esp),%edi subl $24,%ebx movl 28(%esp),%eax andl $-16,%ebx movl 
32(%esp),%edx movl 36(%esp),%ebp testl %eax,%eax jz .L074cbc_abort cmpl $0,40(%esp) xchgl %esp,%ebx movups (%ebp),%xmm7 movl 240(%edx),%ecx movl %edx,%ebp movl %ebx,16(%esp) movl %ecx,%ebx je .L075cbc_decrypt movaps %xmm7,%xmm2 cmpl $16,%eax jb .L076cbc_enc_tail subl $16,%eax jmp .L077cbc_enc_loop .align 16 .L077cbc_enc_loop: movups (%esi),%xmm7 leal 16(%esi),%esi movups (%edx),%xmm0 movups 16(%edx),%xmm1 xorps %xmm0,%xmm7 leal 32(%edx),%edx xorps %xmm7,%xmm2 .L078enc1_loop_15: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L078enc1_loop_15 .byte 102,15,56,221,209 movl %ebx,%ecx movl %ebp,%edx movups %xmm2,(%edi) leal 16(%edi),%edi subl $16,%eax jnc .L077cbc_enc_loop addl $16,%eax jnz .L076cbc_enc_tail movaps %xmm2,%xmm7 pxor %xmm2,%xmm2 jmp .L079cbc_ret .L076cbc_enc_tail: movl %eax,%ecx .long 2767451785 movl $16,%ecx subl %eax,%ecx xorl %eax,%eax .long 2868115081 leal -16(%edi),%edi movl %ebx,%ecx movl %edi,%esi movl %ebp,%edx jmp .L077cbc_enc_loop .align 16 .L075cbc_decrypt: cmpl $80,%eax jbe .L080cbc_dec_tail movaps %xmm7,(%esp) subl $80,%eax jmp .L081cbc_dec_loop6_enter .align 16 .L082cbc_dec_loop6: movaps %xmm0,(%esp) movups %xmm7,(%edi) leal 16(%edi),%edi .L081cbc_dec_loop6_enter: movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 movdqu 48(%esi),%xmm5 movdqu 64(%esi),%xmm6 movdqu 80(%esi),%xmm7 call _aesni_decrypt6 movups (%esi),%xmm1 movups 16(%esi),%xmm0 xorps (%esp),%xmm2 xorps %xmm1,%xmm3 movups 32(%esi),%xmm1 xorps %xmm0,%xmm4 movups 48(%esi),%xmm0 xorps %xmm1,%xmm5 movups 64(%esi),%xmm1 xorps %xmm0,%xmm6 movups 80(%esi),%xmm0 xorps %xmm1,%xmm7 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 96(%esi),%esi movups %xmm4,32(%edi) movl %ebx,%ecx movups %xmm5,48(%edi) movl %ebp,%edx movups %xmm6,64(%edi) leal 80(%edi),%edi subl $96,%eax ja .L082cbc_dec_loop6 movaps %xmm7,%xmm2 movaps %xmm0,%xmm7 addl $80,%eax jle .L083cbc_dec_clear_tail_collected movups %xmm2,(%edi) leal 16(%edi),%edi .L080cbc_dec_tail: movups (%esi),%xmm2 movaps %xmm2,%xmm6 cmpl $16,%eax jbe .L084cbc_dec_one movups 16(%esi),%xmm3 movaps %xmm3,%xmm5 cmpl $32,%eax jbe .L085cbc_dec_two movups 32(%esi),%xmm4 cmpl $48,%eax jbe .L086cbc_dec_three movups 48(%esi),%xmm5 cmpl $64,%eax jbe .L087cbc_dec_four movups 64(%esi),%xmm6 movaps %xmm7,(%esp) movups (%esi),%xmm2 xorps %xmm7,%xmm7 call _aesni_decrypt6 movups (%esi),%xmm1 movups 16(%esi),%xmm0 xorps (%esp),%xmm2 xorps %xmm1,%xmm3 movups 32(%esi),%xmm1 xorps %xmm0,%xmm4 movups 48(%esi),%xmm0 xorps %xmm1,%xmm5 movups 64(%esi),%xmm7 xorps %xmm0,%xmm6 movups %xmm2,(%edi) movups %xmm3,16(%edi) pxor %xmm3,%xmm3 movups %xmm4,32(%edi) pxor %xmm4,%xmm4 movups %xmm5,48(%edi) pxor %xmm5,%xmm5 leal 64(%edi),%edi movaps %xmm6,%xmm2 pxor %xmm6,%xmm6 subl $80,%eax jmp .L088cbc_dec_tail_collected .align 16 .L084cbc_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L089dec1_loop_16: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L089dec1_loop_16 .byte 102,15,56,223,209 xorps %xmm7,%xmm2 movaps %xmm6,%xmm7 subl $16,%eax jmp .L088cbc_dec_tail_collected .align 16 .L085cbc_dec_two: call _aesni_decrypt2 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movaps %xmm3,%xmm2 pxor %xmm3,%xmm3 leal 16(%edi),%edi movaps %xmm5,%xmm7 subl $32,%eax jmp .L088cbc_dec_tail_collected .align 16 .L086cbc_dec_three: call _aesni_decrypt3 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 xorps %xmm5,%xmm4 movups %xmm2,(%edi) movaps %xmm4,%xmm2 pxor %xmm4,%xmm4 movups %xmm3,16(%edi) pxor %xmm3,%xmm3 leal 
32(%edi),%edi movups 32(%esi),%xmm7 subl $48,%eax jmp .L088cbc_dec_tail_collected .align 16 .L087cbc_dec_four: call _aesni_decrypt4 movups 16(%esi),%xmm1 movups 32(%esi),%xmm0 xorps %xmm7,%xmm2 movups 48(%esi),%xmm7 xorps %xmm6,%xmm3 movups %xmm2,(%edi) xorps %xmm1,%xmm4 movups %xmm3,16(%edi) pxor %xmm3,%xmm3 xorps %xmm0,%xmm5 movups %xmm4,32(%edi) pxor %xmm4,%xmm4 leal 48(%edi),%edi movaps %xmm5,%xmm2 pxor %xmm5,%xmm5 subl $64,%eax jmp .L088cbc_dec_tail_collected .align 16 .L083cbc_dec_clear_tail_collected: pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 .L088cbc_dec_tail_collected: andl $15,%eax jnz .L090cbc_dec_tail_partial movups %xmm2,(%edi) pxor %xmm0,%xmm0 jmp .L079cbc_ret .align 16 .L090cbc_dec_tail_partial: movaps %xmm2,(%esp) pxor %xmm0,%xmm0 movl $16,%ecx movl %esp,%esi subl %eax,%ecx .long 2767451785 movdqa %xmm2,(%esp) .L079cbc_ret: movl 16(%esp),%esp movl 36(%esp),%ebp pxor %xmm2,%xmm2 pxor %xmm1,%xmm1 movups %xmm7,(%ebp) pxor %xmm7,%xmm7 .L074cbc_abort: popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin .type _aesni_set_encrypt_key,@function .align 16 _aesni_set_encrypt_key: pushl %ebp pushl %ebx testl %eax,%eax jz .L091bad_pointer testl %edx,%edx jz .L091bad_pointer call .L092pic .L092pic: popl %ebx leal .Lkey_const-.L092pic(%ebx),%ebx leal OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp movups (%eax),%xmm0 xorps %xmm4,%xmm4 movl 4(%ebp),%ebp leal 16(%edx),%edx andl $268437504,%ebp cmpl $256,%ecx je .L09314rounds cmpl $192,%ecx je .L09412rounds cmpl $128,%ecx jne .L095bad_keybits .align 16 .L09610rounds: cmpl $268435456,%ebp je .L09710rounds_alt movl $9,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,200,1 call .L098key_128_cold .byte 102,15,58,223,200,2 call .L099key_128 .byte 102,15,58,223,200,4 call .L099key_128 .byte 102,15,58,223,200,8 call .L099key_128 .byte 102,15,58,223,200,16 call .L099key_128 .byte 102,15,58,223,200,32 call .L099key_128 .byte 102,15,58,223,200,64 call .L099key_128 .byte 102,15,58,223,200,128 call .L099key_128 .byte 102,15,58,223,200,27 call .L099key_128 .byte 102,15,58,223,200,54 call .L099key_128 movups %xmm0,(%edx) movl %ecx,80(%edx) jmp .L100good_key .align 16 .L099key_128: movups %xmm0,(%edx) leal 16(%edx),%edx .L098key_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $255,%xmm1,%xmm1 xorps %xmm1,%xmm0 ret .align 16 .L09710rounds_alt: movdqa (%ebx),%xmm5 movl $8,%ecx movdqa 32(%ebx),%xmm4 movdqa %xmm0,%xmm2 movdqu %xmm0,-16(%edx) .L101loop_key128: .byte 102,15,56,0,197 .byte 102,15,56,221,196 pslld $1,%xmm4 leal 16(%edx),%edx movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,-16(%edx) movdqa %xmm0,%xmm2 decl %ecx jnz .L101loop_key128 movdqa 48(%ebx),%xmm4 .byte 102,15,56,0,197 .byte 102,15,56,221,196 pslld $1,%xmm4 movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,(%edx) movdqa %xmm0,%xmm2 .byte 102,15,56,0,197 .byte 102,15,56,221,196 movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,16(%edx) movl $9,%ecx movl %ecx,96(%edx) jmp .L100good_key .align 16 .L09412rounds: movq 16(%eax),%xmm2 cmpl $268435456,%ebp je .L10212rounds_alt movl $11,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,202,1 call .L103key_192a_cold .byte 102,15,58,223,202,2 call 
.L104key_192b .byte 102,15,58,223,202,4 call .L105key_192a .byte 102,15,58,223,202,8 call .L104key_192b .byte 102,15,58,223,202,16 call .L105key_192a .byte 102,15,58,223,202,32 call .L104key_192b .byte 102,15,58,223,202,64 call .L105key_192a .byte 102,15,58,223,202,128 call .L104key_192b movups %xmm0,(%edx) movl %ecx,48(%edx) jmp .L100good_key .align 16 .L105key_192a: movups %xmm0,(%edx) leal 16(%edx),%edx .align 16 .L103key_192a_cold: movaps %xmm2,%xmm5 .L106key_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 pslldq $4,%xmm3 xorps %xmm4,%xmm0 pshufd $85,%xmm1,%xmm1 pxor %xmm3,%xmm2 pxor %xmm1,%xmm0 pshufd $255,%xmm0,%xmm3 pxor %xmm3,%xmm2 ret .align 16 .L104key_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%edx) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%edx) leal 32(%edx),%edx jmp .L106key_192b_warm .align 16 .L10212rounds_alt: movdqa 16(%ebx),%xmm5 movdqa 32(%ebx),%xmm4 movl $8,%ecx movdqu %xmm0,-16(%edx) .L107loop_key192: movq %xmm2,(%edx) movdqa %xmm2,%xmm1 .byte 102,15,56,0,213 .byte 102,15,56,221,212 pslld $1,%xmm4 leal 24(%edx),%edx movdqa %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm3,%xmm0 pshufd $255,%xmm0,%xmm3 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pxor %xmm2,%xmm0 pxor %xmm3,%xmm2 movdqu %xmm0,-16(%edx) decl %ecx jnz .L107loop_key192 movl $11,%ecx movl %ecx,32(%edx) jmp .L100good_key .align 16 .L09314rounds: movups 16(%eax),%xmm2 leal 16(%edx),%edx cmpl $268435456,%ebp je .L10814rounds_alt movl $13,%ecx movups %xmm0,-32(%edx) movups %xmm2,-16(%edx) .byte 102,15,58,223,202,1 call .L109key_256a_cold .byte 102,15,58,223,200,1 call .L110key_256b .byte 102,15,58,223,202,2 call .L111key_256a .byte 102,15,58,223,200,2 call .L110key_256b .byte 102,15,58,223,202,4 call .L111key_256a .byte 102,15,58,223,200,4 call .L110key_256b .byte 102,15,58,223,202,8 call .L111key_256a .byte 102,15,58,223,200,8 call .L110key_256b .byte 102,15,58,223,202,16 call .L111key_256a .byte 102,15,58,223,200,16 call .L110key_256b .byte 102,15,58,223,202,32 call .L111key_256a .byte 102,15,58,223,200,32 call .L110key_256b .byte 102,15,58,223,202,64 call .L111key_256a movups %xmm0,(%edx) movl %ecx,16(%edx) xorl %eax,%eax jmp .L100good_key .align 16 .L111key_256a: movups %xmm2,(%edx) leal 16(%edx),%edx .L109key_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $255,%xmm1,%xmm1 xorps %xmm1,%xmm0 ret .align 16 .L110key_256b: movups %xmm0,(%edx) leal 16(%edx),%edx shufps $16,%xmm2,%xmm4 xorps %xmm4,%xmm2 shufps $140,%xmm2,%xmm4 xorps %xmm4,%xmm2 shufps $170,%xmm1,%xmm1 xorps %xmm1,%xmm2 ret .align 16 .L10814rounds_alt: movdqa (%ebx),%xmm5 movdqa 32(%ebx),%xmm4 movl $7,%ecx movdqu %xmm0,-32(%edx) movdqa %xmm2,%xmm1 movdqu %xmm2,-16(%edx) .L112loop_key256: .byte 102,15,56,0,213 .byte 102,15,56,221,212 movdqa %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm3,%xmm0 pslld $1,%xmm4 pxor %xmm2,%xmm0 movdqu %xmm0,(%edx) decl %ecx jz .L113done_key256 pshufd $255,%xmm0,%xmm2 pxor %xmm3,%xmm3 .byte 102,15,56,221,211 movdqa %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm3,%xmm1 pxor %xmm1,%xmm2 movdqu %xmm2,16(%edx) leal 32(%edx),%edx movdqa %xmm2,%xmm1 jmp .L112loop_key256 .L113done_key256: movl $13,%ecx movl %ecx,16(%edx) .L100good_key: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 
xorl %eax,%eax popl %ebx popl %ebp ret .align 4 .L091bad_pointer: movl $-1,%eax popl %ebx popl %ebp ret .align 4 .L095bad_keybits: pxor %xmm0,%xmm0 movl $-2,%eax popl %ebx popl %ebp ret .size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key .globl aesni_set_encrypt_key .type aesni_set_encrypt_key,@function .align 16 aesni_set_encrypt_key: .L_aesni_set_encrypt_key_begin: movl 4(%esp),%eax movl 8(%esp),%ecx movl 12(%esp),%edx call _aesni_set_encrypt_key ret .size aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin .globl aesni_set_decrypt_key .type aesni_set_decrypt_key,@function .align 16 aesni_set_decrypt_key: .L_aesni_set_decrypt_key_begin: movl 4(%esp),%eax movl 8(%esp),%ecx movl 12(%esp),%edx call _aesni_set_encrypt_key movl 12(%esp),%edx shll $4,%ecx testl %eax,%eax jnz .L114dec_key_ret leal 16(%edx,%ecx,1),%eax movups (%edx),%xmm0 movups (%eax),%xmm1 movups %xmm0,(%eax) movups %xmm1,(%edx) leal 16(%edx),%edx leal -16(%eax),%eax .L115dec_key_inverse: movups (%edx),%xmm0 movups (%eax),%xmm1 .byte 102,15,56,219,192 .byte 102,15,56,219,201 leal 16(%edx),%edx leal -16(%eax),%eax movups %xmm0,16(%eax) movups %xmm1,-16(%edx) cmpl %edx,%eax ja .L115dec_key_inverse movups (%edx),%xmm0 .byte 102,15,56,219,192 movups %xmm0,(%edx) pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 xorl %eax,%eax .L114dec_key_ret: ret .size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin .align 64 .Lkey_const: .long 202313229,202313229,202313229,202313229 .long 67569157,67569157,67569157,67569157 .long 1,1,1,1 .long 27,27,27,27 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 .comm OPENSSL_ia32cap_P,16,4 #else .file "aesni-x86.S" .text .globl aesni_encrypt .type aesni_encrypt,@function .align 16 aesni_encrypt: .L_aesni_encrypt_begin: movl 4(%esp),%eax movl 12(%esp),%edx movups (%eax),%xmm2 movl 240(%edx),%ecx movl 8(%esp),%eax movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L000enc1_loop_1: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L000enc1_loop_1 .byte 102,15,56,221,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movups %xmm2,(%eax) pxor %xmm2,%xmm2 ret .size aesni_encrypt,.-.L_aesni_encrypt_begin .globl aesni_decrypt .type aesni_decrypt,@function .align 16 aesni_decrypt: .L_aesni_decrypt_begin: movl 4(%esp),%eax movl 12(%esp),%edx movups (%eax),%xmm2 movl 240(%edx),%ecx movl 8(%esp),%eax movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L001dec1_loop_2: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L001dec1_loop_2 .byte 102,15,56,223,209 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movups %xmm2,(%eax) pxor %xmm2,%xmm2 ret .size aesni_decrypt,.-.L_aesni_decrypt_begin .type _aesni_encrypt2,@function .align 16 _aesni_encrypt2: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx addl $16,%ecx .L002enc2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 jnz .L002enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,221,208 .byte 102,15,56,221,216 ret .size _aesni_encrypt2,.-_aesni_encrypt2 .type _aesni_decrypt2,@function .align 16 _aesni_decrypt2: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor 
%xmm0,%xmm3 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx addl $16,%ecx .L003dec2_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 movups -16(%edx,%ecx,1),%xmm0 jnz .L003dec2_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,223,208 .byte 102,15,56,223,216 ret .size _aesni_decrypt2,.-_aesni_decrypt2 .type _aesni_encrypt3,@function .align 16 _aesni_encrypt3: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx addl $16,%ecx .L004enc3_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 movups -16(%edx,%ecx,1),%xmm0 jnz .L004enc3_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 ret .size _aesni_encrypt3,.-_aesni_encrypt3 .type _aesni_decrypt3,@function .align 16 _aesni_decrypt3: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx addl $16,%ecx .L005dec3_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 movups -16(%edx,%ecx,1),%xmm0 jnz .L005dec3_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 102,15,56,223,224 ret .size _aesni_decrypt3,.-_aesni_decrypt3 .type _aesni_encrypt4,@function .align 16 _aesni_encrypt4: movups (%edx),%xmm0 movups 16(%edx),%xmm1 shll $4,%ecx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx .byte 15,31,64,0 addl $16,%ecx .L006enc4_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 movups -16(%edx,%ecx,1),%xmm0 jnz .L006enc4_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 102,15,56,221,232 ret .size _aesni_encrypt4,.-_aesni_encrypt4 .type _aesni_decrypt4,@function .align 16 _aesni_decrypt4: movups (%edx),%xmm0 movups 16(%edx),%xmm1 shll $4,%ecx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 movups 32(%edx),%xmm0 leal 32(%edx,%ecx,1),%edx negl %ecx .byte 15,31,64,0 addl $16,%ecx .L007dec4_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 movups -16(%edx,%ecx,1),%xmm0 jnz .L007dec4_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 102,15,56,223,224 .byte 102,15,56,223,232 ret .size _aesni_decrypt4,.-_aesni_decrypt4 .type _aesni_encrypt6,@function .align 16 _aesni_encrypt6: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor 
%xmm0,%xmm4 .byte 102,15,56,220,209 pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 .byte 102,15,56,220,217 leal 32(%edx,%ecx,1),%edx negl %ecx .byte 102,15,56,220,225 pxor %xmm0,%xmm7 movups (%edx,%ecx,1),%xmm0 addl $16,%ecx jmp .L008_aesni_encrypt6_inner .align 16 .L009enc6_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .L008_aesni_encrypt6_inner: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .L_aesni_encrypt6_enter: movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -16(%edx,%ecx,1),%xmm0 jnz .L009enc6_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,15,56,221,208 .byte 102,15,56,221,216 .byte 102,15,56,221,224 .byte 102,15,56,221,232 .byte 102,15,56,221,240 .byte 102,15,56,221,248 ret .size _aesni_encrypt6,.-_aesni_encrypt6 .type _aesni_decrypt6,@function .align 16 _aesni_decrypt6: movups (%edx),%xmm0 shll $4,%ecx movups 16(%edx),%xmm1 xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 .byte 102,15,56,222,209 pxor %xmm0,%xmm5 pxor %xmm0,%xmm6 .byte 102,15,56,222,217 leal 32(%edx,%ecx,1),%edx negl %ecx .byte 102,15,56,222,225 pxor %xmm0,%xmm7 movups (%edx,%ecx,1),%xmm0 addl $16,%ecx jmp .L010_aesni_decrypt6_inner .align 16 .L011dec6_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .L010_aesni_decrypt6_inner: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .L_aesni_decrypt6_enter: movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups -16(%edx,%ecx,1),%xmm0 jnz .L011dec6_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,15,56,223,208 .byte 102,15,56,223,216 .byte 102,15,56,223,224 .byte 102,15,56,223,232 .byte 102,15,56,223,240 .byte 102,15,56,223,248 ret .size _aesni_decrypt6,.-_aesni_decrypt6 .globl aesni_ecb_encrypt .type aesni_ecb_encrypt,@function .align 16 aesni_ecb_encrypt: .L_aesni_ecb_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl 36(%esp),%ebx andl $-16,%eax jz .L012ecb_ret movl 240(%edx),%ecx testl %ebx,%ebx jz .L013ecb_decrypt movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax jb .L014ecb_enc_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 movdqu 48(%esi),%xmm5 movdqu 64(%esi),%xmm6 movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax jmp .L015ecb_enc_loop6_enter .align 16 .L016ecb_enc_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) movdqu 16(%esi),%xmm3 movups %xmm4,32(%edi) movdqu 32(%esi),%xmm4 movups %xmm5,48(%edi) movdqu 48(%esi),%xmm5 movups %xmm6,64(%edi) movdqu 64(%esi),%xmm6 movups %xmm7,80(%edi) leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi .L015ecb_enc_loop6_enter: call _aesni_encrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax jnc .L016ecb_enc_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax jz .L012ecb_ret .L014ecb_enc_tail: movups (%esi),%xmm2 cmpl $32,%eax jb .L017ecb_enc_one movups 16(%esi),%xmm3 je .L018ecb_enc_two 
movups 32(%esi),%xmm4 cmpl $64,%eax jb .L019ecb_enc_three movups 48(%esi),%xmm5 je .L020ecb_enc_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_encrypt6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) jmp .L012ecb_ret .align 16 .L017ecb_enc_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L021enc1_loop_3: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L021enc1_loop_3 .byte 102,15,56,221,209 movups %xmm2,(%edi) jmp .L012ecb_ret .align 16 .L018ecb_enc_two: call _aesni_encrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) jmp .L012ecb_ret .align 16 .L019ecb_enc_three: call _aesni_encrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) jmp .L012ecb_ret .align 16 .L020ecb_enc_four: call _aesni_encrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) jmp .L012ecb_ret .align 16 .L013ecb_decrypt: movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax jb .L022ecb_dec_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 movdqu 48(%esi),%xmm5 movdqu 64(%esi),%xmm6 movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax jmp .L023ecb_dec_loop6_enter .align 16 .L024ecb_dec_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) movdqu 16(%esi),%xmm3 movups %xmm4,32(%edi) movdqu 32(%esi),%xmm4 movups %xmm5,48(%edi) movdqu 48(%esi),%xmm5 movups %xmm6,64(%edi) movdqu 64(%esi),%xmm6 movups %xmm7,80(%edi) leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi .L023ecb_dec_loop6_enter: call _aesni_decrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax jnc .L024ecb_dec_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax jz .L012ecb_ret .L022ecb_dec_tail: movups (%esi),%xmm2 cmpl $32,%eax jb .L025ecb_dec_one movups 16(%esi),%xmm3 je .L026ecb_dec_two movups 32(%esi),%xmm4 cmpl $64,%eax jb .L027ecb_dec_three movups 48(%esi),%xmm5 je .L028ecb_dec_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_decrypt6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) jmp .L012ecb_ret .align 16 .L025ecb_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L029dec1_loop_4: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L029dec1_loop_4 .byte 102,15,56,223,209 movups %xmm2,(%edi) jmp .L012ecb_ret .align 16 .L026ecb_dec_two: call _aesni_decrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) jmp .L012ecb_ret .align 16 .L027ecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) jmp .L012ecb_ret .align 16 .L028ecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) .L012ecb_ret: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin .globl aesni_ccm64_encrypt_blocks .type aesni_ccm64_encrypt_blocks,@function .align 16 aesni_ccm64_encrypt_blocks: .L_aesni_ccm64_encrypt_blocks_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl 36(%esp),%ebx movl 40(%esp),%ecx movl %esp,%ebp subl $60,%esp andl $-16,%esp movl %ebp,48(%esp) movdqu 
(%ebx),%xmm7 movdqu (%ecx),%xmm3 movl 240(%edx),%ecx movl $202182159,(%esp) movl $134810123,4(%esp) movl $67438087,8(%esp) movl $66051,12(%esp) movl $1,%ebx xorl %ebp,%ebp movl %ebx,16(%esp) movl %ebp,20(%esp) movl %ebp,24(%esp) movl %ebp,28(%esp) shll $4,%ecx movl $16,%ebx leal (%edx),%ebp movdqa (%esp),%xmm5 movdqa %xmm7,%xmm2 leal 32(%edx,%ecx,1),%edx subl %ecx,%ebx .byte 102,15,56,0,253 .L030ccm64_enc_outer: movups (%ebp),%xmm0 movl %ebx,%ecx movups (%esi),%xmm6 xorps %xmm0,%xmm2 movups 16(%ebp),%xmm1 xorps %xmm6,%xmm0 xorps %xmm0,%xmm3 movups 32(%ebp),%xmm0 .L031ccm64_enc2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 jnz .L031ccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 paddq 16(%esp),%xmm7 decl %eax .byte 102,15,56,221,208 .byte 102,15,56,221,216 leal 16(%esi),%esi xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) .byte 102,15,56,0,213 leal 16(%edi),%edi jnz .L030ccm64_enc_outer movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin .globl aesni_ccm64_decrypt_blocks .type aesni_ccm64_decrypt_blocks,@function .align 16 aesni_ccm64_decrypt_blocks: .L_aesni_ccm64_decrypt_blocks_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl 36(%esp),%ebx movl 40(%esp),%ecx movl %esp,%ebp subl $60,%esp andl $-16,%esp movl %ebp,48(%esp) movdqu (%ebx),%xmm7 movdqu (%ecx),%xmm3 movl 240(%edx),%ecx movl $202182159,(%esp) movl $134810123,4(%esp) movl $67438087,8(%esp) movl $66051,12(%esp) movl $1,%ebx xorl %ebp,%ebp movl %ebx,16(%esp) movl %ebp,20(%esp) movl %ebp,24(%esp) movl %ebp,28(%esp) movdqa (%esp),%xmm5 movdqa %xmm7,%xmm2 movl %edx,%ebp movl %ecx,%ebx .byte 102,15,56,0,253 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L032enc1_loop_5: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L032enc1_loop_5 .byte 102,15,56,221,209 shll $4,%ebx movl $16,%ecx movups (%esi),%xmm6 paddq 16(%esp),%xmm7 leal 16(%esi),%esi subl %ebx,%ecx leal 32(%ebp,%ebx,1),%edx movl %ecx,%ebx jmp .L033ccm64_dec_outer .align 16 .L033ccm64_dec_outer: xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) leal 16(%edi),%edi .byte 102,15,56,0,213 subl $1,%eax jz .L034ccm64_dec_break movups (%ebp),%xmm0 movl %ebx,%ecx movups 16(%ebp),%xmm1 xorps %xmm0,%xmm6 xorps %xmm0,%xmm2 xorps %xmm6,%xmm3 movups 32(%ebp),%xmm0 .L035ccm64_dec2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 jnz .L035ccm64_dec2_loop movups (%esi),%xmm6 paddq 16(%esp),%xmm7 .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,221,208 .byte 102,15,56,221,216 leal 16(%esi),%esi jmp .L033ccm64_dec_outer .align 16 .L034ccm64_dec_break: movl 240(%ebp),%ecx movl %ebp,%edx movups (%edx),%xmm0 movups 16(%edx),%xmm1 xorps %xmm0,%xmm6 leal 32(%edx),%edx xorps %xmm6,%xmm3 .L036enc1_loop_6: .byte 102,15,56,220,217 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L036enc1_loop_6 .byte 102,15,56,221,217 movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor 
%xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin .globl aesni_ctr32_encrypt_blocks .type aesni_ctr32_encrypt_blocks,@function .align 16 aesni_ctr32_encrypt_blocks: .L_aesni_ctr32_encrypt_blocks_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl 36(%esp),%ebx movl %esp,%ebp subl $88,%esp andl $-16,%esp movl %ebp,80(%esp) cmpl $1,%eax je .L037ctr32_one_shortcut movdqu (%ebx),%xmm7 movl $202182159,(%esp) movl $134810123,4(%esp) movl $67438087,8(%esp) movl $66051,12(%esp) movl $6,%ecx xorl %ebp,%ebp movl %ecx,16(%esp) movl %ecx,20(%esp) movl %ecx,24(%esp) movl %ebp,28(%esp) .byte 102,15,58,22,251,3 .byte 102,15,58,34,253,3 movl 240(%edx),%ecx bswap %ebx pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 movdqa (%esp),%xmm2 .byte 102,15,58,34,195,0 leal 3(%ebx),%ebp .byte 102,15,58,34,205,0 incl %ebx .byte 102,15,58,34,195,1 incl %ebp .byte 102,15,58,34,205,1 incl %ebx .byte 102,15,58,34,195,2 incl %ebp .byte 102,15,58,34,205,2 movdqa %xmm0,48(%esp) .byte 102,15,56,0,194 movdqu (%edx),%xmm6 movdqa %xmm1,64(%esp) .byte 102,15,56,0,202 pshufd $192,%xmm0,%xmm2 pshufd $128,%xmm0,%xmm3 cmpl $6,%eax jb .L038ctr32_tail pxor %xmm6,%xmm7 shll $4,%ecx movl $16,%ebx movdqa %xmm7,32(%esp) movl %edx,%ebp subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx subl $6,%eax jmp .L039ctr32_loop6 .align 16 .L039ctr32_loop6: pshufd $64,%xmm0,%xmm4 movdqa 32(%esp),%xmm0 pshufd $192,%xmm1,%xmm5 pxor %xmm0,%xmm2 pshufd $128,%xmm1,%xmm6 pxor %xmm0,%xmm3 pshufd $64,%xmm1,%xmm7 movups 16(%ebp),%xmm1 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 .byte 102,15,56,220,209 pxor %xmm0,%xmm6 pxor %xmm0,%xmm7 .byte 102,15,56,220,217 movups 32(%ebp),%xmm0 movl %ebx,%ecx .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 call .L_aesni_encrypt6_enter movups (%esi),%xmm1 movups 16(%esi),%xmm0 xorps %xmm1,%xmm2 movups 32(%esi),%xmm1 xorps %xmm0,%xmm3 movups %xmm2,(%edi) movdqa 16(%esp),%xmm0 xorps %xmm1,%xmm4 movdqa 64(%esp),%xmm1 movups %xmm3,16(%edi) movups %xmm4,32(%edi) paddd %xmm0,%xmm1 paddd 48(%esp),%xmm0 movdqa (%esp),%xmm2 movups 48(%esi),%xmm3 movups 64(%esi),%xmm4 xorps %xmm3,%xmm5 movups 80(%esi),%xmm3 leal 96(%esi),%esi movdqa %xmm0,48(%esp) .byte 102,15,56,0,194 xorps %xmm4,%xmm6 movups %xmm5,48(%edi) xorps %xmm3,%xmm7 movdqa %xmm1,64(%esp) .byte 102,15,56,0,202 movups %xmm6,64(%edi) pshufd $192,%xmm0,%xmm2 movups %xmm7,80(%edi) leal 96(%edi),%edi pshufd $128,%xmm0,%xmm3 subl $6,%eax jnc .L039ctr32_loop6 addl $6,%eax jz .L040ctr32_ret movdqu (%ebp),%xmm7 movl %ebp,%edx pxor 32(%esp),%xmm7 movl 240(%ebp),%ecx .L038ctr32_tail: por %xmm7,%xmm2 cmpl $2,%eax jb .L041ctr32_one pshufd $64,%xmm0,%xmm4 por %xmm7,%xmm3 je .L042ctr32_two pshufd $192,%xmm1,%xmm5 por %xmm7,%xmm4 cmpl $4,%eax jb .L043ctr32_three pshufd $128,%xmm1,%xmm6 por %xmm7,%xmm5 je .L044ctr32_four por %xmm7,%xmm6 call _aesni_encrypt6 movups (%esi),%xmm1 movups 16(%esi),%xmm0 xorps %xmm1,%xmm2 movups 32(%esi),%xmm1 xorps %xmm0,%xmm3 movups 48(%esi),%xmm0 xorps %xmm1,%xmm4 movups 64(%esi),%xmm1 xorps %xmm0,%xmm5 movups %xmm2,(%edi) xorps %xmm1,%xmm6 movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) jmp .L040ctr32_ret .align 16 .L037ctr32_one_shortcut: movups (%ebx),%xmm2 movl 240(%edx),%ecx .L041ctr32_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps 
%xmm0,%xmm2 .L045enc1_loop_7: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L045enc1_loop_7 .byte 102,15,56,221,209 movups (%esi),%xmm6 xorps %xmm2,%xmm6 movups %xmm6,(%edi) jmp .L040ctr32_ret .align 16 .L042ctr32_two: call _aesni_encrypt2 movups (%esi),%xmm5 movups 16(%esi),%xmm6 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) jmp .L040ctr32_ret .align 16 .L043ctr32_three: call _aesni_encrypt3 movups (%esi),%xmm5 movups 16(%esi),%xmm6 xorps %xmm5,%xmm2 movups 32(%esi),%xmm7 xorps %xmm6,%xmm3 movups %xmm2,(%edi) xorps %xmm7,%xmm4 movups %xmm3,16(%edi) movups %xmm4,32(%edi) jmp .L040ctr32_ret .align 16 .L044ctr32_four: call _aesni_encrypt4 movups (%esi),%xmm6 movups 16(%esi),%xmm7 movups 32(%esi),%xmm1 xorps %xmm6,%xmm2 movups 48(%esi),%xmm0 xorps %xmm7,%xmm3 movups %xmm2,(%edi) xorps %xmm1,%xmm4 movups %xmm3,16(%edi) xorps %xmm0,%xmm5 movups %xmm4,32(%edi) movups %xmm5,48(%edi) .L040ctr32_ret: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 movdqa %xmm0,32(%esp) pxor %xmm5,%xmm5 movdqa %xmm0,48(%esp) pxor %xmm6,%xmm6 movdqa %xmm0,64(%esp) pxor %xmm7,%xmm7 movl 80(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin .globl aesni_xts_encrypt .type aesni_xts_encrypt,@function .align 16 aesni_xts_encrypt: .L_aesni_xts_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 36(%esp),%edx movl 40(%esp),%esi movl 240(%edx),%ecx movups (%esi),%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L046enc1_loop_8: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L046enc1_loop_8 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl %esp,%ebp subl $120,%esp movl 240(%edx),%ecx andl $-16,%esp movl $135,96(%esp) movl $0,100(%esp) movl $1,104(%esp) movl $0,108(%esp) movl %eax,112(%esp) movl %ebp,116(%esp) movdqa %xmm2,%xmm1 pxor %xmm0,%xmm0 movdqa 96(%esp),%xmm3 pcmpgtd %xmm1,%xmm0 andl $-16,%eax movl %edx,%ebp movl %ecx,%ebx subl $96,%eax jc .L047xts_enc_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx jmp .L048xts_enc_loop6 .align 16 .L048xts_enc_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,16(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,32(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,48(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm7 movdqa %xmm1,64(%esp) paddq %xmm1,%xmm1 movups (%ebp),%xmm0 pand %xmm3,%xmm7 movups (%esi),%xmm2 pxor %xmm1,%xmm7 movl %ebx,%ecx movdqu 16(%esi),%xmm3 xorps %xmm0,%xmm2 movdqu 32(%esi),%xmm4 pxor %xmm0,%xmm3 movdqu 48(%esi),%xmm5 pxor %xmm0,%xmm4 movdqu 64(%esi),%xmm6 pxor %xmm0,%xmm5 movdqu 80(%esi),%xmm1 pxor %xmm0,%xmm6 leal 96(%esi),%esi pxor (%esp),%xmm2 movdqa %xmm7,80(%esp) pxor %xmm1,%xmm7 movups 16(%ebp),%xmm1 pxor 16(%esp),%xmm3 pxor 32(%esp),%xmm4 .byte 102,15,56,220,209 pxor 48(%esp),%xmm5 pxor 64(%esp),%xmm6 .byte 102,15,56,220,217 pxor %xmm0,%xmm7 movups 32(%ebp),%xmm0 .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 call 
.L_aesni_encrypt6_enter movdqa 80(%esp),%xmm1 pxor %xmm0,%xmm0 xorps (%esp),%xmm2 pcmpgtd %xmm1,%xmm0 xorps 16(%esp),%xmm3 movups %xmm2,(%edi) xorps 32(%esp),%xmm4 movups %xmm3,16(%edi) xorps 48(%esp),%xmm5 movups %xmm4,32(%edi) xorps 64(%esp),%xmm6 movups %xmm5,48(%edi) xorps %xmm1,%xmm7 movups %xmm6,64(%edi) pshufd $19,%xmm0,%xmm2 movups %xmm7,80(%edi) leal 96(%edi),%edi movdqa 96(%esp),%xmm3 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax jnc .L048xts_enc_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx .L047xts_enc_short: addl $96,%eax jz .L049xts_enc_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax jb .L050xts_enc_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 je .L051xts_enc_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax jb .L052xts_enc_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) je .L053xts_enc_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm7 pxor %xmm1,%xmm7 movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 pxor (%esp),%xmm2 movdqu 48(%esi),%xmm5 pxor 16(%esp),%xmm3 movdqu 64(%esi),%xmm6 pxor 32(%esp),%xmm4 leal 80(%esi),%esi pxor 48(%esp),%xmm5 movdqa %xmm7,64(%esp) pxor %xmm7,%xmm6 call _aesni_encrypt6 movaps 64(%esp),%xmm1 xorps (%esp),%xmm2 xorps 16(%esp),%xmm3 xorps 32(%esp),%xmm4 movups %xmm2,(%edi) xorps 48(%esp),%xmm5 movups %xmm3,16(%edi) xorps %xmm1,%xmm6 movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi jmp .L054xts_enc_done .align 16 .L050xts_enc_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L055enc1_loop_9: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L055enc1_loop_9 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 jmp .L054xts_enc_done .align 16 .L051xts_enc_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 leal 32(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 call _aesni_encrypt2 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 jmp .L054xts_enc_done .align 16 .L052xts_enc_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 movups 32(%esi),%xmm4 leal 48(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 xorps %xmm7,%xmm4 call _aesni_encrypt3 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 xorps %xmm7,%xmm4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 jmp .L054xts_enc_done .align 16 .L053xts_enc_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 movups 32(%esi),%xmm4 xorps (%esp),%xmm2 movups 48(%esi),%xmm5 leal 64(%esi),%esi xorps 16(%esp),%xmm3 xorps %xmm7,%xmm4 xorps %xmm6,%xmm5 call _aesni_encrypt4 xorps (%esp),%xmm2 xorps 16(%esp),%xmm3 xorps %xmm7,%xmm4 movups %xmm2,(%edi) xorps %xmm6,%xmm5 movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 jmp .L054xts_enc_done .align 16 .L049xts_enc_done6x: movl 112(%esp),%eax andl $15,%eax jz .L056xts_enc_ret movdqa %xmm1,%xmm5 movl %eax,112(%esp) jmp .L057xts_enc_steal .align 16 
.L054xts_enc_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax jz .L056xts_enc_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm5 paddq %xmm1,%xmm1 pand 96(%esp),%xmm5 pxor %xmm1,%xmm5 .L057xts_enc_steal: movzbl (%esi),%ecx movzbl -16(%edi),%edx leal 1(%esi),%esi movb %cl,-16(%edi) movb %dl,(%edi) leal 1(%edi),%edi subl $1,%eax jnz .L057xts_enc_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx movups -16(%edi),%xmm2 xorps %xmm5,%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L058enc1_loop_10: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L058enc1_loop_10 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,-16(%edi) .L056xts_enc_ret: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 movdqa %xmm0,(%esp) pxor %xmm3,%xmm3 movdqa %xmm0,16(%esp) pxor %xmm4,%xmm4 movdqa %xmm0,32(%esp) pxor %xmm5,%xmm5 movdqa %xmm0,48(%esp) pxor %xmm6,%xmm6 movdqa %xmm0,64(%esp) pxor %xmm7,%xmm7 movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin .globl aesni_xts_decrypt .type aesni_xts_decrypt,@function .align 16 aesni_xts_decrypt: .L_aesni_xts_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 36(%esp),%edx movl 40(%esp),%esi movl 240(%edx),%ecx movups (%esi),%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L059enc1_loop_11: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L059enc1_loop_11 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx movl %esp,%ebp subl $120,%esp andl $-16,%esp xorl %ebx,%ebx testl $15,%eax setnz %bl shll $4,%ebx subl %ebx,%eax movl $135,96(%esp) movl $0,100(%esp) movl $1,104(%esp) movl $0,108(%esp) movl %eax,112(%esp) movl %ebp,116(%esp) movl 240(%edx),%ecx movl %edx,%ebp movl %ecx,%ebx movdqa %xmm2,%xmm1 pxor %xmm0,%xmm0 movdqa 96(%esp),%xmm3 pcmpgtd %xmm1,%xmm0 andl $-16,%eax subl $96,%eax jc .L060xts_dec_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx jmp .L061xts_dec_loop6 .align 16 .L061xts_dec_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,16(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,32(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,48(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 pshufd $19,%xmm0,%xmm7 movdqa %xmm1,64(%esp) paddq %xmm1,%xmm1 movups (%ebp),%xmm0 pand %xmm3,%xmm7 movups (%esi),%xmm2 pxor %xmm1,%xmm7 movl %ebx,%ecx movdqu 16(%esi),%xmm3 xorps %xmm0,%xmm2 movdqu 32(%esi),%xmm4 pxor %xmm0,%xmm3 movdqu 48(%esi),%xmm5 pxor %xmm0,%xmm4 movdqu 64(%esi),%xmm6 pxor %xmm0,%xmm5 movdqu 80(%esi),%xmm1 pxor %xmm0,%xmm6 leal 96(%esi),%esi pxor (%esp),%xmm2 movdqa %xmm7,80(%esp) pxor %xmm1,%xmm7 movups 16(%ebp),%xmm1 pxor 16(%esp),%xmm3 pxor 32(%esp),%xmm4 .byte 102,15,56,222,209 pxor 48(%esp),%xmm5 pxor 64(%esp),%xmm6 .byte 102,15,56,222,217 pxor %xmm0,%xmm7 movups 32(%ebp),%xmm0 .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 call .L_aesni_decrypt6_enter movdqa 80(%esp),%xmm1 pxor %xmm0,%xmm0 xorps (%esp),%xmm2 pcmpgtd %xmm1,%xmm0 xorps 16(%esp),%xmm3 
movups %xmm2,(%edi) xorps 32(%esp),%xmm4 movups %xmm3,16(%edi) xorps 48(%esp),%xmm5 movups %xmm4,32(%edi) xorps 64(%esp),%xmm6 movups %xmm5,48(%edi) xorps %xmm1,%xmm7 movups %xmm6,64(%edi) pshufd $19,%xmm0,%xmm2 movups %xmm7,80(%edi) leal 96(%edi),%edi movdqa 96(%esp),%xmm3 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax jnc .L061xts_dec_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx .L060xts_dec_short: addl $96,%eax jz .L062xts_dec_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax jb .L063xts_dec_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 je .L064xts_dec_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax jb .L065xts_dec_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) je .L066xts_dec_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) paddq %xmm1,%xmm1 pand %xmm3,%xmm7 pxor %xmm1,%xmm7 movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 pxor (%esp),%xmm2 movdqu 48(%esi),%xmm5 pxor 16(%esp),%xmm3 movdqu 64(%esi),%xmm6 pxor 32(%esp),%xmm4 leal 80(%esi),%esi pxor 48(%esp),%xmm5 movdqa %xmm7,64(%esp) pxor %xmm7,%xmm6 call _aesni_decrypt6 movaps 64(%esp),%xmm1 xorps (%esp),%xmm2 xorps 16(%esp),%xmm3 xorps 32(%esp),%xmm4 movups %xmm2,(%edi) xorps 48(%esp),%xmm5 movups %xmm3,16(%edi) xorps %xmm1,%xmm6 movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi jmp .L067xts_dec_done .align 16 .L063xts_dec_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L068dec1_loop_12: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L068dec1_loop_12 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 jmp .L067xts_dec_done .align 16 .L064xts_dec_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 leal 32(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 call _aesni_decrypt2 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 jmp .L067xts_dec_done .align 16 .L065xts_dec_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 movups 32(%esi),%xmm4 leal 48(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 xorps %xmm7,%xmm4 call _aesni_decrypt3 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 xorps %xmm7,%xmm4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 jmp .L067xts_dec_done .align 16 .L066xts_dec_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 movups 32(%esi),%xmm4 xorps (%esp),%xmm2 movups 48(%esi),%xmm5 leal 64(%esi),%esi xorps 16(%esp),%xmm3 xorps %xmm7,%xmm4 xorps %xmm6,%xmm5 call _aesni_decrypt4 xorps (%esp),%xmm2 xorps 16(%esp),%xmm3 xorps %xmm7,%xmm4 movups %xmm2,(%edi) xorps %xmm6,%xmm5 movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 jmp .L067xts_dec_done .align 16 .L062xts_dec_done6x: movl 112(%esp),%eax andl $15,%eax jz .L069xts_dec_ret movl %eax,112(%esp) jmp .L070xts_dec_only_one_more .align 16 .L067xts_dec_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax jz .L069xts_dec_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd 
$19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa 96(%esp),%xmm3 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 .L070xts_dec_only_one_more: pshufd $19,%xmm0,%xmm5 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 pand %xmm3,%xmm5 pxor %xmm1,%xmm5 movl %ebp,%edx movl %ebx,%ecx movups (%esi),%xmm2 xorps %xmm5,%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L071dec1_loop_13: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L071dec1_loop_13 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) .L072xts_dec_steal: movzbl 16(%esi),%ecx movzbl (%edi),%edx leal 1(%esi),%esi movb %cl,(%edi) movb %dl,16(%edi) leal 1(%edi),%edi subl $1,%eax jnz .L072xts_dec_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx movups (%edi),%xmm2 xorps %xmm6,%xmm2 movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L073dec1_loop_14: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L073dec1_loop_14 .byte 102,15,56,223,209 xorps %xmm6,%xmm2 movups %xmm2,(%edi) .L069xts_dec_ret: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 movdqa %xmm0,(%esp) pxor %xmm3,%xmm3 movdqa %xmm0,16(%esp) pxor %xmm4,%xmm4 movdqa %xmm0,32(%esp) pxor %xmm5,%xmm5 movdqa %xmm0,48(%esp) pxor %xmm6,%xmm6 movdqa %xmm0,64(%esp) pxor %xmm7,%xmm7 movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin .globl aesni_cbc_encrypt .type aesni_cbc_encrypt,@function .align 16 aesni_cbc_encrypt: .L_aesni_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl %esp,%ebx movl 24(%esp),%edi subl $24,%ebx movl 28(%esp),%eax andl $-16,%ebx movl 32(%esp),%edx movl 36(%esp),%ebp testl %eax,%eax jz .L074cbc_abort cmpl $0,40(%esp) xchgl %esp,%ebx movups (%ebp),%xmm7 movl 240(%edx),%ecx movl %edx,%ebp movl %ebx,16(%esp) movl %ecx,%ebx je .L075cbc_decrypt movaps %xmm7,%xmm2 cmpl $16,%eax jb .L076cbc_enc_tail subl $16,%eax jmp .L077cbc_enc_loop .align 16 .L077cbc_enc_loop: movups (%esi),%xmm7 leal 16(%esi),%esi movups (%edx),%xmm0 movups 16(%edx),%xmm1 xorps %xmm0,%xmm7 leal 32(%edx),%edx xorps %xmm7,%xmm2 .L078enc1_loop_15: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L078enc1_loop_15 .byte 102,15,56,221,209 movl %ebx,%ecx movl %ebp,%edx movups %xmm2,(%edi) leal 16(%edi),%edi subl $16,%eax jnc .L077cbc_enc_loop addl $16,%eax jnz .L076cbc_enc_tail movaps %xmm2,%xmm7 pxor %xmm2,%xmm2 jmp .L079cbc_ret .L076cbc_enc_tail: movl %eax,%ecx .long 2767451785 movl $16,%ecx subl %eax,%ecx xorl %eax,%eax .long 2868115081 leal -16(%edi),%edi movl %ebx,%ecx movl %edi,%esi movl %ebp,%edx jmp .L077cbc_enc_loop .align 16 .L075cbc_decrypt: cmpl $80,%eax jbe .L080cbc_dec_tail movaps %xmm7,(%esp) subl $80,%eax jmp .L081cbc_dec_loop6_enter .align 16 .L082cbc_dec_loop6: movaps %xmm0,(%esp) movups %xmm7,(%edi) leal 16(%edi),%edi .L081cbc_dec_loop6_enter: movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 movdqu 48(%esi),%xmm5 movdqu 64(%esi),%xmm6 movdqu 80(%esi),%xmm7 call _aesni_decrypt6 movups (%esi),%xmm1 movups 16(%esi),%xmm0 xorps (%esp),%xmm2 xorps %xmm1,%xmm3 movups 32(%esi),%xmm1 xorps %xmm0,%xmm4 movups 48(%esi),%xmm0 xorps %xmm1,%xmm5 movups 64(%esi),%xmm1 xorps %xmm0,%xmm6 movups 80(%esi),%xmm0 xorps %xmm1,%xmm7 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 96(%esi),%esi movups %xmm4,32(%edi) movl %ebx,%ecx movups %xmm5,48(%edi) movl %ebp,%edx movups %xmm6,64(%edi) leal 80(%edi),%edi 
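# Note (added comment): CBC decryption, six blocks per iteration:
# P[i] = Decrypt(C[i]) xor C[i-1].  The chaining value (the IV on the first
# pass, afterwards the last ciphertext block of the previous batch, carried
# over in %xmm0) is kept at (%esp) and xor-ed into the first decrypted block
# of each batch.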
subl $96,%eax ja .L082cbc_dec_loop6 movaps %xmm7,%xmm2 movaps %xmm0,%xmm7 addl $80,%eax jle .L083cbc_dec_clear_tail_collected movups %xmm2,(%edi) leal 16(%edi),%edi .L080cbc_dec_tail: movups (%esi),%xmm2 movaps %xmm2,%xmm6 cmpl $16,%eax jbe .L084cbc_dec_one movups 16(%esi),%xmm3 movaps %xmm3,%xmm5 cmpl $32,%eax jbe .L085cbc_dec_two movups 32(%esi),%xmm4 cmpl $48,%eax jbe .L086cbc_dec_three movups 48(%esi),%xmm5 cmpl $64,%eax jbe .L087cbc_dec_four movups 64(%esi),%xmm6 movaps %xmm7,(%esp) movups (%esi),%xmm2 xorps %xmm7,%xmm7 call _aesni_decrypt6 movups (%esi),%xmm1 movups 16(%esi),%xmm0 xorps (%esp),%xmm2 xorps %xmm1,%xmm3 movups 32(%esi),%xmm1 xorps %xmm0,%xmm4 movups 48(%esi),%xmm0 xorps %xmm1,%xmm5 movups 64(%esi),%xmm7 xorps %xmm0,%xmm6 movups %xmm2,(%edi) movups %xmm3,16(%edi) pxor %xmm3,%xmm3 movups %xmm4,32(%edi) pxor %xmm4,%xmm4 movups %xmm5,48(%edi) pxor %xmm5,%xmm5 leal 64(%edi),%edi movaps %xmm6,%xmm2 pxor %xmm6,%xmm6 subl $80,%eax jmp .L088cbc_dec_tail_collected .align 16 .L084cbc_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 .L089dec1_loop_16: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx jnz .L089dec1_loop_16 .byte 102,15,56,223,209 xorps %xmm7,%xmm2 movaps %xmm6,%xmm7 subl $16,%eax jmp .L088cbc_dec_tail_collected .align 16 .L085cbc_dec_two: call _aesni_decrypt2 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movaps %xmm3,%xmm2 pxor %xmm3,%xmm3 leal 16(%edi),%edi movaps %xmm5,%xmm7 subl $32,%eax jmp .L088cbc_dec_tail_collected .align 16 .L086cbc_dec_three: call _aesni_decrypt3 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 xorps %xmm5,%xmm4 movups %xmm2,(%edi) movaps %xmm4,%xmm2 pxor %xmm4,%xmm4 movups %xmm3,16(%edi) pxor %xmm3,%xmm3 leal 32(%edi),%edi movups 32(%esi),%xmm7 subl $48,%eax jmp .L088cbc_dec_tail_collected .align 16 .L087cbc_dec_four: call _aesni_decrypt4 movups 16(%esi),%xmm1 movups 32(%esi),%xmm0 xorps %xmm7,%xmm2 movups 48(%esi),%xmm7 xorps %xmm6,%xmm3 movups %xmm2,(%edi) xorps %xmm1,%xmm4 movups %xmm3,16(%edi) pxor %xmm3,%xmm3 xorps %xmm0,%xmm5 movups %xmm4,32(%edi) pxor %xmm4,%xmm4 leal 48(%edi),%edi movaps %xmm5,%xmm2 pxor %xmm5,%xmm5 subl $64,%eax jmp .L088cbc_dec_tail_collected .align 16 .L083cbc_dec_clear_tail_collected: pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 .L088cbc_dec_tail_collected: andl $15,%eax jnz .L090cbc_dec_tail_partial movups %xmm2,(%edi) pxor %xmm0,%xmm0 jmp .L079cbc_ret .align 16 .L090cbc_dec_tail_partial: movaps %xmm2,(%esp) pxor %xmm0,%xmm0 movl $16,%ecx movl %esp,%esi subl %eax,%ecx .long 2767451785 movdqa %xmm2,(%esp) .L079cbc_ret: movl 16(%esp),%esp movl 36(%esp),%ebp pxor %xmm2,%xmm2 pxor %xmm1,%xmm1 movups %xmm7,(%ebp) pxor %xmm7,%xmm7 .L074cbc_abort: popl %edi popl %esi popl %ebx popl %ebp ret .size aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin .type _aesni_set_encrypt_key,@function .align 16 _aesni_set_encrypt_key: pushl %ebp pushl %ebx testl %eax,%eax jz .L091bad_pointer testl %edx,%edx jz .L091bad_pointer call .L092pic .L092pic: popl %ebx leal .Lkey_const-.L092pic(%ebx),%ebx leal OPENSSL_ia32cap_P,%ebp movups (%eax),%xmm0 xorps %xmm4,%xmm4 movl 4(%ebp),%ebp leal 16(%edx),%edx andl $268437504,%ebp cmpl $256,%ecx je .L09314rounds cmpl $192,%ecx je .L09412rounds cmpl $128,%ecx jne .L095bad_keybits .align 16 .L09610rounds: cmpl $268435456,%ebp je .L09710rounds_alt movl $9,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,200,1 call .L098key_128_cold .byte 102,15,58,223,200,2 call .L099key_128 .byte 102,15,58,223,200,4 call .L099key_128 .byte 
102,15,58,223,200,8 call .L099key_128 .byte 102,15,58,223,200,16 call .L099key_128 .byte 102,15,58,223,200,32 call .L099key_128 .byte 102,15,58,223,200,64 call .L099key_128 .byte 102,15,58,223,200,128 call .L099key_128 .byte 102,15,58,223,200,27 call .L099key_128 .byte 102,15,58,223,200,54 call .L099key_128 movups %xmm0,(%edx) movl %ecx,80(%edx) jmp .L100good_key .align 16 .L099key_128: movups %xmm0,(%edx) leal 16(%edx),%edx .L098key_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $255,%xmm1,%xmm1 xorps %xmm1,%xmm0 ret .align 16 .L09710rounds_alt: movdqa (%ebx),%xmm5 movl $8,%ecx movdqa 32(%ebx),%xmm4 movdqa %xmm0,%xmm2 movdqu %xmm0,-16(%edx) .L101loop_key128: .byte 102,15,56,0,197 .byte 102,15,56,221,196 pslld $1,%xmm4 leal 16(%edx),%edx movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,-16(%edx) movdqa %xmm0,%xmm2 decl %ecx jnz .L101loop_key128 movdqa 48(%ebx),%xmm4 .byte 102,15,56,0,197 .byte 102,15,56,221,196 pslld $1,%xmm4 movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,(%edx) movdqa %xmm0,%xmm2 .byte 102,15,56,0,197 .byte 102,15,56,221,196 movdqa %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm2,%xmm3 pslldq $4,%xmm2 pxor %xmm3,%xmm2 pxor %xmm2,%xmm0 movdqu %xmm0,16(%edx) movl $9,%ecx movl %ecx,96(%edx) jmp .L100good_key .align 16 .L09412rounds: movq 16(%eax),%xmm2 cmpl $268435456,%ebp je .L10212rounds_alt movl $11,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,202,1 call .L103key_192a_cold .byte 102,15,58,223,202,2 call .L104key_192b .byte 102,15,58,223,202,4 call .L105key_192a .byte 102,15,58,223,202,8 call .L104key_192b .byte 102,15,58,223,202,16 call .L105key_192a .byte 102,15,58,223,202,32 call .L104key_192b .byte 102,15,58,223,202,64 call .L105key_192a .byte 102,15,58,223,202,128 call .L104key_192b movups %xmm0,(%edx) movl %ecx,48(%edx) jmp .L100good_key .align 16 .L105key_192a: movups %xmm0,(%edx) leal 16(%edx),%edx .align 16 .L103key_192a_cold: movaps %xmm2,%xmm5 .L106key_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 pslldq $4,%xmm3 xorps %xmm4,%xmm0 pshufd $85,%xmm1,%xmm1 pxor %xmm3,%xmm2 pxor %xmm1,%xmm0 pshufd $255,%xmm0,%xmm3 pxor %xmm3,%xmm2 ret .align 16 .L104key_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%edx) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%edx) leal 32(%edx),%edx jmp .L106key_192b_warm .align 16 .L10212rounds_alt: movdqa 16(%ebx),%xmm5 movdqa 32(%ebx),%xmm4 movl $8,%ecx movdqu %xmm0,-16(%edx) .L107loop_key192: movq %xmm2,(%edx) movdqa %xmm2,%xmm1 .byte 102,15,56,0,213 .byte 102,15,56,221,212 pslld $1,%xmm4 leal 24(%edx),%edx movdqa %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm3,%xmm0 pshufd $255,%xmm0,%xmm3 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pxor %xmm2,%xmm0 pxor %xmm3,%xmm2 movdqu %xmm0,-16(%edx) decl %ecx jnz .L107loop_key192 movl $11,%ecx movl %ecx,32(%edx) jmp .L100good_key .align 16 .L09314rounds: movups 16(%eax),%xmm2 leal 16(%edx),%edx cmpl $268435456,%ebp je .L10814rounds_alt movl $13,%ecx movups %xmm0,-32(%edx) movups %xmm2,-16(%edx) .byte 102,15,58,223,202,1 call .L109key_256a_cold .byte 102,15,58,223,200,1 call .L110key_256b .byte 102,15,58,223,202,2 call .L111key_256a .byte 102,15,58,223,200,2 call .L110key_256b .byte 102,15,58,223,202,4 
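# Note (added comment): AES-256 key expansion.  Each .byte 102,15,58,223,...
# sequence encodes aeskeygenassist with the round constant as the immediate;
# the key_256a/key_256b helpers alternate between updating the two 128-bit
# key halves held in %xmm0 and %xmm2 and storing them into the schedule.  The
# *_alt paths of _aesni_set_encrypt_key perform the same expansion with
# pshufb/aesenclast instead, selected from the OPENSSL_ia32cap_P capability
# word loaded earlier.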
call .L111key_256a .byte 102,15,58,223,200,4 call .L110key_256b .byte 102,15,58,223,202,8 call .L111key_256a .byte 102,15,58,223,200,8 call .L110key_256b .byte 102,15,58,223,202,16 call .L111key_256a .byte 102,15,58,223,200,16 call .L110key_256b .byte 102,15,58,223,202,32 call .L111key_256a .byte 102,15,58,223,200,32 call .L110key_256b .byte 102,15,58,223,202,64 call .L111key_256a movups %xmm0,(%edx) movl %ecx,16(%edx) xorl %eax,%eax jmp .L100good_key .align 16 .L111key_256a: movups %xmm2,(%edx) leal 16(%edx),%edx .L109key_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $255,%xmm1,%xmm1 xorps %xmm1,%xmm0 ret .align 16 .L110key_256b: movups %xmm0,(%edx) leal 16(%edx),%edx shufps $16,%xmm2,%xmm4 xorps %xmm4,%xmm2 shufps $140,%xmm2,%xmm4 xorps %xmm4,%xmm2 shufps $170,%xmm1,%xmm1 xorps %xmm1,%xmm2 ret .align 16 .L10814rounds_alt: movdqa (%ebx),%xmm5 movdqa 32(%ebx),%xmm4 movl $7,%ecx movdqu %xmm0,-32(%edx) movdqa %xmm2,%xmm1 movdqu %xmm2,-16(%edx) .L112loop_key256: .byte 102,15,56,0,213 .byte 102,15,56,221,212 movdqa %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm0,%xmm3 pslldq $4,%xmm0 pxor %xmm3,%xmm0 pslld $1,%xmm4 pxor %xmm2,%xmm0 movdqu %xmm0,(%edx) decl %ecx jz .L113done_key256 pshufd $255,%xmm0,%xmm2 pxor %xmm3,%xmm3 .byte 102,15,56,221,211 movdqa %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm1,%xmm3 pslldq $4,%xmm1 pxor %xmm3,%xmm1 pxor %xmm1,%xmm2 movdqu %xmm2,16(%edx) leal 32(%edx),%edx movdqa %xmm2,%xmm1 jmp .L112loop_key256 .L113done_key256: movl $13,%ecx movl %ecx,16(%edx) .L100good_key: pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 xorl %eax,%eax popl %ebx popl %ebp ret .align 4 .L091bad_pointer: movl $-1,%eax popl %ebx popl %ebp ret .align 4 .L095bad_keybits: pxor %xmm0,%xmm0 movl $-2,%eax popl %ebx popl %ebp ret .size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key .globl aesni_set_encrypt_key .type aesni_set_encrypt_key,@function .align 16 aesni_set_encrypt_key: .L_aesni_set_encrypt_key_begin: movl 4(%esp),%eax movl 8(%esp),%ecx movl 12(%esp),%edx call _aesni_set_encrypt_key ret .size aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin .globl aesni_set_decrypt_key .type aesni_set_decrypt_key,@function .align 16 aesni_set_decrypt_key: .L_aesni_set_decrypt_key_begin: movl 4(%esp),%eax movl 8(%esp),%ecx movl 12(%esp),%edx call _aesni_set_encrypt_key movl 12(%esp),%edx shll $4,%ecx testl %eax,%eax jnz .L114dec_key_ret leal 16(%edx,%ecx,1),%eax movups (%edx),%xmm0 movups (%eax),%xmm1 movups %xmm0,(%eax) movups %xmm1,(%edx) leal 16(%edx),%edx leal -16(%eax),%eax .L115dec_key_inverse: movups (%edx),%xmm0 movups (%eax),%xmm1 .byte 102,15,56,219,192 .byte 102,15,56,219,201 leal 16(%edx),%edx leal -16(%eax),%eax movups %xmm0,16(%eax) movups %xmm1,-16(%edx) cmpl %edx,%eax ja .L115dec_key_inverse movups (%edx),%xmm0 .byte 102,15,56,219,192 movups %xmm0,(%edx) pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 xorl %eax,%eax .L114dec_key_ret: ret .size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin .align 64 .Lkey_const: .long 202313229,202313229,202313229,202313229 .long 67569157,67569157,67569157,67569157 .long 1,1,1,1 .long 27,27,27,27 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 .comm OPENSSL_ia32cap_P,16,4 #endif Index: head/secure/lib/libcrypto/i386/bf-586.S 
=================================================================== --- head/secure/lib/libcrypto/i386/bf-586.S (revision 299480) +++ head/secure/lib/libcrypto/i386/bf-586.S (revision 299481) @@ -1,1796 +1,1797 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from bf-586.pl. #ifdef PIC .file "bf-586.S" .text .globl BF_encrypt .type BF_encrypt,@function .align 16 BF_encrypt: .L_BF_encrypt_begin: pushl %ebp pushl %ebx movl 12(%esp),%ebx movl 16(%esp),%ebp pushl %esi pushl %edi movl (%ebx),%edi movl 4(%ebx),%esi xorl %eax,%eax movl (%ebp),%ebx xorl %ecx,%ecx xorl %ebx,%edi movl 4(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 8(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 12(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 16(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 20(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 24(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 28(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 32(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 36(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 40(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx 
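# Note (added comment): Blowfish round function,
# F(x) = ((S0[a] + S1[b]) ^ S2[c]) + S3[d] mod 2^32, with a..d the bytes of x
# from most to least significant.  The four S-boxes start at offsets 72,
# 1096, 2120 and 3144 from the BF_KEY pointer in %ebp (after the 18-word
# P-array); each round xors the round subkey and F of one half into the
# other half.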
movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 44(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 48(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 52(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 56(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 60(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 64(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx movl 20(%esp),%eax xorl %ebx,%edi movl 68(%ebp),%edx xorl %edx,%esi movl %edi,4(%eax) movl %esi,(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .size BF_encrypt,.-.L_BF_encrypt_begin .globl BF_decrypt .type BF_decrypt,@function .align 16 BF_decrypt: .L_BF_decrypt_begin: pushl %ebp pushl %ebx movl 12(%esp),%ebx movl 16(%esp),%ebp pushl %esi pushl %edi movl (%ebx),%edi movl 4(%ebx),%esi xorl %eax,%eax movl 68(%ebp),%ebx xorl %ecx,%ecx xorl %ebx,%edi movl 64(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 60(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 56(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 
52(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 48(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 44(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 40(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 36(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 32(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 28(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 24(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 20(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 16(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 12(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 8(%ebp),%edx movl %edi,%ebx xorl %edx,%esi 
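# Note (added comment): BF_decrypt is the same 16-round network as
# BF_encrypt, only with the P-array subkeys applied in reverse order, which
# is why the subkey loads count down from 68(%ebp) (P[17]) to (%ebp) (P[0]).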
shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 4(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx movl 20(%esp),%eax xorl %ebx,%edi movl (%ebp),%edx xorl %edx,%esi movl %edi,4(%eax) movl %esi,(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .size BF_decrypt,.-.L_BF_decrypt_begin .globl BF_cbc_encrypt .type BF_cbc_encrypt,@function .align 16 BF_cbc_encrypt: .L_BF_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi pushl %edi pushl %esi pushl %edi pushl %esi movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi movl 56(%esp),%ecx movl 48(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx jz .L000decrypt andl $4294967288,%ebp movl 8(%esp),%eax movl 12(%esp),%ebx jz .L001encrypt_finish .L002encrypt_loop: movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax xorl %edx,%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl %eax,(%edi) movl %ebx,4(%edi) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L002encrypt_loop .L001encrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L003finish call .L004PIC_point .L004PIC_point: popl %edx leal .L005cbc_enc_jmp_table-.L004PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp .L006ej7: movb 6(%esi),%dh shll $8,%edx .L007ej6: movb 5(%esi),%dh .L008ej5: movb 4(%esi),%dl .L009ej4: movl (%esi),%ecx jmp .L010ejend .L011ej3: movb 2(%esi),%ch shll $8,%ecx .L012ej2: movb 1(%esi),%ch .L013ej1: movb (%esi),%cl .L010ejend: xorl %ecx,%eax xorl %edx,%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl %eax,(%edi) movl %ebx,4(%edi) jmp .L003finish .L000decrypt: andl $4294967288,%ebp movl 16(%esp),%eax movl 20(%esp),%ebx jz .L014decrypt_finish .L015decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx movl %ecx,(%edi) movl %edx,4(%edi) movl %eax,16(%esp) movl %ebx,20(%esp) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L015decrypt_loop .L014decrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L003finish movl (%esi),%eax movl 4(%esi),%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx .L016dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx .L017dj6: movb %dh,5(%edi) .L018dj5: movb %dl,4(%edi) .L019dj4: movl %ecx,(%edi) jmp .L020djend .L021dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx .L022dj2: movb %ch,1(%esi) .L023dj1: movb %cl,(%esi) .L020djend: jmp .L003finish .L003finish: movl 60(%esp),%ecx addl $24,%esp movl %eax,(%ecx) movl %ebx,4(%ecx) popl %edi popl %esi popl %ebx popl %ebp 
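# Note (added comment): the .L005cbc_enc_jmp_table that follows holds offsets
# of the .L006ej7 .. .L013ej1 entry points relative to .L004PIC_point;
# BF_cbc_encrypt indexes it with the number of leftover bytes (length & 7) so
# that the final partial block reads exactly that many input bytes before the
# last BF_encrypt call.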
ret .align 64 .L005cbc_enc_jmp_table: .long 0 .long .L013ej1-.L004PIC_point .long .L012ej2-.L004PIC_point .long .L011ej3-.L004PIC_point .long .L009ej4-.L004PIC_point .long .L008ej5-.L004PIC_point .long .L007ej6-.L004PIC_point .long .L006ej7-.L004PIC_point .align 64 .size BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin #else .file "bf-586.S" .text .globl BF_encrypt .type BF_encrypt,@function .align 16 BF_encrypt: .L_BF_encrypt_begin: pushl %ebp pushl %ebx movl 12(%esp),%ebx movl 16(%esp),%ebp pushl %esi pushl %edi movl (%ebx),%edi movl 4(%ebx),%esi xorl %eax,%eax movl (%ebp),%ebx xorl %ecx,%ecx xorl %ebx,%edi movl 4(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 8(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 12(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 16(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 20(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 24(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 28(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 32(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 36(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 40(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl 
%esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 44(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 48(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 52(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 56(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 60(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 64(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx movl 20(%esp),%eax xorl %ebx,%edi movl 68(%ebp),%edx xorl %edx,%esi movl %edi,4(%eax) movl %esi,(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .size BF_encrypt,.-.L_BF_encrypt_begin .globl BF_decrypt .type BF_decrypt,@function .align 16 BF_decrypt: .L_BF_decrypt_begin: pushl %ebp pushl %ebx movl 12(%esp),%ebx movl 16(%esp),%ebp pushl %esi pushl %edi movl (%ebx),%edi movl 4(%ebx),%esi xorl %eax,%eax movl 68(%ebp),%ebx xorl %ecx,%ecx xorl %ebx,%edi movl 64(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 60(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 56(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 
52(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 48(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 44(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 40(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 36(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 32(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 28(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 24(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 20(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 16(%ebp),%edx movl %edi,%ebx xorl %edx,%esi shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 12(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%edi movl 8(%ebp),%edx movl %edi,%ebx xorl %edx,%esi 
shrl $16,%ebx movl %edi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx xorl %eax,%eax xorl %ebx,%esi movl 4(%ebp),%edx movl %esi,%ebx xorl %edx,%edi shrl $16,%ebx movl %esi,%edx movb %bh,%al andl $255,%ebx movb %dh,%cl andl $255,%edx movl 72(%ebp,%eax,4),%eax movl 1096(%ebp,%ebx,4),%ebx addl %eax,%ebx movl 2120(%ebp,%ecx,4),%eax xorl %eax,%ebx movl 3144(%ebp,%edx,4),%edx addl %edx,%ebx movl 20(%esp),%eax xorl %ebx,%edi movl (%ebp),%edx xorl %edx,%esi movl %edi,4(%eax) movl %esi,(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .size BF_decrypt,.-.L_BF_decrypt_begin .globl BF_cbc_encrypt .type BF_cbc_encrypt,@function .align 16 BF_cbc_encrypt: .L_BF_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi pushl %edi pushl %esi pushl %edi pushl %esi movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi movl 56(%esp),%ecx movl 48(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx jz .L000decrypt andl $4294967288,%ebp movl 8(%esp),%eax movl 12(%esp),%ebx jz .L001encrypt_finish .L002encrypt_loop: movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax xorl %edx,%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl %eax,(%edi) movl %ebx,4(%edi) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L002encrypt_loop .L001encrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L003finish call .L004PIC_point .L004PIC_point: popl %edx leal .L005cbc_enc_jmp_table-.L004PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp .L006ej7: movb 6(%esi),%dh shll $8,%edx .L007ej6: movb 5(%esi),%dh .L008ej5: movb 4(%esi),%dl .L009ej4: movl (%esi),%ecx jmp .L010ejend .L011ej3: movb 2(%esi),%ch shll $8,%ecx .L012ej2: movb 1(%esi),%ch .L013ej1: movb (%esi),%cl .L010ejend: xorl %ecx,%eax xorl %edx,%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl %eax,(%edi) movl %ebx,4(%edi) jmp .L003finish .L000decrypt: andl $4294967288,%ebp movl 16(%esp),%eax movl 20(%esp),%ebx jz .L014decrypt_finish .L015decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx movl %ecx,(%edi) movl %edx,4(%edi) movl %eax,16(%esp) movl %ebx,20(%esp) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L015decrypt_loop .L014decrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L003finish movl (%esi),%eax movl 4(%esi),%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx .L016dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx .L017dj6: movb %dh,5(%edi) .L018dj5: movb %dl,4(%edi) .L019dj4: movl %ecx,(%edi) jmp .L020djend .L021dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx .L022dj2: movb %ch,1(%esi) .L023dj1: movb %cl,(%esi) .L020djend: jmp .L003finish .L003finish: movl 60(%esp),%ecx addl $24,%esp movl %eax,(%ecx) movl %ebx,4(%ecx) popl %edi popl %esi popl %ebx popl %ebp 
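# Note (added comment): Blowfish operates on two big-endian 32-bit halves,
# hence the bswap of every 8-byte block before and after each
# BF_encrypt/BF_decrypt call in the CBC loops above.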
ret .align 64 .L005cbc_enc_jmp_table: .long 0 .long .L013ej1-.L004PIC_point .long .L012ej2-.L004PIC_point .long .L011ej3-.L004PIC_point .long .L009ej4-.L004PIC_point .long .L008ej5-.L004PIC_point .long .L007ej6-.L004PIC_point .long .L006ej7-.L004PIC_point .align 64 .size BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin #endif Index: head/secure/lib/libcrypto/i386/bf-686.S =================================================================== --- head/secure/lib/libcrypto/i386/bf-686.S (revision 299480) +++ head/secure/lib/libcrypto/i386/bf-686.S (revision 299481) @@ -1,1732 +1,1733 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from bf-686.pl. #ifdef PIC .file "bf-686.S" .text .globl BF_encrypt .type BF_encrypt,@function .align 16 BF_encrypt: .L_BF_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl (%eax),%ecx movl 4(%eax),%edx movl 24(%esp),%edi xorl %eax,%eax xorl %ebx,%ebx xorl (%edi),%ecx rorl $16,%ecx movl 4(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 8(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 12(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 16(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 20(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 24(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 28(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 32(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 36(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 
2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 40(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 44(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 48(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 52(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 56(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 60(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 64(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx xorl 68(%edi),%edx movl 20(%esp),%eax movl %edx,(%eax) movl %ecx,4(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .size BF_encrypt,.-.L_BF_encrypt_begin .globl BF_decrypt .type BF_decrypt,@function .align 16 BF_decrypt: .L_BF_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl (%eax),%ecx movl 4(%eax),%edx movl 24(%esp),%edi xorl %eax,%eax xorl %ebx,%ebx xorl 68(%edi),%ecx rorl $16,%ecx movl 64(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 60(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 56(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 52(%edi),%esi movb 
%dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 48(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 44(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 40(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 36(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 32(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 28(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 24(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 20(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 16(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 12(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 8(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl 
$16,%edx movl 4(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx xorl (%edi),%edx movl 20(%esp),%eax movl %edx,(%eax) movl %ecx,4(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .size BF_decrypt,.-.L_BF_decrypt_begin .globl BF_cbc_encrypt .type BF_cbc_encrypt,@function .align 16 BF_cbc_encrypt: .L_BF_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi pushl %edi pushl %esi pushl %edi pushl %esi movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi movl 56(%esp),%ecx movl 48(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx jz .L000decrypt andl $4294967288,%ebp movl 8(%esp),%eax movl 12(%esp),%ebx jz .L001encrypt_finish .L002encrypt_loop: movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax xorl %edx,%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl %eax,(%edi) movl %ebx,4(%edi) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L002encrypt_loop .L001encrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L003finish call .L004PIC_point .L004PIC_point: popl %edx leal .L005cbc_enc_jmp_table-.L004PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp .L006ej7: movb 6(%esi),%dh shll $8,%edx .L007ej6: movb 5(%esi),%dh .L008ej5: movb 4(%esi),%dl .L009ej4: movl (%esi),%ecx jmp .L010ejend .L011ej3: movb 2(%esi),%ch shll $8,%ecx .L012ej2: movb 1(%esi),%ch .L013ej1: movb (%esi),%cl .L010ejend: xorl %ecx,%eax xorl %edx,%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl %eax,(%edi) movl %ebx,4(%edi) jmp .L003finish .L000decrypt: andl $4294967288,%ebp movl 16(%esp),%eax movl 20(%esp),%ebx jz .L014decrypt_finish .L015decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx movl %ecx,(%edi) movl %edx,4(%edi) movl %eax,16(%esp) movl %ebx,20(%esp) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L015decrypt_loop .L014decrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L003finish movl (%esi),%eax movl 4(%esi),%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx .L016dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx .L017dj6: movb %dh,5(%edi) .L018dj5: movb %dl,4(%edi) .L019dj4: movl %ecx,(%edi) jmp .L020djend .L021dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx .L022dj2: movb %ch,1(%esi) .L023dj1: movb %cl,(%esi) .L020djend: jmp .L003finish .L003finish: movl 60(%esp),%ecx addl $24,%esp movl %eax,(%ecx) movl %ebx,4(%ecx) popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L005cbc_enc_jmp_table: .long 0 .long .L013ej1-.L004PIC_point .long .L012ej2-.L004PIC_point .long .L011ej3-.L004PIC_point .long .L009ej4-.L004PIC_point .long .L008ej5-.L004PIC_point .long .L007ej6-.L004PIC_point .long .L006ej7-.L004PIC_point .align 64 .size 
BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin #else .file "bf-686.S" .text .globl BF_encrypt .type BF_encrypt,@function .align 16 BF_encrypt: .L_BF_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl (%eax),%ecx movl 4(%eax),%edx movl 24(%esp),%edi xorl %eax,%eax xorl %ebx,%ebx xorl (%edi),%ecx rorl $16,%ecx movl 4(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 8(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 12(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 16(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 20(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 24(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 28(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 32(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 36(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 40(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 44(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 
3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 48(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 52(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 56(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 60(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 64(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx xorl 68(%edi),%edx movl 20(%esp),%eax movl %edx,(%eax) movl %ecx,4(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .size BF_encrypt,.-.L_BF_encrypt_begin .globl BF_decrypt .type BF_decrypt,@function .align 16 BF_decrypt: .L_BF_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl (%eax),%ecx movl 4(%eax),%edx movl 24(%esp),%edi xorl %eax,%eax xorl %ebx,%ebx xorl 68(%edi),%ecx rorl $16,%ecx movl 64(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 60(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 56(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 52(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 48(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 44(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl 
%esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 40(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 36(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 32(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 28(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 24(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 20(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 16(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 12(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx rorl $16,%ecx movl 8(%edi),%esi movb %ch,%al movb %cl,%bl rorl $16,%ecx xorl %esi,%edx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %ch,%al movb %cl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%edx rorl $16,%edx movl 4(%edi),%esi movb %dh,%al movb %dl,%bl rorl $16,%edx xorl %esi,%ecx movl 72(%edi,%eax,4),%esi movl 1096(%edi,%ebx,4),%ebp movb %dh,%al movb %dl,%bl addl %ebp,%esi movl 2120(%edi,%eax,4),%eax xorl %eax,%esi movl 3144(%edi,%ebx,4),%ebp addl %ebp,%esi xorl %eax,%eax xorl %esi,%ecx xorl (%edi),%edx movl 20(%esp),%eax movl %edx,(%eax) movl %ecx,4(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .size BF_decrypt,.-.L_BF_decrypt_begin .globl BF_cbc_encrypt .type BF_cbc_encrypt,@function .align 16 BF_cbc_encrypt: .L_BF_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp movl 36(%esp),%ebx movl 
(%ebx),%esi movl 4(%ebx),%edi pushl %edi pushl %esi pushl %edi pushl %esi movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi movl 56(%esp),%ecx movl 48(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx jz .L000decrypt andl $4294967288,%ebp movl 8(%esp),%eax movl 12(%esp),%ebx jz .L001encrypt_finish .L002encrypt_loop: movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax xorl %edx,%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl %eax,(%edi) movl %ebx,4(%edi) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L002encrypt_loop .L001encrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L003finish call .L004PIC_point .L004PIC_point: popl %edx leal .L005cbc_enc_jmp_table-.L004PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp .L006ej7: movb 6(%esi),%dh shll $8,%edx .L007ej6: movb 5(%esi),%dh .L008ej5: movb 4(%esi),%dl .L009ej4: movl (%esi),%ecx jmp .L010ejend .L011ej3: movb 2(%esi),%ch shll $8,%ecx .L012ej2: movb 1(%esi),%ch .L013ej1: movb (%esi),%cl .L010ejend: xorl %ecx,%eax xorl %edx,%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl %eax,(%edi) movl %ebx,4(%edi) jmp .L003finish .L000decrypt: andl $4294967288,%ebp movl 16(%esp),%eax movl 20(%esp),%ebx jz .L014decrypt_finish .L015decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx movl %ecx,(%edi) movl %edx,4(%edi) movl %eax,16(%esp) movl %ebx,20(%esp) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L015decrypt_loop .L014decrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L003finish movl (%esi),%eax movl 4(%esi),%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_BF_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx .L016dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx .L017dj6: movb %dh,5(%edi) .L018dj5: movb %dl,4(%edi) .L019dj4: movl %ecx,(%edi) jmp .L020djend .L021dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx .L022dj2: movb %ch,1(%esi) .L023dj1: movb %cl,(%esi) .L020djend: jmp .L003finish .L003finish: movl 60(%esp),%ecx addl $24,%esp movl %eax,(%ecx) movl %ebx,4(%ecx) popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L005cbc_enc_jmp_table: .long 0 .long .L013ej1-.L004PIC_point .long .L012ej2-.L004PIC_point .long .L011ej3-.L004PIC_point .long .L009ej4-.L004PIC_point .long .L008ej5-.L004PIC_point .long .L007ej6-.L004PIC_point .long .L006ej7-.L004PIC_point .align 64 .size BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin #endif Index: head/secure/lib/libcrypto/i386/bn-586.S =================================================================== --- head/secure/lib/libcrypto/i386/bn-586.S (revision 299480) +++ head/secure/lib/libcrypto/i386/bn-586.S (revision 299481) @@ -1,3055 +1,3056 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from bn-586.pl. 
#ifdef PIC .file "bn-586.S" .text .globl bn_mul_add_words .type bn_mul_add_words,@function .align 16 bn_mul_add_words: .L_bn_mul_add_words_begin: call .L000PIC_me_up .L000PIC_me_up: popl %eax leal OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax btl $26,(%eax) jnc .L001maw_non_sse2 movl 4(%esp),%eax movl 8(%esp),%edx movl 12(%esp),%ecx movd 16(%esp),%mm0 pxor %mm1,%mm1 jmp .L002maw_sse2_entry .align 16 .L003maw_sse2_unrolled: movd (%eax),%mm3 paddq %mm3,%mm1 movd (%edx),%mm2 pmuludq %mm0,%mm2 movd 4(%edx),%mm4 pmuludq %mm0,%mm4 movd 8(%edx),%mm6 pmuludq %mm0,%mm6 movd 12(%edx),%mm7 pmuludq %mm0,%mm7 paddq %mm2,%mm1 movd 4(%eax),%mm3 paddq %mm4,%mm3 movd 8(%eax),%mm5 paddq %mm6,%mm5 movd 12(%eax),%mm4 paddq %mm4,%mm7 movd %mm1,(%eax) movd 16(%edx),%mm2 pmuludq %mm0,%mm2 psrlq $32,%mm1 movd 20(%edx),%mm4 pmuludq %mm0,%mm4 paddq %mm3,%mm1 movd 24(%edx),%mm6 pmuludq %mm0,%mm6 movd %mm1,4(%eax) psrlq $32,%mm1 movd 28(%edx),%mm3 addl $32,%edx pmuludq %mm0,%mm3 paddq %mm5,%mm1 movd 16(%eax),%mm5 paddq %mm5,%mm2 movd %mm1,8(%eax) psrlq $32,%mm1 paddq %mm7,%mm1 movd 20(%eax),%mm5 paddq %mm5,%mm4 movd %mm1,12(%eax) psrlq $32,%mm1 paddq %mm2,%mm1 movd 24(%eax),%mm5 paddq %mm5,%mm6 movd %mm1,16(%eax) psrlq $32,%mm1 paddq %mm4,%mm1 movd 28(%eax),%mm5 paddq %mm5,%mm3 movd %mm1,20(%eax) psrlq $32,%mm1 paddq %mm6,%mm1 movd %mm1,24(%eax) psrlq $32,%mm1 paddq %mm3,%mm1 movd %mm1,28(%eax) leal 32(%eax),%eax psrlq $32,%mm1 subl $8,%ecx jz .L004maw_sse2_exit .L002maw_sse2_entry: testl $4294967288,%ecx jnz .L003maw_sse2_unrolled .align 4 .L005maw_sse2_loop: movd (%edx),%mm2 movd (%eax),%mm3 pmuludq %mm0,%mm2 leal 4(%edx),%edx paddq %mm3,%mm1 paddq %mm2,%mm1 movd %mm1,(%eax) subl $1,%ecx psrlq $32,%mm1 leal 4(%eax),%eax jnz .L005maw_sse2_loop .L004maw_sse2_exit: movd %mm1,%eax emms ret .align 16 .L001maw_non_sse2: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %esi,%esi movl 20(%esp),%edi movl 28(%esp),%ecx movl 24(%esp),%ebx andl $4294967288,%ecx movl 32(%esp),%ebp pushl %ecx jz .L006maw_finish .align 16 .L007maw_loop: movl (%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl (%edi),%eax adcl $0,%edx movl %eax,(%edi) movl %edx,%esi movl 4(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 4(%edi),%eax adcl $0,%edx movl %eax,4(%edi) movl %edx,%esi movl 8(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 8(%edi),%eax adcl $0,%edx movl %eax,8(%edi) movl %edx,%esi movl 12(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 12(%edi),%eax adcl $0,%edx movl %eax,12(%edi) movl %edx,%esi movl 16(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 16(%edi),%eax adcl $0,%edx movl %eax,16(%edi) movl %edx,%esi movl 20(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 20(%edi),%eax adcl $0,%edx movl %eax,20(%edi) movl %edx,%esi movl 24(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 24(%edi),%eax adcl $0,%edx movl %eax,24(%edi) movl %edx,%esi movl 28(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 28(%edi),%eax adcl $0,%edx movl %eax,28(%edi) movl %edx,%esi subl $8,%ecx leal 32(%ebx),%ebx leal 32(%edi),%edi jnz .L007maw_loop .L006maw_finish: movl 32(%esp),%ecx andl $7,%ecx jnz .L008maw_finish2 jmp .L009maw_end .L008maw_finish2: movl (%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl (%edi),%eax adcl $0,%edx decl %ecx movl %eax,(%edi) movl %edx,%esi jz .L009maw_end movl 4(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 4(%edi),%eax adcl $0,%edx decl %ecx movl %eax,4(%edi) movl %edx,%esi jz .L009maw_end movl 8(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 8(%edi),%eax adcl $0,%edx 
decl %ecx movl %eax,8(%edi) movl %edx,%esi jz .L009maw_end movl 12(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 12(%edi),%eax adcl $0,%edx decl %ecx movl %eax,12(%edi) movl %edx,%esi jz .L009maw_end movl 16(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 16(%edi),%eax adcl $0,%edx decl %ecx movl %eax,16(%edi) movl %edx,%esi jz .L009maw_end movl 20(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 20(%edi),%eax adcl $0,%edx decl %ecx movl %eax,20(%edi) movl %edx,%esi jz .L009maw_end movl 24(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 24(%edi),%eax adcl $0,%edx movl %eax,24(%edi) movl %edx,%esi .L009maw_end: movl %esi,%eax popl %ecx popl %edi popl %esi popl %ebx popl %ebp ret .size bn_mul_add_words,.-.L_bn_mul_add_words_begin .globl bn_mul_words .type bn_mul_words,@function .align 16 bn_mul_words: .L_bn_mul_words_begin: call .L010PIC_me_up .L010PIC_me_up: popl %eax leal OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax btl $26,(%eax) jnc .L011mw_non_sse2 movl 4(%esp),%eax movl 8(%esp),%edx movl 12(%esp),%ecx movd 16(%esp),%mm0 pxor %mm1,%mm1 .align 16 .L012mw_sse2_loop: movd (%edx),%mm2 pmuludq %mm0,%mm2 leal 4(%edx),%edx paddq %mm2,%mm1 movd %mm1,(%eax) subl $1,%ecx psrlq $32,%mm1 leal 4(%eax),%eax jnz .L012mw_sse2_loop movd %mm1,%eax emms ret .align 16 .L011mw_non_sse2: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %esi,%esi movl 20(%esp),%edi movl 24(%esp),%ebx movl 28(%esp),%ebp movl 32(%esp),%ecx andl $4294967288,%ebp jz .L013mw_finish .L014mw_loop: movl (%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,(%edi) movl %edx,%esi movl 4(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,4(%edi) movl %edx,%esi movl 8(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,8(%edi) movl %edx,%esi movl 12(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,12(%edi) movl %edx,%esi movl 16(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,16(%edi) movl %edx,%esi movl 20(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,20(%edi) movl %edx,%esi movl 24(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,24(%edi) movl %edx,%esi movl 28(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,28(%edi) movl %edx,%esi addl $32,%ebx addl $32,%edi subl $8,%ebp jz .L013mw_finish jmp .L014mw_loop .L013mw_finish: movl 28(%esp),%ebp andl $7,%ebp jnz .L015mw_finish2 jmp .L016mw_end .L015mw_finish2: movl (%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,(%edi) movl %edx,%esi decl %ebp jz .L016mw_end movl 4(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,4(%edi) movl %edx,%esi decl %ebp jz .L016mw_end movl 8(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,8(%edi) movl %edx,%esi decl %ebp jz .L016mw_end movl 12(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,12(%edi) movl %edx,%esi decl %ebp jz .L016mw_end movl 16(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,16(%edi) movl %edx,%esi decl %ebp jz .L016mw_end movl 20(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,20(%edi) movl %edx,%esi decl %ebp jz .L016mw_end movl 24(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,24(%edi) movl %edx,%esi .L016mw_end: movl %esi,%eax popl %edi popl %esi popl %ebx popl %ebp ret .size bn_mul_words,.-.L_bn_mul_words_begin .globl bn_sqr_words .type bn_sqr_words,@function .align 16 bn_sqr_words: .L_bn_sqr_words_begin: call .L017PIC_me_up .L017PIC_me_up: popl %eax leal OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax btl $26,(%eax) jnc .L018sqr_non_sse2 movl 4(%esp),%eax movl 
8(%esp),%edx movl 12(%esp),%ecx .align 16 .L019sqr_sse2_loop: movd (%edx),%mm0 pmuludq %mm0,%mm0 leal 4(%edx),%edx movq %mm0,(%eax) subl $1,%ecx leal 8(%eax),%eax jnz .L019sqr_sse2_loop emms ret .align 16 .L018sqr_non_sse2: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%ebx andl $4294967288,%ebx jz .L020sw_finish .L021sw_loop: movl (%edi),%eax mull %eax movl %eax,(%esi) movl %edx,4(%esi) movl 4(%edi),%eax mull %eax movl %eax,8(%esi) movl %edx,12(%esi) movl 8(%edi),%eax mull %eax movl %eax,16(%esi) movl %edx,20(%esi) movl 12(%edi),%eax mull %eax movl %eax,24(%esi) movl %edx,28(%esi) movl 16(%edi),%eax mull %eax movl %eax,32(%esi) movl %edx,36(%esi) movl 20(%edi),%eax mull %eax movl %eax,40(%esi) movl %edx,44(%esi) movl 24(%edi),%eax mull %eax movl %eax,48(%esi) movl %edx,52(%esi) movl 28(%edi),%eax mull %eax movl %eax,56(%esi) movl %edx,60(%esi) addl $32,%edi addl $64,%esi subl $8,%ebx jnz .L021sw_loop .L020sw_finish: movl 28(%esp),%ebx andl $7,%ebx jz .L022sw_end movl (%edi),%eax mull %eax movl %eax,(%esi) decl %ebx movl %edx,4(%esi) jz .L022sw_end movl 4(%edi),%eax mull %eax movl %eax,8(%esi) decl %ebx movl %edx,12(%esi) jz .L022sw_end movl 8(%edi),%eax mull %eax movl %eax,16(%esi) decl %ebx movl %edx,20(%esi) jz .L022sw_end movl 12(%edi),%eax mull %eax movl %eax,24(%esi) decl %ebx movl %edx,28(%esi) jz .L022sw_end movl 16(%edi),%eax mull %eax movl %eax,32(%esi) decl %ebx movl %edx,36(%esi) jz .L022sw_end movl 20(%edi),%eax mull %eax movl %eax,40(%esi) decl %ebx movl %edx,44(%esi) jz .L022sw_end movl 24(%edi),%eax mull %eax movl %eax,48(%esi) movl %edx,52(%esi) .L022sw_end: popl %edi popl %esi popl %ebx popl %ebp ret .size bn_sqr_words,.-.L_bn_sqr_words_begin .globl bn_div_words .type bn_div_words,@function .align 16 bn_div_words: .L_bn_div_words_begin: movl 4(%esp),%edx movl 8(%esp),%eax movl 12(%esp),%ecx divl %ecx ret .size bn_div_words,.-.L_bn_div_words_begin .globl bn_add_words .type bn_add_words,@function .align 16 bn_add_words: .L_bn_add_words_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%ebx movl 24(%esp),%esi movl 28(%esp),%edi movl 32(%esp),%ebp xorl %eax,%eax andl $4294967288,%ebp jz .L023aw_finish .L024aw_loop: movl (%esi),%ecx movl (%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) movl 4(%esi),%ecx movl 4(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,4(%ebx) movl 8(%esi),%ecx movl 8(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,8(%ebx) movl 12(%esi),%ecx movl 12(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,12(%ebx) movl 16(%esi),%ecx movl 16(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,16(%ebx) movl 20(%esi),%ecx movl 20(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,20(%ebx) movl 24(%esi),%ecx movl 24(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) movl 28(%esi),%ecx movl 28(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,28(%ebx) addl $32,%esi addl $32,%edi addl $32,%ebx subl $8,%ebp jnz .L024aw_loop .L023aw_finish: movl 32(%esp),%ebp andl $7,%ebp jz .L025aw_end movl (%esi),%ecx movl (%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,(%ebx) jz .L025aw_end movl 4(%esi),%ecx movl 
4(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,4(%ebx) jz .L025aw_end movl 8(%esi),%ecx movl 8(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,8(%ebx) jz .L025aw_end movl 12(%esi),%ecx movl 12(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,12(%ebx) jz .L025aw_end movl 16(%esi),%ecx movl 16(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,16(%ebx) jz .L025aw_end movl 20(%esi),%ecx movl 20(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,20(%ebx) jz .L025aw_end movl 24(%esi),%ecx movl 24(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) .L025aw_end: popl %edi popl %esi popl %ebx popl %ebp ret .size bn_add_words,.-.L_bn_add_words_begin .globl bn_sub_words .type bn_sub_words,@function .align 16 bn_sub_words: .L_bn_sub_words_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%ebx movl 24(%esp),%esi movl 28(%esp),%edi movl 32(%esp),%ebp xorl %eax,%eax andl $4294967288,%ebp jz .L026aw_finish .L027aw_loop: movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) movl 4(%esi),%ecx movl 4(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,4(%ebx) movl 8(%esi),%ecx movl 8(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,8(%ebx) movl 12(%esi),%ecx movl 12(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,12(%ebx) movl 16(%esi),%ecx movl 16(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,16(%ebx) movl 20(%esi),%ecx movl 20(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,20(%ebx) movl 24(%esi),%ecx movl 24(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) movl 28(%esi),%ecx movl 28(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,28(%ebx) addl $32,%esi addl $32,%edi addl $32,%ebx subl $8,%ebp jnz .L027aw_loop .L026aw_finish: movl 32(%esp),%ebp andl $7,%ebp jz .L028aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,(%ebx) jz .L028aw_end movl 4(%esi),%ecx movl 4(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,4(%ebx) jz .L028aw_end movl 8(%esi),%ecx movl 8(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,8(%ebx) jz .L028aw_end movl 12(%esi),%ecx movl 12(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,12(%ebx) jz .L028aw_end movl 16(%esi),%ecx movl 16(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,16(%ebx) jz .L028aw_end movl 20(%esi),%ecx movl 20(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,20(%ebx) jz .L028aw_end movl 24(%esi),%ecx movl 24(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) .L028aw_end: popl %edi popl %esi popl %ebx popl %ebp ret .size bn_sub_words,.-.L_bn_sub_words_begin .globl bn_sub_part_words 
.type bn_sub_part_words,@function .align 16 bn_sub_part_words: .L_bn_sub_part_words_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%ebx movl 24(%esp),%esi movl 28(%esp),%edi movl 32(%esp),%ebp xorl %eax,%eax andl $4294967288,%ebp jz .L029aw_finish .L030aw_loop: movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) movl 4(%esi),%ecx movl 4(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,4(%ebx) movl 8(%esi),%ecx movl 8(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,8(%ebx) movl 12(%esi),%ecx movl 12(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,12(%ebx) movl 16(%esi),%ecx movl 16(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,16(%ebx) movl 20(%esi),%ecx movl 20(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,20(%ebx) movl 24(%esi),%ecx movl 24(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) movl 28(%esi),%ecx movl 28(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,28(%ebx) addl $32,%esi addl $32,%edi addl $32,%ebx subl $8,%ebp jnz .L030aw_loop .L029aw_finish: movl 32(%esp),%ebp andl $7,%ebp jz .L031aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L031aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L031aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L031aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L031aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L031aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L031aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx .L031aw_end: cmpl $0,36(%esp) je .L032pw_end movl 36(%esp),%ebp cmpl $0,%ebp je .L032pw_end jge .L033pw_pos movl $0,%edx subl %ebp,%edx movl %edx,%ebp andl $4294967288,%ebp jz .L034pw_neg_finish .L035pw_neg_loop: movl $0,%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) movl $0,%ecx movl 4(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,4(%ebx) movl $0,%ecx movl 8(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,8(%ebx) movl $0,%ecx movl 12(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,12(%ebx) movl $0,%ecx movl 16(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,16(%ebx) movl $0,%ecx movl 20(%edi),%edx 
subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,20(%ebx) movl $0,%ecx movl 24(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) movl $0,%ecx movl 28(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,28(%ebx) addl $32,%edi addl $32,%ebx subl $8,%ebp jnz .L035pw_neg_loop .L034pw_neg_finish: movl 36(%esp),%edx movl $0,%ebp subl %edx,%ebp andl $7,%ebp jz .L032pw_end movl $0,%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,(%ebx) jz .L032pw_end movl $0,%ecx movl 4(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,4(%ebx) jz .L032pw_end movl $0,%ecx movl 8(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,8(%ebx) jz .L032pw_end movl $0,%ecx movl 12(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,12(%ebx) jz .L032pw_end movl $0,%ecx movl 16(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,16(%ebx) jz .L032pw_end movl $0,%ecx movl 20(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,20(%ebx) jz .L032pw_end movl $0,%ecx movl 24(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) jmp .L032pw_end .L033pw_pos: andl $4294967288,%ebp jz .L036pw_pos_finish .L037pw_pos_loop: movl (%esi),%ecx subl %eax,%ecx movl %ecx,(%ebx) jnc .L038pw_nc0 movl 4(%esi),%ecx subl %eax,%ecx movl %ecx,4(%ebx) jnc .L039pw_nc1 movl 8(%esi),%ecx subl %eax,%ecx movl %ecx,8(%ebx) jnc .L040pw_nc2 movl 12(%esi),%ecx subl %eax,%ecx movl %ecx,12(%ebx) jnc .L041pw_nc3 movl 16(%esi),%ecx subl %eax,%ecx movl %ecx,16(%ebx) jnc .L042pw_nc4 movl 20(%esi),%ecx subl %eax,%ecx movl %ecx,20(%ebx) jnc .L043pw_nc5 movl 24(%esi),%ecx subl %eax,%ecx movl %ecx,24(%ebx) jnc .L044pw_nc6 movl 28(%esi),%ecx subl %eax,%ecx movl %ecx,28(%ebx) jnc .L045pw_nc7 addl $32,%esi addl $32,%ebx subl $8,%ebp jnz .L037pw_pos_loop .L036pw_pos_finish: movl 36(%esp),%ebp andl $7,%ebp jz .L032pw_end movl (%esi),%ecx subl %eax,%ecx movl %ecx,(%ebx) jnc .L046pw_tail_nc0 decl %ebp jz .L032pw_end movl 4(%esi),%ecx subl %eax,%ecx movl %ecx,4(%ebx) jnc .L047pw_tail_nc1 decl %ebp jz .L032pw_end movl 8(%esi),%ecx subl %eax,%ecx movl %ecx,8(%ebx) jnc .L048pw_tail_nc2 decl %ebp jz .L032pw_end movl 12(%esi),%ecx subl %eax,%ecx movl %ecx,12(%ebx) jnc .L049pw_tail_nc3 decl %ebp jz .L032pw_end movl 16(%esi),%ecx subl %eax,%ecx movl %ecx,16(%ebx) jnc .L050pw_tail_nc4 decl %ebp jz .L032pw_end movl 20(%esi),%ecx subl %eax,%ecx movl %ecx,20(%ebx) jnc .L051pw_tail_nc5 decl %ebp jz .L032pw_end movl 24(%esi),%ecx subl %eax,%ecx movl %ecx,24(%ebx) jnc .L052pw_tail_nc6 movl $1,%eax jmp .L032pw_end .L053pw_nc_loop: movl (%esi),%ecx movl %ecx,(%ebx) .L038pw_nc0: movl 4(%esi),%ecx movl %ecx,4(%ebx) .L039pw_nc1: movl 8(%esi),%ecx movl %ecx,8(%ebx) .L040pw_nc2: movl 12(%esi),%ecx movl %ecx,12(%ebx) .L041pw_nc3: movl 16(%esi),%ecx movl %ecx,16(%ebx) .L042pw_nc4: movl 20(%esi),%ecx movl %ecx,20(%ebx) .L043pw_nc5: movl 24(%esi),%ecx movl %ecx,24(%ebx) .L044pw_nc6: movl 28(%esi),%ecx movl %ecx,28(%ebx) .L045pw_nc7: addl $32,%esi addl $32,%ebx subl $8,%ebp jnz .L053pw_nc_loop movl 36(%esp),%ebp andl $7,%ebp jz .L054pw_nc_end movl (%esi),%ecx movl %ecx,(%ebx) .L046pw_tail_nc0: decl %ebp jz .L054pw_nc_end movl 
4(%esi),%ecx movl %ecx,4(%ebx) .L047pw_tail_nc1: decl %ebp jz .L054pw_nc_end movl 8(%esi),%ecx movl %ecx,8(%ebx) .L048pw_tail_nc2: decl %ebp jz .L054pw_nc_end movl 12(%esi),%ecx movl %ecx,12(%ebx) .L049pw_tail_nc3: decl %ebp jz .L054pw_nc_end movl 16(%esi),%ecx movl %ecx,16(%ebx) .L050pw_tail_nc4: decl %ebp jz .L054pw_nc_end movl 20(%esi),%ecx movl %ecx,20(%ebx) .L051pw_tail_nc5: decl %ebp jz .L054pw_nc_end movl 24(%esi),%ecx movl %ecx,24(%ebx) .L052pw_tail_nc6: .L054pw_nc_end: movl $0,%eax .L032pw_end: popl %edi popl %esi popl %ebx popl %ebp ret .size bn_sub_part_words,.-.L_bn_sub_part_words_begin .comm OPENSSL_ia32cap_P,16,4 #else .file "bn-586.S" .text .globl bn_mul_add_words .type bn_mul_add_words,@function .align 16 bn_mul_add_words: .L_bn_mul_add_words_begin: leal OPENSSL_ia32cap_P,%eax btl $26,(%eax) jnc .L000maw_non_sse2 movl 4(%esp),%eax movl 8(%esp),%edx movl 12(%esp),%ecx movd 16(%esp),%mm0 pxor %mm1,%mm1 jmp .L001maw_sse2_entry .align 16 .L002maw_sse2_unrolled: movd (%eax),%mm3 paddq %mm3,%mm1 movd (%edx),%mm2 pmuludq %mm0,%mm2 movd 4(%edx),%mm4 pmuludq %mm0,%mm4 movd 8(%edx),%mm6 pmuludq %mm0,%mm6 movd 12(%edx),%mm7 pmuludq %mm0,%mm7 paddq %mm2,%mm1 movd 4(%eax),%mm3 paddq %mm4,%mm3 movd 8(%eax),%mm5 paddq %mm6,%mm5 movd 12(%eax),%mm4 paddq %mm4,%mm7 movd %mm1,(%eax) movd 16(%edx),%mm2 pmuludq %mm0,%mm2 psrlq $32,%mm1 movd 20(%edx),%mm4 pmuludq %mm0,%mm4 paddq %mm3,%mm1 movd 24(%edx),%mm6 pmuludq %mm0,%mm6 movd %mm1,4(%eax) psrlq $32,%mm1 movd 28(%edx),%mm3 addl $32,%edx pmuludq %mm0,%mm3 paddq %mm5,%mm1 movd 16(%eax),%mm5 paddq %mm5,%mm2 movd %mm1,8(%eax) psrlq $32,%mm1 paddq %mm7,%mm1 movd 20(%eax),%mm5 paddq %mm5,%mm4 movd %mm1,12(%eax) psrlq $32,%mm1 paddq %mm2,%mm1 movd 24(%eax),%mm5 paddq %mm5,%mm6 movd %mm1,16(%eax) psrlq $32,%mm1 paddq %mm4,%mm1 movd 28(%eax),%mm5 paddq %mm5,%mm3 movd %mm1,20(%eax) psrlq $32,%mm1 paddq %mm6,%mm1 movd %mm1,24(%eax) psrlq $32,%mm1 paddq %mm3,%mm1 movd %mm1,28(%eax) leal 32(%eax),%eax psrlq $32,%mm1 subl $8,%ecx jz .L003maw_sse2_exit .L001maw_sse2_entry: testl $4294967288,%ecx jnz .L002maw_sse2_unrolled .align 4 .L004maw_sse2_loop: movd (%edx),%mm2 movd (%eax),%mm3 pmuludq %mm0,%mm2 leal 4(%edx),%edx paddq %mm3,%mm1 paddq %mm2,%mm1 movd %mm1,(%eax) subl $1,%ecx psrlq $32,%mm1 leal 4(%eax),%eax jnz .L004maw_sse2_loop .L003maw_sse2_exit: movd %mm1,%eax emms ret .align 16 .L000maw_non_sse2: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %esi,%esi movl 20(%esp),%edi movl 28(%esp),%ecx movl 24(%esp),%ebx andl $4294967288,%ecx movl 32(%esp),%ebp pushl %ecx jz .L005maw_finish .align 16 .L006maw_loop: movl (%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl (%edi),%eax adcl $0,%edx movl %eax,(%edi) movl %edx,%esi movl 4(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 4(%edi),%eax adcl $0,%edx movl %eax,4(%edi) movl %edx,%esi movl 8(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 8(%edi),%eax adcl $0,%edx movl %eax,8(%edi) movl %edx,%esi movl 12(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 12(%edi),%eax adcl $0,%edx movl %eax,12(%edi) movl %edx,%esi movl 16(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 16(%edi),%eax adcl $0,%edx movl %eax,16(%edi) movl %edx,%esi movl 20(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 20(%edi),%eax adcl $0,%edx movl %eax,20(%edi) movl %edx,%esi movl 24(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 24(%edi),%eax adcl $0,%edx movl %eax,24(%edi) movl %edx,%esi movl 28(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 28(%edi),%eax adcl $0,%edx movl %eax,28(%edi) movl 
%edx,%esi subl $8,%ecx leal 32(%ebx),%ebx leal 32(%edi),%edi jnz .L006maw_loop .L005maw_finish: movl 32(%esp),%ecx andl $7,%ecx jnz .L007maw_finish2 jmp .L008maw_end .L007maw_finish2: movl (%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl (%edi),%eax adcl $0,%edx decl %ecx movl %eax,(%edi) movl %edx,%esi jz .L008maw_end movl 4(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 4(%edi),%eax adcl $0,%edx decl %ecx movl %eax,4(%edi) movl %edx,%esi jz .L008maw_end movl 8(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 8(%edi),%eax adcl $0,%edx decl %ecx movl %eax,8(%edi) movl %edx,%esi jz .L008maw_end movl 12(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 12(%edi),%eax adcl $0,%edx decl %ecx movl %eax,12(%edi) movl %edx,%esi jz .L008maw_end movl 16(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 16(%edi),%eax adcl $0,%edx decl %ecx movl %eax,16(%edi) movl %edx,%esi jz .L008maw_end movl 20(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 20(%edi),%eax adcl $0,%edx decl %ecx movl %eax,20(%edi) movl %edx,%esi jz .L008maw_end movl 24(%ebx),%eax mull %ebp addl %esi,%eax adcl $0,%edx addl 24(%edi),%eax adcl $0,%edx movl %eax,24(%edi) movl %edx,%esi .L008maw_end: movl %esi,%eax popl %ecx popl %edi popl %esi popl %ebx popl %ebp ret .size bn_mul_add_words,.-.L_bn_mul_add_words_begin .globl bn_mul_words .type bn_mul_words,@function .align 16 bn_mul_words: .L_bn_mul_words_begin: leal OPENSSL_ia32cap_P,%eax btl $26,(%eax) jnc .L009mw_non_sse2 movl 4(%esp),%eax movl 8(%esp),%edx movl 12(%esp),%ecx movd 16(%esp),%mm0 pxor %mm1,%mm1 .align 16 .L010mw_sse2_loop: movd (%edx),%mm2 pmuludq %mm0,%mm2 leal 4(%edx),%edx paddq %mm2,%mm1 movd %mm1,(%eax) subl $1,%ecx psrlq $32,%mm1 leal 4(%eax),%eax jnz .L010mw_sse2_loop movd %mm1,%eax emms ret .align 16 .L009mw_non_sse2: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %esi,%esi movl 20(%esp),%edi movl 24(%esp),%ebx movl 28(%esp),%ebp movl 32(%esp),%ecx andl $4294967288,%ebp jz .L011mw_finish .L012mw_loop: movl (%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,(%edi) movl %edx,%esi movl 4(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,4(%edi) movl %edx,%esi movl 8(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,8(%edi) movl %edx,%esi movl 12(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,12(%edi) movl %edx,%esi movl 16(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,16(%edi) movl %edx,%esi movl 20(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,20(%edi) movl %edx,%esi movl 24(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,24(%edi) movl %edx,%esi movl 28(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,28(%edi) movl %edx,%esi addl $32,%ebx addl $32,%edi subl $8,%ebp jz .L011mw_finish jmp .L012mw_loop .L011mw_finish: movl 28(%esp),%ebp andl $7,%ebp jnz .L013mw_finish2 jmp .L014mw_end .L013mw_finish2: movl (%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,(%edi) movl %edx,%esi decl %ebp jz .L014mw_end movl 4(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,4(%edi) movl %edx,%esi decl %ebp jz .L014mw_end movl 8(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,8(%edi) movl %edx,%esi decl %ebp jz .L014mw_end movl 12(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,12(%edi) movl %edx,%esi decl %ebp jz .L014mw_end movl 16(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,16(%edi) movl %edx,%esi decl %ebp jz .L014mw_end movl 20(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,20(%edi) movl 
%edx,%esi decl %ebp jz .L014mw_end movl 24(%ebx),%eax mull %ecx addl %esi,%eax adcl $0,%edx movl %eax,24(%edi) movl %edx,%esi .L014mw_end: movl %esi,%eax popl %edi popl %esi popl %ebx popl %ebp ret .size bn_mul_words,.-.L_bn_mul_words_begin .globl bn_sqr_words .type bn_sqr_words,@function .align 16 bn_sqr_words: .L_bn_sqr_words_begin: leal OPENSSL_ia32cap_P,%eax btl $26,(%eax) jnc .L015sqr_non_sse2 movl 4(%esp),%eax movl 8(%esp),%edx movl 12(%esp),%ecx .align 16 .L016sqr_sse2_loop: movd (%edx),%mm0 pmuludq %mm0,%mm0 leal 4(%edx),%edx movq %mm0,(%eax) subl $1,%ecx leal 8(%eax),%eax jnz .L016sqr_sse2_loop emms ret .align 16 .L015sqr_non_sse2: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%ebx andl $4294967288,%ebx jz .L017sw_finish .L018sw_loop: movl (%edi),%eax mull %eax movl %eax,(%esi) movl %edx,4(%esi) movl 4(%edi),%eax mull %eax movl %eax,8(%esi) movl %edx,12(%esi) movl 8(%edi),%eax mull %eax movl %eax,16(%esi) movl %edx,20(%esi) movl 12(%edi),%eax mull %eax movl %eax,24(%esi) movl %edx,28(%esi) movl 16(%edi),%eax mull %eax movl %eax,32(%esi) movl %edx,36(%esi) movl 20(%edi),%eax mull %eax movl %eax,40(%esi) movl %edx,44(%esi) movl 24(%edi),%eax mull %eax movl %eax,48(%esi) movl %edx,52(%esi) movl 28(%edi),%eax mull %eax movl %eax,56(%esi) movl %edx,60(%esi) addl $32,%edi addl $64,%esi subl $8,%ebx jnz .L018sw_loop .L017sw_finish: movl 28(%esp),%ebx andl $7,%ebx jz .L019sw_end movl (%edi),%eax mull %eax movl %eax,(%esi) decl %ebx movl %edx,4(%esi) jz .L019sw_end movl 4(%edi),%eax mull %eax movl %eax,8(%esi) decl %ebx movl %edx,12(%esi) jz .L019sw_end movl 8(%edi),%eax mull %eax movl %eax,16(%esi) decl %ebx movl %edx,20(%esi) jz .L019sw_end movl 12(%edi),%eax mull %eax movl %eax,24(%esi) decl %ebx movl %edx,28(%esi) jz .L019sw_end movl 16(%edi),%eax mull %eax movl %eax,32(%esi) decl %ebx movl %edx,36(%esi) jz .L019sw_end movl 20(%edi),%eax mull %eax movl %eax,40(%esi) decl %ebx movl %edx,44(%esi) jz .L019sw_end movl 24(%edi),%eax mull %eax movl %eax,48(%esi) movl %edx,52(%esi) .L019sw_end: popl %edi popl %esi popl %ebx popl %ebp ret .size bn_sqr_words,.-.L_bn_sqr_words_begin .globl bn_div_words .type bn_div_words,@function .align 16 bn_div_words: .L_bn_div_words_begin: movl 4(%esp),%edx movl 8(%esp),%eax movl 12(%esp),%ecx divl %ecx ret .size bn_div_words,.-.L_bn_div_words_begin .globl bn_add_words .type bn_add_words,@function .align 16 bn_add_words: .L_bn_add_words_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%ebx movl 24(%esp),%esi movl 28(%esp),%edi movl 32(%esp),%ebp xorl %eax,%eax andl $4294967288,%ebp jz .L020aw_finish .L021aw_loop: movl (%esi),%ecx movl (%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) movl 4(%esi),%ecx movl 4(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,4(%ebx) movl 8(%esi),%ecx movl 8(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,8(%ebx) movl 12(%esi),%ecx movl 12(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,12(%ebx) movl 16(%esi),%ecx movl 16(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,16(%ebx) movl 20(%esi),%ecx movl 20(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,20(%ebx) movl 24(%esi),%ecx movl 24(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) movl 
28(%esi),%ecx movl 28(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,28(%ebx) addl $32,%esi addl $32,%edi addl $32,%ebx subl $8,%ebp jnz .L021aw_loop .L020aw_finish: movl 32(%esp),%ebp andl $7,%ebp jz .L022aw_end movl (%esi),%ecx movl (%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,(%ebx) jz .L022aw_end movl 4(%esi),%ecx movl 4(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,4(%ebx) jz .L022aw_end movl 8(%esi),%ecx movl 8(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,8(%ebx) jz .L022aw_end movl 12(%esi),%ecx movl 12(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,12(%ebx) jz .L022aw_end movl 16(%esi),%ecx movl 16(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,16(%ebx) jz .L022aw_end movl 20(%esi),%ecx movl 20(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,20(%ebx) jz .L022aw_end movl 24(%esi),%ecx movl 24(%edi),%edx addl %eax,%ecx movl $0,%eax adcl %eax,%eax addl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) .L022aw_end: popl %edi popl %esi popl %ebx popl %ebp ret .size bn_add_words,.-.L_bn_add_words_begin .globl bn_sub_words .type bn_sub_words,@function .align 16 bn_sub_words: .L_bn_sub_words_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%ebx movl 24(%esp),%esi movl 28(%esp),%edi movl 32(%esp),%ebp xorl %eax,%eax andl $4294967288,%ebp jz .L023aw_finish .L024aw_loop: movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) movl 4(%esi),%ecx movl 4(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,4(%ebx) movl 8(%esi),%ecx movl 8(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,8(%ebx) movl 12(%esi),%ecx movl 12(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,12(%ebx) movl 16(%esi),%ecx movl 16(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,16(%ebx) movl 20(%esi),%ecx movl 20(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,20(%ebx) movl 24(%esi),%ecx movl 24(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) movl 28(%esi),%ecx movl 28(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,28(%ebx) addl $32,%esi addl $32,%edi addl $32,%ebx subl $8,%ebp jnz .L024aw_loop .L023aw_finish: movl 32(%esp),%ebp andl $7,%ebp jz .L025aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,(%ebx) jz .L025aw_end movl 4(%esi),%ecx movl 4(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,4(%ebx) jz .L025aw_end movl 8(%esi),%ecx movl 8(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,8(%ebx) jz .L025aw_end movl 12(%esi),%ecx movl 12(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,12(%ebx) jz .L025aw_end movl 16(%esi),%ecx movl 16(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl 
%ecx,16(%ebx) jz .L025aw_end movl 20(%esi),%ecx movl 20(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,20(%ebx) jz .L025aw_end movl 24(%esi),%ecx movl 24(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) .L025aw_end: popl %edi popl %esi popl %ebx popl %ebp ret .size bn_sub_words,.-.L_bn_sub_words_begin .globl bn_sub_part_words .type bn_sub_part_words,@function .align 16 bn_sub_part_words: .L_bn_sub_part_words_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%ebx movl 24(%esp),%esi movl 28(%esp),%edi movl 32(%esp),%ebp xorl %eax,%eax andl $4294967288,%ebp jz .L026aw_finish .L027aw_loop: movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) movl 4(%esi),%ecx movl 4(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,4(%ebx) movl 8(%esi),%ecx movl 8(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,8(%ebx) movl 12(%esi),%ecx movl 12(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,12(%ebx) movl 16(%esi),%ecx movl 16(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,16(%ebx) movl 20(%esi),%ecx movl 20(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,20(%ebx) movl 24(%esi),%ecx movl 24(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) movl 28(%esi),%ecx movl 28(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,28(%ebx) addl $32,%esi addl $32,%edi addl $32,%ebx subl $8,%ebp jnz .L027aw_loop .L026aw_finish: movl 32(%esp),%ebp andl $7,%ebp jz .L028aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L028aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L028aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L028aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L028aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L028aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx decl %ebp jz .L028aw_end movl (%esi),%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) addl $4,%esi addl $4,%edi addl $4,%ebx .L028aw_end: cmpl $0,36(%esp) je .L029pw_end movl 36(%esp),%ebp cmpl $0,%ebp je .L029pw_end jge .L030pw_pos movl $0,%edx subl %ebp,%edx movl %edx,%ebp andl $4294967288,%ebp jz .L031pw_neg_finish .L032pw_neg_loop: movl $0,%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,(%ebx) movl $0,%ecx movl 4(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl 
%edx,%ecx adcl $0,%eax movl %ecx,4(%ebx) movl $0,%ecx movl 8(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,8(%ebx) movl $0,%ecx movl 12(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,12(%ebx) movl $0,%ecx movl 16(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,16(%ebx) movl $0,%ecx movl 20(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,20(%ebx) movl $0,%ecx movl 24(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) movl $0,%ecx movl 28(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,28(%ebx) addl $32,%edi addl $32,%ebx subl $8,%ebp jnz .L032pw_neg_loop .L031pw_neg_finish: movl 36(%esp),%edx movl $0,%ebp subl %edx,%ebp andl $7,%ebp jz .L029pw_end movl $0,%ecx movl (%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,(%ebx) jz .L029pw_end movl $0,%ecx movl 4(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,4(%ebx) jz .L029pw_end movl $0,%ecx movl 8(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,8(%ebx) jz .L029pw_end movl $0,%ecx movl 12(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,12(%ebx) jz .L029pw_end movl $0,%ecx movl 16(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,16(%ebx) jz .L029pw_end movl $0,%ecx movl 20(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax decl %ebp movl %ecx,20(%ebx) jz .L029pw_end movl $0,%ecx movl 24(%edi),%edx subl %eax,%ecx movl $0,%eax adcl %eax,%eax subl %edx,%ecx adcl $0,%eax movl %ecx,24(%ebx) jmp .L029pw_end .L030pw_pos: andl $4294967288,%ebp jz .L033pw_pos_finish .L034pw_pos_loop: movl (%esi),%ecx subl %eax,%ecx movl %ecx,(%ebx) jnc .L035pw_nc0 movl 4(%esi),%ecx subl %eax,%ecx movl %ecx,4(%ebx) jnc .L036pw_nc1 movl 8(%esi),%ecx subl %eax,%ecx movl %ecx,8(%ebx) jnc .L037pw_nc2 movl 12(%esi),%ecx subl %eax,%ecx movl %ecx,12(%ebx) jnc .L038pw_nc3 movl 16(%esi),%ecx subl %eax,%ecx movl %ecx,16(%ebx) jnc .L039pw_nc4 movl 20(%esi),%ecx subl %eax,%ecx movl %ecx,20(%ebx) jnc .L040pw_nc5 movl 24(%esi),%ecx subl %eax,%ecx movl %ecx,24(%ebx) jnc .L041pw_nc6 movl 28(%esi),%ecx subl %eax,%ecx movl %ecx,28(%ebx) jnc .L042pw_nc7 addl $32,%esi addl $32,%ebx subl $8,%ebp jnz .L034pw_pos_loop .L033pw_pos_finish: movl 36(%esp),%ebp andl $7,%ebp jz .L029pw_end movl (%esi),%ecx subl %eax,%ecx movl %ecx,(%ebx) jnc .L043pw_tail_nc0 decl %ebp jz .L029pw_end movl 4(%esi),%ecx subl %eax,%ecx movl %ecx,4(%ebx) jnc .L044pw_tail_nc1 decl %ebp jz .L029pw_end movl 8(%esi),%ecx subl %eax,%ecx movl %ecx,8(%ebx) jnc .L045pw_tail_nc2 decl %ebp jz .L029pw_end movl 12(%esi),%ecx subl %eax,%ecx movl %ecx,12(%ebx) jnc .L046pw_tail_nc3 decl %ebp jz .L029pw_end movl 16(%esi),%ecx subl %eax,%ecx movl %ecx,16(%ebx) jnc .L047pw_tail_nc4 decl %ebp jz .L029pw_end movl 20(%esi),%ecx subl %eax,%ecx movl %ecx,20(%ebx) jnc .L048pw_tail_nc5 decl %ebp jz .L029pw_end movl 24(%esi),%ecx subl %eax,%ecx movl %ecx,24(%ebx) jnc .L049pw_tail_nc6 movl $1,%eax jmp .L029pw_end .L050pw_nc_loop: movl (%esi),%ecx movl %ecx,(%ebx) .L035pw_nc0: movl 4(%esi),%ecx movl %ecx,4(%ebx) .L036pw_nc1: movl 8(%esi),%ecx movl %ecx,8(%ebx) .L037pw_nc2: movl 
12(%esi),%ecx movl %ecx,12(%ebx) .L038pw_nc3: movl 16(%esi),%ecx movl %ecx,16(%ebx) .L039pw_nc4: movl 20(%esi),%ecx movl %ecx,20(%ebx) .L040pw_nc5: movl 24(%esi),%ecx movl %ecx,24(%ebx) .L041pw_nc6: movl 28(%esi),%ecx movl %ecx,28(%ebx) .L042pw_nc7: addl $32,%esi addl $32,%ebx subl $8,%ebp jnz .L050pw_nc_loop movl 36(%esp),%ebp andl $7,%ebp jz .L051pw_nc_end movl (%esi),%ecx movl %ecx,(%ebx) .L043pw_tail_nc0: decl %ebp jz .L051pw_nc_end movl 4(%esi),%ecx movl %ecx,4(%ebx) .L044pw_tail_nc1: decl %ebp jz .L051pw_nc_end movl 8(%esi),%ecx movl %ecx,8(%ebx) .L045pw_tail_nc2: decl %ebp jz .L051pw_nc_end movl 12(%esi),%ecx movl %ecx,12(%ebx) .L046pw_tail_nc3: decl %ebp jz .L051pw_nc_end movl 16(%esi),%ecx movl %ecx,16(%ebx) .L047pw_tail_nc4: decl %ebp jz .L051pw_nc_end movl 20(%esi),%ecx movl %ecx,20(%ebx) .L048pw_tail_nc5: decl %ebp jz .L051pw_nc_end movl 24(%esi),%ecx movl %ecx,24(%ebx) .L049pw_tail_nc6: .L051pw_nc_end: movl $0,%eax .L029pw_end: popl %edi popl %esi popl %ebx popl %ebp ret .size bn_sub_part_words,.-.L_bn_sub_part_words_begin .comm OPENSSL_ia32cap_P,16,4 #endif Index: head/secure/lib/libcrypto/i386/cmll-x86.S =================================================================== --- head/secure/lib/libcrypto/i386/cmll-x86.S (revision 299480) +++ head/secure/lib/libcrypto/i386/cmll-x86.S (revision 299481) @@ -1,4754 +1,4755 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from cmll-x86.pl. #ifdef PIC .file "cmll-x86.S" .text .globl Camellia_EncryptBlock_Rounds .type Camellia_EncryptBlock_Rounds,@function .align 16 Camellia_EncryptBlock_Rounds: .L_Camellia_EncryptBlock_Rounds_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl 24(%esp),%esi movl 28(%esp),%edi movl %esp,%ebx subl $28,%esp andl $-64,%esp leal -127(%edi),%ecx subl %esp,%ecx negl %ecx andl $960,%ecx subl %ecx,%esp addl $4,%esp shll $6,%eax leal (%edi,%eax,1),%eax movl %ebx,20(%esp) movl %eax,16(%esp) call .L000pic_point .L000pic_point: popl %ebp leal .LCamellia_SBOX-.L000pic_point(%ebp),%ebp movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx bswap %eax movl 12(%esi),%edx bswap %ebx bswap %ecx bswap %edx call _x86_Camellia_encrypt movl 20(%esp),%esp bswap %eax movl 32(%esp),%esi bswap %ebx bswap %ecx bswap %edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_EncryptBlock_Rounds,.-.L_Camellia_EncryptBlock_Rounds_begin .globl Camellia_EncryptBlock .type Camellia_EncryptBlock,@function .align 16 Camellia_EncryptBlock: .L_Camellia_EncryptBlock_begin: movl $128,%eax subl 4(%esp),%eax movl $3,%eax adcl $0,%eax movl %eax,4(%esp) jmp .L_Camellia_EncryptBlock_Rounds_begin .size Camellia_EncryptBlock,.-.L_Camellia_EncryptBlock_begin .globl Camellia_encrypt .type Camellia_encrypt,@function .align 16 Camellia_encrypt: .L_Camellia_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 28(%esp),%edi movl %esp,%ebx subl $28,%esp andl $-64,%esp movl 272(%edi),%eax leal -127(%edi),%ecx subl %esp,%ecx negl %ecx andl $960,%ecx subl %ecx,%esp addl $4,%esp shll $6,%eax leal (%edi,%eax,1),%eax movl %ebx,20(%esp) movl %eax,16(%esp) call .L001pic_point .L001pic_point: popl %ebp leal .LCamellia_SBOX-.L001pic_point(%ebp),%ebp movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx bswap %eax movl 12(%esi),%edx bswap %ebx bswap %ecx bswap %edx call _x86_Camellia_encrypt movl 20(%esp),%esp bswap %eax movl 24(%esp),%esi bswap %ebx bswap %ecx bswap %edx movl %eax,(%esi) movl %ebx,4(%esi) movl 
%ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_encrypt,.-.L_Camellia_encrypt_begin .type _x86_Camellia_encrypt,@function .align 16 _x86_Camellia_encrypt: xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 16(%edi),%esi movl %eax,4(%esp) movl %ebx,8(%esp) movl %ecx,12(%esp) movl %edx,16(%esp) .align 16 .L002loop: xorl %esi,%eax xorl 20(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 24(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl 28(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 32(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) xorl %esi,%eax xorl 36(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 40(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl 44(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 48(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) xorl %esi,%eax xorl 52(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 56(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl 60(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 
8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 64(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) addl $64,%edi cmpl 20(%esp),%edi je .L003done andl %eax,%esi movl 16(%esp),%edx roll $1,%esi movl %edx,%ecx xorl %esi,%ebx orl 12(%edi),%ecx movl %ebx,8(%esp) xorl 12(%esp),%ecx movl 4(%edi),%esi movl %ecx,12(%esp) orl %ebx,%esi andl 8(%edi),%ecx xorl %esi,%eax roll $1,%ecx movl %eax,4(%esp) xorl %ecx,%edx movl 16(%edi),%esi movl %edx,16(%esp) jmp .L002loop .align 8 .L003done: movl %eax,%ecx movl %ebx,%edx movl 12(%esp),%eax movl 16(%esp),%ebx xorl %esi,%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx ret .size _x86_Camellia_encrypt,.-_x86_Camellia_encrypt .globl Camellia_DecryptBlock_Rounds .type Camellia_DecryptBlock_Rounds,@function .align 16 Camellia_DecryptBlock_Rounds: .L_Camellia_DecryptBlock_Rounds_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl 24(%esp),%esi movl 28(%esp),%edi movl %esp,%ebx subl $28,%esp andl $-64,%esp leal -127(%edi),%ecx subl %esp,%ecx negl %ecx andl $960,%ecx subl %ecx,%esp addl $4,%esp shll $6,%eax movl %edi,16(%esp) leal (%edi,%eax,1),%edi movl %ebx,20(%esp) call .L004pic_point .L004pic_point: popl %ebp leal .LCamellia_SBOX-.L004pic_point(%ebp),%ebp movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx bswap %eax movl 12(%esi),%edx bswap %ebx bswap %ecx bswap %edx call _x86_Camellia_decrypt movl 20(%esp),%esp bswap %eax movl 32(%esp),%esi bswap %ebx bswap %ecx bswap %edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_DecryptBlock_Rounds,.-.L_Camellia_DecryptBlock_Rounds_begin .globl Camellia_DecryptBlock .type Camellia_DecryptBlock,@function .align 16 Camellia_DecryptBlock: .L_Camellia_DecryptBlock_begin: movl $128,%eax subl 4(%esp),%eax movl $3,%eax adcl $0,%eax movl %eax,4(%esp) jmp .L_Camellia_DecryptBlock_Rounds_begin .size Camellia_DecryptBlock,.-.L_Camellia_DecryptBlock_begin .globl Camellia_decrypt .type Camellia_decrypt,@function .align 16 Camellia_decrypt: .L_Camellia_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 28(%esp),%edi movl %esp,%ebx subl $28,%esp andl $-64,%esp movl 272(%edi),%eax leal -127(%edi),%ecx subl %esp,%ecx negl %ecx andl $960,%ecx subl %ecx,%esp addl $4,%esp shll $6,%eax movl %edi,16(%esp) leal (%edi,%eax,1),%edi movl %ebx,20(%esp) call .L005pic_point .L005pic_point: popl %ebp leal .LCamellia_SBOX-.L005pic_point(%ebp),%ebp movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx bswap %eax movl 12(%esi),%edx bswap %ebx bswap %ecx bswap %edx call _x86_Camellia_decrypt movl 20(%esp),%esp bswap %eax movl 24(%esp),%esi bswap %ebx bswap %ecx bswap %edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_decrypt,.-.L_Camellia_decrypt_begin .type _x86_Camellia_decrypt,@function .align 16 _x86_Camellia_decrypt: xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl -8(%edi),%esi movl %eax,4(%esp) movl %ebx,8(%esp) movl %ecx,12(%esp) movl %edx,16(%esp) .align 16 .L006loop: xorl %esi,%eax xorl -4(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 
2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl -16(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl -12(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl -24(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) xorl %esi,%eax xorl -20(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl -32(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl -28(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl -40(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) xorl %esi,%eax xorl -36(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl -48(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl -44(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl -56(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) subl $64,%edi cmpl 20(%esp),%edi je .L007done andl %eax,%esi movl 16(%esp),%edx roll $1,%esi movl %edx,%ecx xorl %esi,%ebx orl 4(%edi),%ecx movl %ebx,8(%esp) xorl 12(%esp),%ecx movl 12(%edi),%esi movl %ecx,12(%esp) orl %ebx,%esi andl (%edi),%ecx xorl %esi,%eax roll $1,%ecx movl %eax,4(%esp) xorl %ecx,%edx movl -8(%edi),%esi movl %edx,16(%esp) jmp .L006loop .align 8 .L007done: movl %eax,%ecx movl %ebx,%edx movl 12(%esp),%eax movl 
16(%esp),%ebx xorl %esi,%ecx xorl 12(%edi),%edx xorl (%edi),%eax xorl 4(%edi),%ebx ret .size _x86_Camellia_decrypt,.-_x86_Camellia_decrypt .globl Camellia_Ekeygen .type Camellia_Ekeygen,@function .align 16 Camellia_Ekeygen: .L_Camellia_Ekeygen_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi subl $16,%esp movl 36(%esp),%ebp movl 40(%esp),%esi movl 44(%esp),%edi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) cmpl $128,%ebp je .L0081st128 movl 16(%esi),%eax movl 20(%esi),%ebx cmpl $192,%ebp je .L0091st192 movl 24(%esi),%ecx movl 28(%esi),%edx jmp .L0101st256 .align 4 .L0091st192: movl %eax,%ecx movl %ebx,%edx notl %ecx notl %edx .align 4 .L0101st256: bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,32(%edi) movl %ebx,36(%edi) movl %ecx,40(%edi) movl %edx,44(%edi) xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx .align 4 .L0081st128: call .L011pic_point .L011pic_point: popl %ebp leal .LCamellia_SBOX-.L011pic_point(%ebp),%ebp leal .LCamellia_SIGMA-.LCamellia_SBOX(%ebp),%edi movl (%edi),%esi movl %eax,(%esp) movl %ebx,4(%esp) movl %ecx,8(%esp) movl %edx,12(%esp) xorl %esi,%eax xorl 4(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 12(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 8(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 8(%edi),%esi xorl %ecx,%edx movl %edx,12(%esp) xorl %ebx,%ecx movl %ecx,8(%esp) xorl %esi,%ecx xorl 12(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 4(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl (%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 16(%edi),%esi xorl %eax,%ebx movl %ebx,4(%esp) xorl %edx,%eax movl %eax,(%esp) movl 8(%esp),%ecx movl 12(%esp),%edx movl 44(%esp),%esi xorl (%esi),%eax xorl 4(%esi),%ebx xorl 8(%esi),%ecx xorl 12(%esi),%edx movl 16(%edi),%esi movl %eax,(%esp) movl %ebx,4(%esp) movl %ecx,8(%esp) movl %edx,12(%esp) xorl %esi,%eax xorl 20(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 12(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 8(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 24(%edi),%esi xorl %ecx,%edx movl %edx,12(%esp) xorl %ebx,%ecx movl %ecx,8(%esp) xorl %esi,%ecx xorl 28(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 4(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 
2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl (%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 32(%edi),%esi xorl %eax,%ebx movl %ebx,4(%esp) xorl %edx,%eax movl %eax,(%esp) movl 8(%esp),%ecx movl 12(%esp),%edx movl 36(%esp),%esi cmpl $128,%esi jne .L0122nd256 movl 44(%esp),%edi leal 128(%edi),%edi movl %eax,-112(%edi) movl %ebx,-108(%edi) movl %ecx,-104(%edi) movl %edx,-100(%edi) movl %eax,%ebp shll $15,%eax movl %ebx,%esi shrl $17,%esi shll $15,%ebx orl %esi,%eax movl %ecx,%esi shll $15,%ecx movl %eax,-80(%edi) shrl $17,%esi orl %esi,%ebx shrl $17,%ebp movl %edx,%esi shrl $17,%esi movl %ebx,-76(%edi) shll $15,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,-72(%edi) movl %edx,-68(%edi) movl %eax,%ebp shll $15,%eax movl %ebx,%esi shrl $17,%esi shll $15,%ebx orl %esi,%eax movl %ecx,%esi shll $15,%ecx movl %eax,-64(%edi) shrl $17,%esi orl %esi,%ebx shrl $17,%ebp movl %edx,%esi shrl $17,%esi movl %ebx,-60(%edi) shll $15,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,-56(%edi) movl %edx,-52(%edi) movl %eax,%ebp shll $15,%eax movl %ebx,%esi shrl $17,%esi shll $15,%ebx orl %esi,%eax movl %ecx,%esi shll $15,%ecx movl %eax,-32(%edi) shrl $17,%esi orl %esi,%ebx shrl $17,%ebp movl %edx,%esi shrl $17,%esi movl %ebx,-28(%edi) shll $15,%edx orl %esi,%ecx orl %ebp,%edx movl %eax,%ebp shll $15,%eax movl %ebx,%esi shrl $17,%esi shll $15,%ebx orl %esi,%eax movl %ecx,%esi shll $15,%ecx movl %eax,-16(%edi) shrl $17,%esi orl %esi,%ebx shrl $17,%ebp movl %edx,%esi shrl $17,%esi movl %ebx,-12(%edi) shll $15,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,-8(%edi) movl %edx,-4(%edi) movl %ebx,%ebp shll $2,%ebx movl %ecx,%esi shrl $30,%esi shll $2,%ecx orl %esi,%ebx movl %edx,%esi shll $2,%edx movl %ebx,32(%edi) shrl $30,%esi orl %esi,%ecx shrl $30,%ebp movl %eax,%esi shrl $30,%esi movl %ecx,36(%edi) shll $2,%eax orl %esi,%edx orl %ebp,%eax movl %edx,40(%edi) movl %eax,44(%edi) movl %ebx,%ebp shll $17,%ebx movl %ecx,%esi shrl $15,%esi shll $17,%ecx orl %esi,%ebx movl %edx,%esi shll $17,%edx movl %ebx,64(%edi) shrl $15,%esi orl %esi,%ecx shrl $15,%ebp movl %eax,%esi shrl $15,%esi movl %ecx,68(%edi) shll $17,%eax orl %esi,%edx orl %ebp,%eax movl %edx,72(%edi) movl %eax,76(%edi) movl -128(%edi),%ebx movl -124(%edi),%ecx movl -120(%edi),%edx movl -116(%edi),%eax movl %ebx,%ebp shll $15,%ebx movl %ecx,%esi shrl $17,%esi shll $15,%ecx orl %esi,%ebx movl %edx,%esi shll $15,%edx movl %ebx,-96(%edi) shrl $17,%esi orl %esi,%ecx shrl $17,%ebp movl %eax,%esi shrl $17,%esi movl %ecx,-92(%edi) shll $15,%eax orl %esi,%edx orl %ebp,%eax movl %edx,-88(%edi) movl %eax,-84(%edi) movl %ebx,%ebp shll $30,%ebx movl %ecx,%esi shrl $2,%esi shll $30,%ecx orl %esi,%ebx movl %edx,%esi shll $30,%edx movl %ebx,-48(%edi) shrl $2,%esi orl %esi,%ecx shrl $2,%ebp movl %eax,%esi shrl $2,%esi movl %ecx,-44(%edi) shll $30,%eax orl %esi,%edx orl %ebp,%eax movl %edx,-40(%edi) movl %eax,-36(%edi) movl %ebx,%ebp shll $15,%ebx movl %ecx,%esi shrl $17,%esi shll $15,%ecx orl %esi,%ebx movl %edx,%esi shll $15,%edx shrl $17,%esi orl %esi,%ecx shrl $17,%ebp movl %eax,%esi shrl $17,%esi shll $15,%eax orl %esi,%edx orl %ebp,%eax movl %edx,-24(%edi) movl %eax,-20(%edi) movl %ebx,%ebp shll $17,%ebx movl %ecx,%esi shrl $15,%esi shll $17,%ecx orl %esi,%ebx movl %edx,%esi shll $17,%edx movl %ebx,(%edi) shrl $15,%esi orl %esi,%ecx shrl $15,%ebp movl %eax,%esi shrl $15,%esi movl %ecx,4(%edi) shll $17,%eax orl %esi,%edx orl %ebp,%eax movl %edx,8(%edi) movl %eax,12(%edi) movl %ebx,%ebp shll $17,%ebx movl %ecx,%esi shrl $15,%esi shll $17,%ecx orl %esi,%ebx movl %edx,%esi 
shll $17,%edx movl %ebx,16(%edi) shrl $15,%esi orl %esi,%ecx shrl $15,%ebp movl %eax,%esi shrl $15,%esi movl %ecx,20(%edi) shll $17,%eax orl %esi,%edx orl %ebp,%eax movl %edx,24(%edi) movl %eax,28(%edi) movl %ebx,%ebp shll $17,%ebx movl %ecx,%esi shrl $15,%esi shll $17,%ecx orl %esi,%ebx movl %edx,%esi shll $17,%edx movl %ebx,48(%edi) shrl $15,%esi orl %esi,%ecx shrl $15,%ebp movl %eax,%esi shrl $15,%esi movl %ecx,52(%edi) shll $17,%eax orl %esi,%edx orl %ebp,%eax movl %edx,56(%edi) movl %eax,60(%edi) movl $3,%eax jmp .L013done .align 16 .L0122nd256: movl 44(%esp),%esi movl %eax,48(%esi) movl %ebx,52(%esi) movl %ecx,56(%esi) movl %edx,60(%esi) xorl 32(%esi),%eax xorl 36(%esi),%ebx xorl 40(%esi),%ecx xorl 44(%esi),%edx movl 32(%edi),%esi movl %eax,(%esp) movl %ebx,4(%esp) movl %ecx,8(%esp) movl %edx,12(%esp) xorl %esi,%eax xorl 36(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 12(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 8(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 40(%edi),%esi xorl %ecx,%edx movl %edx,12(%esp) xorl %ebx,%ecx movl %ecx,8(%esp) xorl %esi,%ecx xorl 44(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 4(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl (%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 48(%edi),%esi xorl %eax,%ebx movl %ebx,4(%esp) xorl %edx,%eax movl %eax,(%esp) movl 8(%esp),%ecx movl 12(%esp),%edx movl 44(%esp),%edi leal 128(%edi),%edi movl %eax,-112(%edi) movl %ebx,-108(%edi) movl %ecx,-104(%edi) movl %edx,-100(%edi) movl %eax,%ebp shll $30,%eax movl %ebx,%esi shrl $2,%esi shll $30,%ebx orl %esi,%eax movl %ecx,%esi shll $30,%ecx movl %eax,-48(%edi) shrl $2,%esi orl %esi,%ebx shrl $2,%ebp movl %edx,%esi shrl $2,%esi movl %ebx,-44(%edi) shll $30,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,-40(%edi) movl %edx,-36(%edi) movl %eax,%ebp shll $30,%eax movl %ebx,%esi shrl $2,%esi shll $30,%ebx orl %esi,%eax movl %ecx,%esi shll $30,%ecx movl %eax,32(%edi) shrl $2,%esi orl %esi,%ebx shrl $2,%ebp movl %edx,%esi shrl $2,%esi movl %ebx,36(%edi) shll $30,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,40(%edi) movl %edx,44(%edi) movl %ebx,%ebp shll $19,%ebx movl %ecx,%esi shrl $13,%esi shll $19,%ecx orl %esi,%ebx movl %edx,%esi shll $19,%edx movl %ebx,128(%edi) shrl $13,%esi orl %esi,%ecx shrl $13,%ebp movl %eax,%esi shrl $13,%esi movl %ecx,132(%edi) shll $19,%eax orl %esi,%edx orl %ebp,%eax movl %edx,136(%edi) movl %eax,140(%edi) movl -96(%edi),%ebx movl -92(%edi),%ecx movl -88(%edi),%edx movl -84(%edi),%eax movl %ebx,%ebp shll $15,%ebx movl %ecx,%esi shrl $17,%esi shll $15,%ecx orl %esi,%ebx movl %edx,%esi shll $15,%edx movl %ebx,-96(%edi) shrl $17,%esi orl %esi,%ecx shrl $17,%ebp movl %eax,%esi shrl $17,%esi movl %ecx,-92(%edi) shll $15,%eax orl %esi,%edx orl %ebp,%eax movl %edx,-88(%edi) movl %eax,-84(%edi) movl %ebx,%ebp shll $15,%ebx movl %ecx,%esi shrl $17,%esi shll $15,%ecx orl %esi,%ebx movl %edx,%esi shll $15,%edx movl %ebx,-64(%edi) shrl 
$17,%esi orl %esi,%ecx shrl $17,%ebp movl %eax,%esi shrl $17,%esi movl %ecx,-60(%edi) shll $15,%eax orl %esi,%edx orl %ebp,%eax movl %edx,-56(%edi) movl %eax,-52(%edi) movl %ebx,%ebp shll $30,%ebx movl %ecx,%esi shrl $2,%esi shll $30,%ecx orl %esi,%ebx movl %edx,%esi shll $30,%edx movl %ebx,16(%edi) shrl $2,%esi orl %esi,%ecx shrl $2,%ebp movl %eax,%esi shrl $2,%esi movl %ecx,20(%edi) shll $30,%eax orl %esi,%edx orl %ebp,%eax movl %edx,24(%edi) movl %eax,28(%edi) movl %ecx,%ebp shll $2,%ecx movl %edx,%esi shrl $30,%esi shll $2,%edx orl %esi,%ecx movl %eax,%esi shll $2,%eax movl %ecx,80(%edi) shrl $30,%esi orl %esi,%edx shrl $30,%ebp movl %ebx,%esi shrl $30,%esi movl %edx,84(%edi) shll $2,%ebx orl %esi,%eax orl %ebp,%ebx movl %eax,88(%edi) movl %ebx,92(%edi) movl -80(%edi),%ecx movl -76(%edi),%edx movl -72(%edi),%eax movl -68(%edi),%ebx movl %ecx,%ebp shll $15,%ecx movl %edx,%esi shrl $17,%esi shll $15,%edx orl %esi,%ecx movl %eax,%esi shll $15,%eax movl %ecx,-80(%edi) shrl $17,%esi orl %esi,%edx shrl $17,%ebp movl %ebx,%esi shrl $17,%esi movl %edx,-76(%edi) shll $15,%ebx orl %esi,%eax orl %ebp,%ebx movl %eax,-72(%edi) movl %ebx,-68(%edi) movl %ecx,%ebp shll $30,%ecx movl %edx,%esi shrl $2,%esi shll $30,%edx orl %esi,%ecx movl %eax,%esi shll $30,%eax movl %ecx,-16(%edi) shrl $2,%esi orl %esi,%edx shrl $2,%ebp movl %ebx,%esi shrl $2,%esi movl %edx,-12(%edi) shll $30,%ebx orl %esi,%eax orl %ebp,%ebx movl %eax,-8(%edi) movl %ebx,-4(%edi) movl %edx,64(%edi) movl %eax,68(%edi) movl %ebx,72(%edi) movl %ecx,76(%edi) movl %edx,%ebp shll $17,%edx movl %eax,%esi shrl $15,%esi shll $17,%eax orl %esi,%edx movl %ebx,%esi shll $17,%ebx movl %edx,96(%edi) shrl $15,%esi orl %esi,%eax shrl $15,%ebp movl %ecx,%esi shrl $15,%esi movl %eax,100(%edi) shll $17,%ecx orl %esi,%ebx orl %ebp,%ecx movl %ebx,104(%edi) movl %ecx,108(%edi) movl -128(%edi),%edx movl -124(%edi),%eax movl -120(%edi),%ebx movl -116(%edi),%ecx movl %eax,%ebp shll $13,%eax movl %ebx,%esi shrl $19,%esi shll $13,%ebx orl %esi,%eax movl %ecx,%esi shll $13,%ecx movl %eax,-32(%edi) shrl $19,%esi orl %esi,%ebx shrl $19,%ebp movl %edx,%esi shrl $19,%esi movl %ebx,-28(%edi) shll $13,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,-24(%edi) movl %edx,-20(%edi) movl %eax,%ebp shll $15,%eax movl %ebx,%esi shrl $17,%esi shll $15,%ebx orl %esi,%eax movl %ecx,%esi shll $15,%ecx movl %eax,(%edi) shrl $17,%esi orl %esi,%ebx shrl $17,%ebp movl %edx,%esi shrl $17,%esi movl %ebx,4(%edi) shll $15,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,8(%edi) movl %edx,12(%edi) movl %eax,%ebp shll $17,%eax movl %ebx,%esi shrl $15,%esi shll $17,%ebx orl %esi,%eax movl %ecx,%esi shll $17,%ecx movl %eax,48(%edi) shrl $15,%esi orl %esi,%ebx shrl $15,%ebp movl %edx,%esi shrl $15,%esi movl %ebx,52(%edi) shll $17,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,56(%edi) movl %edx,60(%edi) movl %ebx,%ebp shll $2,%ebx movl %ecx,%esi shrl $30,%esi shll $2,%ecx orl %esi,%ebx movl %edx,%esi shll $2,%edx movl %ebx,112(%edi) shrl $30,%esi orl %esi,%ecx shrl $30,%ebp movl %eax,%esi shrl $30,%esi movl %ecx,116(%edi) shll $2,%eax orl %esi,%edx orl %ebp,%eax movl %edx,120(%edi) movl %eax,124(%edi) movl $4,%eax .L013done: leal 144(%edi),%edx addl $16,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_Ekeygen,.-.L_Camellia_Ekeygen_begin .globl private_Camellia_set_key .type private_Camellia_set_key,@function .align 16 private_Camellia_set_key: .L_private_Camellia_set_key_begin: pushl %ebx movl 8(%esp),%ecx movl 12(%esp),%ebx movl 16(%esp),%edx movl $-1,%eax testl %ecx,%ecx jz .L014done testl 
%edx,%edx jz .L014done movl $-2,%eax cmpl $256,%ebx je .L015arg_ok cmpl $192,%ebx je .L015arg_ok cmpl $128,%ebx jne .L014done .align 4 .L015arg_ok: pushl %edx pushl %ecx pushl %ebx call .L_Camellia_Ekeygen_begin addl $12,%esp movl %eax,(%edx) xorl %eax,%eax .align 4 .L014done: popl %ebx ret .size private_Camellia_set_key,.-.L_private_Camellia_set_key_begin .align 64 .LCamellia_SIGMA: .long 2694735487,1003262091,3061508184,1286239154,3337565999,3914302142,1426019237,4057165596,283453434,3731369245,2958461122,3018244605,0,0,0,0 .align 64 .LCamellia_SBOX: .long 1886416896,1886388336 .long 2189591040,741081132 .long 741092352,3014852787 .long 3974949888,3233808576 .long 3014898432,3840147684 .long 656877312,1465319511 .long 3233857536,3941204202 .long 3857048832,2930639022 .long 3840205824,589496355 .long 2240120064,1802174571 .long 1465341696,1162149957 .long 892679424,2779054245 .long 3941263872,3991732461 .long 202116096,1330577487 .long 2930683392,488439837 .long 1094795520,2459041938 .long 589505280,2256928902 .long 4025478912,2947481775 .long 1802201856,2088501372 .long 2475922176,522125343 .long 1162167552,1044250686 .long 421075200,3705405660 .long 2779096320,1583218782 .long 555819264,185270283 .long 3991792896,2795896998 .long 235802112,960036921 .long 1330597632,3587506389 .long 1313754624,1566376029 .long 488447232,3654877401 .long 1701143808,1515847770 .long 2459079168,1364262993 .long 3183328512,1819017324 .long 2256963072,2341142667 .long 3099113472,2593783962 .long 2947526400,4227531003 .long 2408550144,2964324528 .long 2088532992,1953759348 .long 3958106880,724238379 .long 522133248,4042260720 .long 3469659648,2223243396 .long 1044266496,3755933919 .long 808464384,3419078859 .long 3705461760,875823156 .long 1600085760,1987444854 .long 1583242752,1835860077 .long 3318072576,2846425257 .long 185273088,3520135377 .long 437918208,67371012 .long 2795939328,336855060 .long 3789676800,976879674 .long 960051456,3739091166 .long 3402287616,286326801 .long 3587560704,842137650 .long 1195853568,2627469468 .long 1566399744,1397948499 .long 1027423488,4075946226 .long 3654932736,4278059262 .long 16843008,3486449871 .long 1515870720,3284336835 .long 3604403712,2054815866 .long 1364283648,606339108 .long 1448498688,3907518696 .long 1819044864,1616904288 .long 1296911616,1768489065 .long 2341178112,2863268010 .long 218959104,2694840480 .long 2593823232,2711683233 .long 1717986816,1650589794 .long 4227595008,1414791252 .long 3435973632,505282590 .long 2964369408,3772776672 .long 757935360,1684275300 .long 1953788928,269484048 .long 303174144,0 .long 724249344,2745368739 .long 538976256,1970602101 .long 4042321920,2324299914 .long 2981212416,3873833190 .long 2223277056,151584777 .long 2576980224,3722248413 .long 3755990784,2273771655 .long 1280068608,2206400643 .long 3419130624,3452764365 .long 3267543552,2425356432 .long 875836416,1936916595 .long 2122219008,4143317238 .long 1987474944,2644312221 .long 84215040,3216965823 .long 1835887872,1381105746 .long 3082270464,3638034648 .long 2846468352,3368550600 .long 825307392,3334865094 .long 3520188672,2172715137 .long 387389184,1869545583 .long 67372032,320012307 .long 3621246720,1667432547 .long 336860160,3924361449 .long 1482184704,2812739751 .long 976894464,2677997727 .long 1633771776,3166437564 .long 3739147776,690552873 .long 454761216,4193845497 .long 286331136,791609391 .long 471604224,3031695540 .long 842150400,2021130360 .long 252645120,101056518 .long 2627509248,3890675943 .long 370546176,1903231089 .long 1397969664,3570663636 .long 
404232192,2880110763 .long 4076007936,2290614408 .long 572662272,2374828173 .long 4278124032,1920073842 .long 1145324544,3115909305 .long 3486502656,4177002744 .long 2998055424,2896953516 .long 3284386560,909508662 .long 3048584448,707395626 .long 2054846976,1010565180 .long 2442236160,4059103473 .long 606348288,1077936192 .long 134744064,3553820883 .long 3907577856,3149594811 .long 2829625344,1128464451 .long 1616928768,353697813 .long 4244438016,2913796269 .long 1768515840,2004287607 .long 1347440640,2155872384 .long 2863311360,2189557890 .long 3503345664,3974889708 .long 2694881280,656867367 .long 2105376000,3856990437 .long 2711724288,2240086149 .long 2307492096,892665909 .long 1650614784,202113036 .long 2543294208,1094778945 .long 1414812672,4025417967 .long 1532713728,2475884691 .long 505290240,421068825 .long 2509608192,555810849 .long 3772833792,235798542 .long 4294967040,1313734734 .long 1684300800,1701118053 .long 3537031680,3183280317 .long 269488128,3099066552 .long 3301229568,2408513679 .long 0,3958046955 .long 1212696576,3469607118 .long 2745410304,808452144 .long 4160222976,1600061535 .long 1970631936,3318022341 .long 3688618752,437911578 .long 2324335104,3789619425 .long 50529024,3402236106 .long 3873891840,1195835463 .long 3671775744,1027407933 .long 151587072,16842753 .long 1061109504,3604349142 .long 3722304768,1448476758 .long 2492765184,1296891981 .long 2273806080,218955789 .long 1549556736,1717960806 .long 2206434048,3435921612 .long 33686016,757923885 .long 3452816640,303169554 .long 1246382592,538968096 .long 2425393152,2981167281 .long 858993408,2576941209 .long 1936945920,1280049228 .long 1734829824,3267494082 .long 4143379968,2122186878 .long 4092850944,84213765 .long 2644352256,3082223799 .long 2139062016,825294897 .long 3217014528,387383319 .long 3806519808,3621191895 .long 1381126656,1482162264 .long 2610666240,1633747041 .long 3638089728,454754331 .long 640034304,471597084 .long 3368601600,252641295 .long 926365440,370540566 .long 3334915584,404226072 .long 993737472,572653602 .long 2172748032,1145307204 .long 2526451200,2998010034 .long 1869573888,3048538293 .long 1263225600,2442199185 .long 320017152,134742024 .long 3200171520,2829582504 .long 1667457792,4244373756 .long 774778368,1347420240 .long 3924420864,3503292624 .long 2038003968,2105344125 .long 2812782336,2307457161 .long 2358021120,2543255703 .long 2678038272,1532690523 .long 1852730880,2509570197 .long 3166485504,4294902015 .long 2391707136,3536978130 .long 690563328,3301179588 .long 4126536960,1212678216 .long 4193908992,4160159991 .long 3065427456,3688562907 .long 791621376,50528259 .long 4261281024,3671720154 .long 3031741440,1061093439 .long 1499027712,2492727444 .long 2021160960,1549533276 .long 2560137216,33685506 .long 101058048,1246363722 .long 1785358848,858980403 .long 3890734848,1734803559 .long 1179010560,4092788979 .long 1903259904,2139029631 .long 3132799488,3806462178 .long 3570717696,2610626715 .long 623191296,640024614 .long 2880154368,926351415 .long 1111638528,993722427 .long 2290649088,2526412950 .long 2728567296,1263206475 .long 2374864128,3200123070 .long 4210752000,774766638 .long 1920102912,2037973113 .long 117901056,2357985420 .long 3115956480,1852702830 .long 1431655680,2391670926 .long 4177065984,4126474485 .long 4008635904,3065381046 .long 2896997376,4261216509 .long 168430080,1499005017 .long 909522432,2560098456 .long 1229539584,1785331818 .long 707406336,1178992710 .long 1751672832,3132752058 .long 1010580480,623181861 .long 943208448,1111621698 .long 
4059164928,2728525986 .long 2762253312,4210688250 .long 1077952512,117899271 .long 673720320,1431634005 .long 3553874688,4008575214 .long 2071689984,168427530 .long 3149642496,1229520969 .long 3385444608,1751646312 .long 1128481536,943194168 .long 3250700544,2762211492 .long 353703168,673710120 .long 3823362816,2071658619 .long 2913840384,3385393353 .long 4109693952,3250651329 .long 2004317952,3823304931 .long 3351758592,4109631732 .long 2155905024,3351707847 .long 2661195264,2661154974 .long 14737632,939538488 .long 328965,1090535745 .long 5789784,369104406 .long 14277081,1979741814 .long 6776679,3640711641 .long 5131854,2466288531 .long 8487297,1610637408 .long 13355979,4060148466 .long 13224393,1912631922 .long 723723,3254829762 .long 11447982,2868947883 .long 6974058,2583730842 .long 14013909,1962964341 .long 1579032,100664838 .long 6118749,1459640151 .long 8553090,2684395680 .long 4605510,2432733585 .long 14671839,4144035831 .long 14079702,3036722613 .long 2565927,3372272073 .long 9079434,2717950626 .long 3289650,2348846220 .long 4934475,3523269330 .long 4342338,2415956112 .long 14408667,4127258358 .long 1842204,117442311 .long 10395294,2801837991 .long 10263708,654321447 .long 3815994,2382401166 .long 13290186,2986390194 .long 2434341,1224755529 .long 8092539,3724599006 .long 855309,1124090691 .long 7434609,1543527516 .long 6250335,3607156695 .long 2039583,3338717127 .long 16316664,1040203326 .long 14145495,4110480885 .long 4079166,2399178639 .long 10329501,1728079719 .long 8158332,520101663 .long 6316128,402659352 .long 12171705,1845522030 .long 12500670,2936057775 .long 12369084,788541231 .long 9145227,3791708898 .long 1447446,2231403909 .long 3421236,218107149 .long 5066061,1392530259 .long 12829635,4026593520 .long 7500402,2617285788 .long 9803157,1694524773 .long 11250603,3925928682 .long 9342606,2734728099 .long 12237498,2919280302 .long 8026746,2650840734 .long 11776947,3959483628 .long 131586,2147516544 .long 11842740,754986285 .long 11382189,1795189611 .long 10658466,2818615464 .long 11316396,721431339 .long 14211288,905983542 .long 10132122,2785060518 .long 1513239,3305162181 .long 1710618,2248181382 .long 3487029,1291865421 .long 13421772,855651123 .long 16250871,4244700669 .long 10066329,1711302246 .long 6381921,1476417624 .long 5921370,2516620950 .long 15263976,973093434 .long 2368548,150997257 .long 5658198,2499843477 .long 4210752,268439568 .long 14803425,2013296760 .long 6513507,3623934168 .long 592137,1107313218 .long 3355443,3422604492 .long 12566463,4009816047 .long 10000536,637543974 .long 9934743,3842041317 .long 8750469,1627414881 .long 6842472,436214298 .long 16579836,1056980799 .long 15527148,989870907 .long 657930,2181071490 .long 14342874,3053500086 .long 7303023,3674266587 .long 5460819,3556824276 .long 6447714,2550175896 .long 10724259,3892373736 .long 3026478,2332068747 .long 526344,33554946 .long 11513775,3942706155 .long 2631720,167774730 .long 11579568,738208812 .long 7631988,486546717 .long 12763842,2952835248 .long 12434877,1862299503 .long 3552822,2365623693 .long 2236962,2281736328 .long 3684408,234884622 .long 6579300,419436825 .long 1973790,2264958855 .long 3750201,1308642894 .long 2894892,184552203 .long 10921638,2835392937 .long 3158064,201329676 .long 15066597,2030074233 .long 4473924,285217041 .long 16645629,2130739071 .long 8947848,570434082 .long 10461087,3875596263 .long 6645093,1493195097 .long 8882055,3774931425 .long 7039851,3657489114 .long 16053492,1023425853 .long 2302755,3355494600 .long 4737096,301994514 .long 1052688,67109892 
.long 13750737,1946186868 .long 5329233,1409307732 .long 12632256,805318704 .long 16382457,2113961598 .long 13816530,3019945140 .long 10526880,671098920 .long 5592405,1426085205 .long 10592673,1744857192 .long 4276545,1342197840 .long 16448250,3187719870 .long 4408131,3489714384 .long 1250067,3288384708 .long 12895428,822096177 .long 3092271,3405827019 .long 11053224,704653866 .long 11974326,2902502829 .long 3947580,251662095 .long 2829099,3389049546 .long 12698049,1879076976 .long 16777215,4278255615 .long 13158600,838873650 .long 10855845,1761634665 .long 2105376,134219784 .long 9013641,1644192354 .long 0,0 .long 9474192,603989028 .long 4671303,3506491857 .long 15724527,4211145723 .long 15395562,3120609978 .long 12040119,3976261101 .long 1381653,1157645637 .long 394758,2164294017 .long 13487565,1929409395 .long 11908533,1828744557 .long 1184274,2214626436 .long 8289918,2667618207 .long 12303291,3993038574 .long 2697513,1241533002 .long 986895,3271607235 .long 12105912,771763758 .long 460551,3238052289 .long 263172,16777473 .long 10197915,3858818790 .long 9737364,620766501 .long 2171169,1207978056 .long 6710886,2566953369 .long 15132390,3103832505 .long 13553358,3003167667 .long 15592941,2063629179 .long 15198183,4177590777 .long 3881787,3456159438 .long 16711422,3204497343 .long 8355711,3741376479 .long 12961221,1895854449 .long 10790052,687876393 .long 3618615,3439381965 .long 11645361,1811967084 .long 5000268,318771987 .long 9539985,1677747300 .long 7237230,2600508315 .long 9276813,1660969827 .long 7763574,2634063261 .long 197379,3221274816 .long 2960685,1258310475 .long 14606046,3070277559 .long 9868950,2768283045 .long 2500134,2298513801 .long 8224125,1593859935 .long 13027014,2969612721 .long 6052956,385881879 .long 13882323,4093703412 .long 15921906,3154164924 .long 5197647,3540046803 .long 1644825,1174423110 .long 4144959,3472936911 .long 14474460,922761015 .long 7960953,1577082462 .long 1907997,1191200583 .long 5395026,2483066004 .long 15461355,4194368250 .long 15987699,4227923196 .long 7171437,1526750043 .long 6184542,2533398423 .long 16514043,4261478142 .long 6908265,1509972570 .long 11711154,2885725356 .long 15790320,1006648380 .long 3223857,1275087948 .long 789516,50332419 .long 13948116,889206069 .long 13619151,4076925939 .long 9211020,587211555 .long 14869218,3087055032 .long 7697781,1560304989 .long 11119017,1778412138 .long 4868682,2449511058 .long 5723991,3573601749 .long 8684676,553656609 .long 1118481,1140868164 .long 4539717,1358975313 .long 1776411,3321939654 .long 16119285,2097184125 .long 15000804,956315961 .long 921102,2197848963 .long 7566195,3691044060 .long 11184810,2852170410 .long 15856113,2080406652 .long 14540253,1996519287 .long 5855577,1442862678 .long 1315860,83887365 .long 7105644,452991771 .long 9605778,2751505572 .long 5526612,352326933 .long 13684944,872428596 .long 7895160,503324190 .long 7368816,469769244 .long 14935011,4160813304 .long 4802889,1375752786 .long 8421504,536879136 .long 5263440,335549460 .long 10987431,3909151209 .long 16185078,3170942397 .long 7829367,3707821533 .long 9671571,3825263844 .long 8816262,2701173153 .long 8618883,3758153952 .long 2763306,2315291274 .long 13092807,4043370993 .long 5987163,3590379222 .long 15329769,2046851706 .long 15658734,3137387451 .long 9408399,3808486371 .long 65793,1073758272 .long 4013373,1325420367 .globl Camellia_cbc_encrypt .type Camellia_cbc_encrypt,@function .align 16 Camellia_cbc_encrypt: .L_Camellia_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ecx cmpl $0,%ecx 
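# Note (not emitted by cmll-x86.pl): 28(%esp) is the length argument of
# Camellia_cbc_encrypt -- four registers were just pushed, so the third
# stack argument sits 16 bytes higher than at entry.  A zero byte count
# takes the je below straight to .L016enc_out and the function returns
# without touching the buffers.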
je .L016enc_out pushfl cld movl 24(%esp),%eax movl 28(%esp),%ebx movl 36(%esp),%edx movl 40(%esp),%ebp leal -64(%esp),%esi andl $-64,%esi leal -127(%edx),%edi subl %esi,%edi negl %edi andl $960,%edi subl %edi,%esi movl 44(%esp),%edi xchgl %esi,%esp addl $4,%esp movl %esi,20(%esp) movl %eax,24(%esp) movl %ebx,28(%esp) movl %ecx,32(%esp) movl %edx,36(%esp) movl %ebp,40(%esp) call .L017pic_point .L017pic_point: popl %ebp leal .LCamellia_SBOX-.L017pic_point(%ebp),%ebp movl $32,%esi .align 4 .L018prefetch_sbox: movl (%ebp),%eax movl 32(%ebp),%ebx movl 64(%ebp),%ecx movl 96(%ebp),%edx leal 128(%ebp),%ebp decl %esi jnz .L018prefetch_sbox movl 36(%esp),%eax subl $4096,%ebp movl 24(%esp),%esi movl 272(%eax),%edx cmpl $0,%edi je .L019DECRYPT movl 32(%esp),%ecx movl 40(%esp),%edi shll $6,%edx leal (%eax,%edx,1),%edx movl %edx,16(%esp) testl $4294967280,%ecx jz .L020enc_tail movl (%edi),%eax movl 4(%edi),%ebx .align 4 .L021enc_loop: movl 8(%edi),%ecx movl 12(%edi),%edx xorl (%esi),%eax xorl 4(%esi),%ebx xorl 8(%esi),%ecx bswap %eax xorl 12(%esi),%edx bswap %ebx movl 36(%esp),%edi bswap %ecx bswap %edx call _x86_Camellia_encrypt movl 24(%esp),%esi movl 28(%esp),%edi bswap %eax bswap %ebx bswap %ecx movl %eax,(%edi) bswap %edx movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 32(%esp),%ecx leal 16(%esi),%esi movl %esi,24(%esp) leal 16(%edi),%edx movl %edx,28(%esp) subl $16,%ecx testl $4294967280,%ecx movl %ecx,32(%esp) jnz .L021enc_loop testl $15,%ecx jnz .L020enc_tail movl 40(%esp),%esi movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) movl 20(%esp),%esp popfl .L016enc_out: popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 4 .L020enc_tail: movl %edi,%eax movl 28(%esp),%edi pushl %eax movl $16,%ebx subl %ecx,%ebx cmpl %esi,%edi je .L022enc_in_place .align 4 .long 2767451785 jmp .L023enc_skip_in_place .L022enc_in_place: leal (%edi,%ecx,1),%edi .L023enc_skip_in_place: movl %ebx,%ecx xorl %eax,%eax .align 4 .long 2868115081 popl %edi movl 28(%esp),%esi movl (%edi),%eax movl 4(%edi),%ebx movl $16,32(%esp) jmp .L021enc_loop .align 16 .L019DECRYPT: shll $6,%edx leal (%eax,%edx,1),%edx movl %eax,16(%esp) movl %edx,36(%esp) cmpl 28(%esp),%esi je .L024dec_in_place movl 40(%esp),%edi movl %edi,44(%esp) .align 4 .L025dec_loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx bswap %eax movl 12(%esi),%edx bswap %ebx movl 36(%esp),%edi bswap %ecx bswap %edx call _x86_Camellia_decrypt movl 44(%esp),%edi movl 32(%esp),%esi bswap %eax bswap %ebx bswap %ecx xorl (%edi),%eax bswap %edx xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx subl $16,%esi jc .L026dec_partial movl %esi,32(%esp) movl 24(%esp),%esi movl 28(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl %esi,44(%esp) leal 16(%esi),%esi movl %esi,24(%esp) leal 16(%edi),%edi movl %edi,28(%esp) jnz .L025dec_loop movl 44(%esp),%edi .L027dec_end: movl 40(%esp),%esi movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) jmp .L028dec_out .align 4 .L026dec_partial: leal 44(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) leal 16(%esi),%ecx movl %edi,%esi movl 28(%esp),%edi .long 2767451785 movl 24(%esp),%edi jmp .L027dec_end .align 4 .L024dec_in_place: .L029dec_in_place_loop: leal 44(%esp),%edi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) bswap %eax 
movl %edx,12(%edi) bswap %ebx movl 36(%esp),%edi bswap %ecx bswap %edx call _x86_Camellia_decrypt movl 40(%esp),%edi movl 28(%esp),%esi bswap %eax bswap %ebx bswap %ecx xorl (%edi),%eax bswap %edx xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) leal 16(%esi),%esi movl %esi,28(%esp) leal 44(%esp),%esi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 24(%esp),%esi leal 16(%esi),%esi movl %esi,24(%esp) movl 32(%esp),%ecx subl $16,%ecx jc .L030dec_in_place_partial movl %ecx,32(%esp) jnz .L029dec_in_place_loop jmp .L028dec_out .align 4 .L030dec_in_place_partial: movl 28(%esp),%edi leal 44(%esp),%esi leal (%edi,%ecx,1),%edi leal 16(%esi,%ecx,1),%esi negl %ecx .long 2767451785 .align 4 .L028dec_out: movl 20(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_cbc_encrypt,.-.L_Camellia_cbc_encrypt_begin .byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 #else .file "cmll-x86.S" .text .globl Camellia_EncryptBlock_Rounds .type Camellia_EncryptBlock_Rounds,@function .align 16 Camellia_EncryptBlock_Rounds: .L_Camellia_EncryptBlock_Rounds_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl 24(%esp),%esi movl 28(%esp),%edi movl %esp,%ebx subl $28,%esp andl $-64,%esp leal -127(%edi),%ecx subl %esp,%ecx negl %ecx andl $960,%ecx subl %ecx,%esp addl $4,%esp shll $6,%eax leal (%edi,%eax,1),%eax movl %ebx,20(%esp) movl %eax,16(%esp) call .L000pic_point .L000pic_point: popl %ebp leal .LCamellia_SBOX-.L000pic_point(%ebp),%ebp movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx bswap %eax movl 12(%esi),%edx bswap %ebx bswap %ecx bswap %edx call _x86_Camellia_encrypt movl 20(%esp),%esp bswap %eax movl 32(%esp),%esi bswap %ebx bswap %ecx bswap %edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_EncryptBlock_Rounds,.-.L_Camellia_EncryptBlock_Rounds_begin .globl Camellia_EncryptBlock .type Camellia_EncryptBlock,@function .align 16 Camellia_EncryptBlock: .L_Camellia_EncryptBlock_begin: movl $128,%eax subl 4(%esp),%eax movl $3,%eax adcl $0,%eax movl %eax,4(%esp) jmp .L_Camellia_EncryptBlock_Rounds_begin .size Camellia_EncryptBlock,.-.L_Camellia_EncryptBlock_begin .globl Camellia_encrypt .type Camellia_encrypt,@function .align 16 Camellia_encrypt: .L_Camellia_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 28(%esp),%edi movl %esp,%ebx subl $28,%esp andl $-64,%esp movl 272(%edi),%eax leal -127(%edi),%ecx subl %esp,%ecx negl %ecx andl $960,%ecx subl %ecx,%esp addl $4,%esp shll $6,%eax leal (%edi,%eax,1),%eax movl %ebx,20(%esp) movl %eax,16(%esp) call .L001pic_point .L001pic_point: popl %ebp leal .LCamellia_SBOX-.L001pic_point(%ebp),%ebp movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx bswap %eax movl 12(%esi),%edx bswap %ebx bswap %ecx bswap %edx call _x86_Camellia_encrypt movl 20(%esp),%esp bswap %eax movl 24(%esp),%esi bswap %ebx bswap %ecx bswap %edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_encrypt,.-.L_Camellia_encrypt_begin .type _x86_Camellia_encrypt,@function .align 16 _x86_Camellia_encrypt: xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl 
16(%edi),%esi movl %eax,4(%esp) movl %ebx,8(%esp) movl %ecx,12(%esp) movl %edx,16(%esp) .align 16 .L002loop: xorl %esi,%eax xorl 20(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 24(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl 28(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 32(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) xorl %esi,%eax xorl 36(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 40(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl 44(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 48(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) xorl %esi,%eax xorl 52(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 56(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl 60(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 64(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) addl $64,%edi cmpl 20(%esp),%edi je .L003done 
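# Note (not emitted by cmll-x86.pl): the six Feistel rounds above advanced
# the subkey pointer by 64 bytes and compared it against the end-of-schedule
# pointer saved at 20(%esp).  When rounds remain, the fall-through code below
# is Camellia's FL/FL^-1 keyed mixing layer (the andl/orl, 1-bit roll and
# xorl sequence against the subkey words at 0..12(%edi)) before jumping back
# to .L002loop for the next six rounds.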
andl %eax,%esi movl 16(%esp),%edx roll $1,%esi movl %edx,%ecx xorl %esi,%ebx orl 12(%edi),%ecx movl %ebx,8(%esp) xorl 12(%esp),%ecx movl 4(%edi),%esi movl %ecx,12(%esp) orl %ebx,%esi andl 8(%edi),%ecx xorl %esi,%eax roll $1,%ecx movl %eax,4(%esp) xorl %ecx,%edx movl 16(%edi),%esi movl %edx,16(%esp) jmp .L002loop .align 8 .L003done: movl %eax,%ecx movl %ebx,%edx movl 12(%esp),%eax movl 16(%esp),%ebx xorl %esi,%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx ret .size _x86_Camellia_encrypt,.-_x86_Camellia_encrypt .globl Camellia_DecryptBlock_Rounds .type Camellia_DecryptBlock_Rounds,@function .align 16 Camellia_DecryptBlock_Rounds: .L_Camellia_DecryptBlock_Rounds_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl 24(%esp),%esi movl 28(%esp),%edi movl %esp,%ebx subl $28,%esp andl $-64,%esp leal -127(%edi),%ecx subl %esp,%ecx negl %ecx andl $960,%ecx subl %ecx,%esp addl $4,%esp shll $6,%eax movl %edi,16(%esp) leal (%edi,%eax,1),%edi movl %ebx,20(%esp) call .L004pic_point .L004pic_point: popl %ebp leal .LCamellia_SBOX-.L004pic_point(%ebp),%ebp movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx bswap %eax movl 12(%esi),%edx bswap %ebx bswap %ecx bswap %edx call _x86_Camellia_decrypt movl 20(%esp),%esp bswap %eax movl 32(%esp),%esi bswap %ebx bswap %ecx bswap %edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_DecryptBlock_Rounds,.-.L_Camellia_DecryptBlock_Rounds_begin .globl Camellia_DecryptBlock .type Camellia_DecryptBlock,@function .align 16 Camellia_DecryptBlock: .L_Camellia_DecryptBlock_begin: movl $128,%eax subl 4(%esp),%eax movl $3,%eax adcl $0,%eax movl %eax,4(%esp) jmp .L_Camellia_DecryptBlock_Rounds_begin .size Camellia_DecryptBlock,.-.L_Camellia_DecryptBlock_begin .globl Camellia_decrypt .type Camellia_decrypt,@function .align 16 Camellia_decrypt: .L_Camellia_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 28(%esp),%edi movl %esp,%ebx subl $28,%esp andl $-64,%esp movl 272(%edi),%eax leal -127(%edi),%ecx subl %esp,%ecx negl %ecx andl $960,%ecx subl %ecx,%esp addl $4,%esp shll $6,%eax movl %edi,16(%esp) leal (%edi,%eax,1),%edi movl %ebx,20(%esp) call .L005pic_point .L005pic_point: popl %ebp leal .LCamellia_SBOX-.L005pic_point(%ebp),%ebp movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx bswap %eax movl 12(%esi),%edx bswap %ebx bswap %ecx bswap %edx call _x86_Camellia_decrypt movl 20(%esp),%esp bswap %eax movl 24(%esp),%esi bswap %ebx bswap %ecx bswap %edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_decrypt,.-.L_Camellia_decrypt_begin .type _x86_Camellia_decrypt,@function .align 16 _x86_Camellia_decrypt: xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl -8(%edi),%esi movl %eax,4(%esp) movl %ebx,8(%esp) movl %ecx,12(%esp) movl %edx,16(%esp) .align 16 .L006loop: xorl %esi,%eax xorl -4(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl -16(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) 
xorl %esi,%ecx xorl -12(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl -24(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) xorl %esi,%eax xorl -20(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl -32(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl -28(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl -40(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) xorl %esi,%eax xorl -36(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 16(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 12(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl -48(%edi),%esi xorl %ecx,%edx movl %edx,16(%esp) xorl %ebx,%ecx movl %ecx,12(%esp) xorl %esi,%ecx xorl -44(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 8(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl 4(%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl -56(%edi),%esi xorl %eax,%ebx movl %ebx,8(%esp) xorl %edx,%eax movl %eax,4(%esp) subl $64,%edi cmpl 20(%esp),%edi je .L007done andl %eax,%esi movl 16(%esp),%edx roll $1,%esi movl %edx,%ecx xorl %esi,%ebx orl 4(%edi),%ecx movl %ebx,8(%esp) xorl 12(%esp),%ecx movl 12(%edi),%esi movl %ecx,12(%esp) orl %ebx,%esi andl (%edi),%ecx xorl %esi,%eax roll $1,%ecx movl %eax,4(%esp) xorl %ecx,%edx movl -8(%edi),%esi movl %edx,16(%esp) jmp .L006loop .align 8 .L007done: movl %eax,%ecx movl %ebx,%edx movl 12(%esp),%eax movl 16(%esp),%ebx xorl %esi,%ecx xorl 12(%edi),%edx xorl (%edi),%eax xorl 4(%edi),%ebx ret .size _x86_Camellia_decrypt,.-_x86_Camellia_decrypt .globl Camellia_Ekeygen .type Camellia_Ekeygen,@function .align 16 Camellia_Ekeygen: .L_Camellia_Ekeygen_begin: pushl %ebp pushl %ebx pushl %esi 
pushl %edi subl $16,%esp movl 36(%esp),%ebp movl 40(%esp),%esi movl 44(%esp),%edi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) cmpl $128,%ebp je .L0081st128 movl 16(%esi),%eax movl 20(%esi),%ebx cmpl $192,%ebp je .L0091st192 movl 24(%esi),%ecx movl 28(%esi),%edx jmp .L0101st256 .align 4 .L0091st192: movl %eax,%ecx movl %ebx,%edx notl %ecx notl %edx .align 4 .L0101st256: bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,32(%edi) movl %ebx,36(%edi) movl %ecx,40(%edi) movl %edx,44(%edi) xorl (%edi),%eax xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx .align 4 .L0081st128: call .L011pic_point .L011pic_point: popl %ebp leal .LCamellia_SBOX-.L011pic_point(%ebp),%ebp leal .LCamellia_SIGMA-.LCamellia_SBOX(%ebp),%edi movl (%edi),%esi movl %eax,(%esp) movl %ebx,4(%esp) movl %ecx,8(%esp) movl %edx,12(%esp) xorl %esi,%eax xorl 4(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 12(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 8(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 8(%edi),%esi xorl %ecx,%edx movl %edx,12(%esp) xorl %ebx,%ecx movl %ecx,8(%esp) xorl %esi,%ecx xorl 12(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 4(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl (%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 16(%edi),%esi xorl %eax,%ebx movl %ebx,4(%esp) xorl %edx,%eax movl %eax,(%esp) movl 8(%esp),%ecx movl 12(%esp),%edx movl 44(%esp),%esi xorl (%esi),%eax xorl 4(%esi),%ebx xorl 8(%esi),%ecx xorl 12(%esi),%edx movl 16(%edi),%esi movl %eax,(%esp) movl %ebx,4(%esp) movl %ecx,8(%esp) movl %edx,12(%esp) xorl %esi,%eax xorl 20(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 12(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 8(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 24(%edi),%esi xorl %ecx,%edx movl %edx,12(%esp) xorl %ebx,%ecx movl %ecx,8(%esp) xorl %esi,%ecx xorl 28(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 4(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl (%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 32(%edi),%esi xorl %eax,%ebx movl %ebx,4(%esp) xorl %edx,%eax movl %eax,(%esp) movl 8(%esp),%ecx movl 12(%esp),%edx movl 36(%esp),%esi cmpl $128,%esi jne .L0122nd256 movl 44(%esp),%edi leal 
128(%edi),%edi movl %eax,-112(%edi) movl %ebx,-108(%edi) movl %ecx,-104(%edi) movl %edx,-100(%edi) movl %eax,%ebp shll $15,%eax movl %ebx,%esi shrl $17,%esi shll $15,%ebx orl %esi,%eax movl %ecx,%esi shll $15,%ecx movl %eax,-80(%edi) shrl $17,%esi orl %esi,%ebx shrl $17,%ebp movl %edx,%esi shrl $17,%esi movl %ebx,-76(%edi) shll $15,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,-72(%edi) movl %edx,-68(%edi) movl %eax,%ebp shll $15,%eax movl %ebx,%esi shrl $17,%esi shll $15,%ebx orl %esi,%eax movl %ecx,%esi shll $15,%ecx movl %eax,-64(%edi) shrl $17,%esi orl %esi,%ebx shrl $17,%ebp movl %edx,%esi shrl $17,%esi movl %ebx,-60(%edi) shll $15,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,-56(%edi) movl %edx,-52(%edi) movl %eax,%ebp shll $15,%eax movl %ebx,%esi shrl $17,%esi shll $15,%ebx orl %esi,%eax movl %ecx,%esi shll $15,%ecx movl %eax,-32(%edi) shrl $17,%esi orl %esi,%ebx shrl $17,%ebp movl %edx,%esi shrl $17,%esi movl %ebx,-28(%edi) shll $15,%edx orl %esi,%ecx orl %ebp,%edx movl %eax,%ebp shll $15,%eax movl %ebx,%esi shrl $17,%esi shll $15,%ebx orl %esi,%eax movl %ecx,%esi shll $15,%ecx movl %eax,-16(%edi) shrl $17,%esi orl %esi,%ebx shrl $17,%ebp movl %edx,%esi shrl $17,%esi movl %ebx,-12(%edi) shll $15,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,-8(%edi) movl %edx,-4(%edi) movl %ebx,%ebp shll $2,%ebx movl %ecx,%esi shrl $30,%esi shll $2,%ecx orl %esi,%ebx movl %edx,%esi shll $2,%edx movl %ebx,32(%edi) shrl $30,%esi orl %esi,%ecx shrl $30,%ebp movl %eax,%esi shrl $30,%esi movl %ecx,36(%edi) shll $2,%eax orl %esi,%edx orl %ebp,%eax movl %edx,40(%edi) movl %eax,44(%edi) movl %ebx,%ebp shll $17,%ebx movl %ecx,%esi shrl $15,%esi shll $17,%ecx orl %esi,%ebx movl %edx,%esi shll $17,%edx movl %ebx,64(%edi) shrl $15,%esi orl %esi,%ecx shrl $15,%ebp movl %eax,%esi shrl $15,%esi movl %ecx,68(%edi) shll $17,%eax orl %esi,%edx orl %ebp,%eax movl %edx,72(%edi) movl %eax,76(%edi) movl -128(%edi),%ebx movl -124(%edi),%ecx movl -120(%edi),%edx movl -116(%edi),%eax movl %ebx,%ebp shll $15,%ebx movl %ecx,%esi shrl $17,%esi shll $15,%ecx orl %esi,%ebx movl %edx,%esi shll $15,%edx movl %ebx,-96(%edi) shrl $17,%esi orl %esi,%ecx shrl $17,%ebp movl %eax,%esi shrl $17,%esi movl %ecx,-92(%edi) shll $15,%eax orl %esi,%edx orl %ebp,%eax movl %edx,-88(%edi) movl %eax,-84(%edi) movl %ebx,%ebp shll $30,%ebx movl %ecx,%esi shrl $2,%esi shll $30,%ecx orl %esi,%ebx movl %edx,%esi shll $30,%edx movl %ebx,-48(%edi) shrl $2,%esi orl %esi,%ecx shrl $2,%ebp movl %eax,%esi shrl $2,%esi movl %ecx,-44(%edi) shll $30,%eax orl %esi,%edx orl %ebp,%eax movl %edx,-40(%edi) movl %eax,-36(%edi) movl %ebx,%ebp shll $15,%ebx movl %ecx,%esi shrl $17,%esi shll $15,%ecx orl %esi,%ebx movl %edx,%esi shll $15,%edx shrl $17,%esi orl %esi,%ecx shrl $17,%ebp movl %eax,%esi shrl $17,%esi shll $15,%eax orl %esi,%edx orl %ebp,%eax movl %edx,-24(%edi) movl %eax,-20(%edi) movl %ebx,%ebp shll $17,%ebx movl %ecx,%esi shrl $15,%esi shll $17,%ecx orl %esi,%ebx movl %edx,%esi shll $17,%edx movl %ebx,(%edi) shrl $15,%esi orl %esi,%ecx shrl $15,%ebp movl %eax,%esi shrl $15,%esi movl %ecx,4(%edi) shll $17,%eax orl %esi,%edx orl %ebp,%eax movl %edx,8(%edi) movl %eax,12(%edi) movl %ebx,%ebp shll $17,%ebx movl %ecx,%esi shrl $15,%esi shll $17,%ecx orl %esi,%ebx movl %edx,%esi shll $17,%edx movl %ebx,16(%edi) shrl $15,%esi orl %esi,%ecx shrl $15,%ebp movl %eax,%esi shrl $15,%esi movl %ecx,20(%edi) shll $17,%eax orl %esi,%edx orl %ebp,%eax movl %edx,24(%edi) movl %eax,28(%edi) movl %ebx,%ebp shll $17,%ebx movl %ecx,%esi shrl $15,%esi shll $17,%ecx orl %esi,%ebx movl 
%edx,%esi shll $17,%edx movl %ebx,48(%edi) shrl $15,%esi orl %esi,%ecx shrl $15,%ebp movl %eax,%esi shrl $15,%esi movl %ecx,52(%edi) shll $17,%eax orl %esi,%edx orl %ebp,%eax movl %edx,56(%edi) movl %eax,60(%edi) movl $3,%eax jmp .L013done .align 16 .L0122nd256: movl 44(%esp),%esi movl %eax,48(%esi) movl %ebx,52(%esi) movl %ecx,56(%esi) movl %edx,60(%esi) xorl 32(%esi),%eax xorl 36(%esi),%ebx xorl 40(%esi),%ecx xorl 44(%esi),%edx movl 32(%edi),%esi movl %eax,(%esp) movl %ebx,4(%esp) movl %ecx,8(%esp) movl %edx,12(%esp) xorl %esi,%eax xorl 36(%edi),%ebx movzbl %ah,%esi movl 2052(%ebp,%esi,8),%edx movzbl %al,%esi xorl 4(%ebp,%esi,8),%edx shrl $16,%eax movzbl %bl,%esi movl (%ebp,%esi,8),%ecx movzbl %ah,%esi xorl (%ebp,%esi,8),%edx movzbl %bh,%esi xorl 4(%ebp,%esi,8),%ecx shrl $16,%ebx movzbl %al,%eax xorl 2048(%ebp,%eax,8),%edx movzbl %bh,%esi movl 12(%esp),%eax xorl %edx,%ecx rorl $8,%edx xorl 2048(%ebp,%esi,8),%ecx movzbl %bl,%esi movl 8(%esp),%ebx xorl %eax,%edx xorl 2052(%ebp,%esi,8),%ecx movl 40(%edi),%esi xorl %ecx,%edx movl %edx,12(%esp) xorl %ebx,%ecx movl %ecx,8(%esp) xorl %esi,%ecx xorl 44(%edi),%edx movzbl %ch,%esi movl 2052(%ebp,%esi,8),%ebx movzbl %cl,%esi xorl 4(%ebp,%esi,8),%ebx shrl $16,%ecx movzbl %dl,%esi movl (%ebp,%esi,8),%eax movzbl %ch,%esi xorl (%ebp,%esi,8),%ebx movzbl %dh,%esi xorl 4(%ebp,%esi,8),%eax shrl $16,%edx movzbl %cl,%ecx xorl 2048(%ebp,%ecx,8),%ebx movzbl %dh,%esi movl 4(%esp),%ecx xorl %ebx,%eax rorl $8,%ebx xorl 2048(%ebp,%esi,8),%eax movzbl %dl,%esi movl (%esp),%edx xorl %ecx,%ebx xorl 2052(%ebp,%esi,8),%eax movl 48(%edi),%esi xorl %eax,%ebx movl %ebx,4(%esp) xorl %edx,%eax movl %eax,(%esp) movl 8(%esp),%ecx movl 12(%esp),%edx movl 44(%esp),%edi leal 128(%edi),%edi movl %eax,-112(%edi) movl %ebx,-108(%edi) movl %ecx,-104(%edi) movl %edx,-100(%edi) movl %eax,%ebp shll $30,%eax movl %ebx,%esi shrl $2,%esi shll $30,%ebx orl %esi,%eax movl %ecx,%esi shll $30,%ecx movl %eax,-48(%edi) shrl $2,%esi orl %esi,%ebx shrl $2,%ebp movl %edx,%esi shrl $2,%esi movl %ebx,-44(%edi) shll $30,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,-40(%edi) movl %edx,-36(%edi) movl %eax,%ebp shll $30,%eax movl %ebx,%esi shrl $2,%esi shll $30,%ebx orl %esi,%eax movl %ecx,%esi shll $30,%ecx movl %eax,32(%edi) shrl $2,%esi orl %esi,%ebx shrl $2,%ebp movl %edx,%esi shrl $2,%esi movl %ebx,36(%edi) shll $30,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,40(%edi) movl %edx,44(%edi) movl %ebx,%ebp shll $19,%ebx movl %ecx,%esi shrl $13,%esi shll $19,%ecx orl %esi,%ebx movl %edx,%esi shll $19,%edx movl %ebx,128(%edi) shrl $13,%esi orl %esi,%ecx shrl $13,%ebp movl %eax,%esi shrl $13,%esi movl %ecx,132(%edi) shll $19,%eax orl %esi,%edx orl %ebp,%eax movl %edx,136(%edi) movl %eax,140(%edi) movl -96(%edi),%ebx movl -92(%edi),%ecx movl -88(%edi),%edx movl -84(%edi),%eax movl %ebx,%ebp shll $15,%ebx movl %ecx,%esi shrl $17,%esi shll $15,%ecx orl %esi,%ebx movl %edx,%esi shll $15,%edx movl %ebx,-96(%edi) shrl $17,%esi orl %esi,%ecx shrl $17,%ebp movl %eax,%esi shrl $17,%esi movl %ecx,-92(%edi) shll $15,%eax orl %esi,%edx orl %ebp,%eax movl %edx,-88(%edi) movl %eax,-84(%edi) movl %ebx,%ebp shll $15,%ebx movl %ecx,%esi shrl $17,%esi shll $15,%ecx orl %esi,%ebx movl %edx,%esi shll $15,%edx movl %ebx,-64(%edi) shrl $17,%esi orl %esi,%ecx shrl $17,%ebp movl %eax,%esi shrl $17,%esi movl %ecx,-60(%edi) shll $15,%eax orl %esi,%edx orl %ebp,%eax movl %edx,-56(%edi) movl %eax,-52(%edi) movl %ebx,%ebp shll $30,%ebx movl %ecx,%esi shrl $2,%esi shll $30,%ecx orl %esi,%ebx movl %edx,%esi shll $30,%edx movl %ebx,16(%edi) 
shrl $2,%esi orl %esi,%ecx shrl $2,%ebp movl %eax,%esi shrl $2,%esi movl %ecx,20(%edi) shll $30,%eax orl %esi,%edx orl %ebp,%eax movl %edx,24(%edi) movl %eax,28(%edi) movl %ecx,%ebp shll $2,%ecx movl %edx,%esi shrl $30,%esi shll $2,%edx orl %esi,%ecx movl %eax,%esi shll $2,%eax movl %ecx,80(%edi) shrl $30,%esi orl %esi,%edx shrl $30,%ebp movl %ebx,%esi shrl $30,%esi movl %edx,84(%edi) shll $2,%ebx orl %esi,%eax orl %ebp,%ebx movl %eax,88(%edi) movl %ebx,92(%edi) movl -80(%edi),%ecx movl -76(%edi),%edx movl -72(%edi),%eax movl -68(%edi),%ebx movl %ecx,%ebp shll $15,%ecx movl %edx,%esi shrl $17,%esi shll $15,%edx orl %esi,%ecx movl %eax,%esi shll $15,%eax movl %ecx,-80(%edi) shrl $17,%esi orl %esi,%edx shrl $17,%ebp movl %ebx,%esi shrl $17,%esi movl %edx,-76(%edi) shll $15,%ebx orl %esi,%eax orl %ebp,%ebx movl %eax,-72(%edi) movl %ebx,-68(%edi) movl %ecx,%ebp shll $30,%ecx movl %edx,%esi shrl $2,%esi shll $30,%edx orl %esi,%ecx movl %eax,%esi shll $30,%eax movl %ecx,-16(%edi) shrl $2,%esi orl %esi,%edx shrl $2,%ebp movl %ebx,%esi shrl $2,%esi movl %edx,-12(%edi) shll $30,%ebx orl %esi,%eax orl %ebp,%ebx movl %eax,-8(%edi) movl %ebx,-4(%edi) movl %edx,64(%edi) movl %eax,68(%edi) movl %ebx,72(%edi) movl %ecx,76(%edi) movl %edx,%ebp shll $17,%edx movl %eax,%esi shrl $15,%esi shll $17,%eax orl %esi,%edx movl %ebx,%esi shll $17,%ebx movl %edx,96(%edi) shrl $15,%esi orl %esi,%eax shrl $15,%ebp movl %ecx,%esi shrl $15,%esi movl %eax,100(%edi) shll $17,%ecx orl %esi,%ebx orl %ebp,%ecx movl %ebx,104(%edi) movl %ecx,108(%edi) movl -128(%edi),%edx movl -124(%edi),%eax movl -120(%edi),%ebx movl -116(%edi),%ecx movl %eax,%ebp shll $13,%eax movl %ebx,%esi shrl $19,%esi shll $13,%ebx orl %esi,%eax movl %ecx,%esi shll $13,%ecx movl %eax,-32(%edi) shrl $19,%esi orl %esi,%ebx shrl $19,%ebp movl %edx,%esi shrl $19,%esi movl %ebx,-28(%edi) shll $13,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,-24(%edi) movl %edx,-20(%edi) movl %eax,%ebp shll $15,%eax movl %ebx,%esi shrl $17,%esi shll $15,%ebx orl %esi,%eax movl %ecx,%esi shll $15,%ecx movl %eax,(%edi) shrl $17,%esi orl %esi,%ebx shrl $17,%ebp movl %edx,%esi shrl $17,%esi movl %ebx,4(%edi) shll $15,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,8(%edi) movl %edx,12(%edi) movl %eax,%ebp shll $17,%eax movl %ebx,%esi shrl $15,%esi shll $17,%ebx orl %esi,%eax movl %ecx,%esi shll $17,%ecx movl %eax,48(%edi) shrl $15,%esi orl %esi,%ebx shrl $15,%ebp movl %edx,%esi shrl $15,%esi movl %ebx,52(%edi) shll $17,%edx orl %esi,%ecx orl %ebp,%edx movl %ecx,56(%edi) movl %edx,60(%edi) movl %ebx,%ebp shll $2,%ebx movl %ecx,%esi shrl $30,%esi shll $2,%ecx orl %esi,%ebx movl %edx,%esi shll $2,%edx movl %ebx,112(%edi) shrl $30,%esi orl %esi,%ecx shrl $30,%ebp movl %eax,%esi shrl $30,%esi movl %ecx,116(%edi) shll $2,%eax orl %esi,%edx orl %ebp,%eax movl %edx,120(%edi) movl %eax,124(%edi) movl $4,%eax .L013done: leal 144(%edi),%edx addl $16,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_Ekeygen,.-.L_Camellia_Ekeygen_begin .globl private_Camellia_set_key .type private_Camellia_set_key,@function .align 16 private_Camellia_set_key: .L_private_Camellia_set_key_begin: pushl %ebx movl 8(%esp),%ecx movl 12(%esp),%ebx movl 16(%esp),%edx movl $-1,%eax testl %ecx,%ecx jz .L014done testl %edx,%edx jz .L014done movl $-2,%eax cmpl $256,%ebx je .L015arg_ok cmpl $192,%ebx je .L015arg_ok cmpl $128,%ebx jne .L014done .align 4 .L015arg_ok: pushl %edx pushl %ecx pushl %ebx call .L_Camellia_Ekeygen_begin addl $12,%esp movl %eax,(%edx) xorl %eax,%eax .align 4 .L014done: popl %ebx ret .size 
private_Camellia_set_key,.-.L_private_Camellia_set_key_begin .align 64 .LCamellia_SIGMA: .long 2694735487,1003262091,3061508184,1286239154,3337565999,3914302142,1426019237,4057165596,283453434,3731369245,2958461122,3018244605,0,0,0,0 .align 64 .LCamellia_SBOX: .long 1886416896,1886388336 .long 2189591040,741081132 .long 741092352,3014852787 .long 3974949888,3233808576 .long 3014898432,3840147684 .long 656877312,1465319511 .long 3233857536,3941204202 .long 3857048832,2930639022 .long 3840205824,589496355 .long 2240120064,1802174571 .long 1465341696,1162149957 .long 892679424,2779054245 .long 3941263872,3991732461 .long 202116096,1330577487 .long 2930683392,488439837 .long 1094795520,2459041938 .long 589505280,2256928902 .long 4025478912,2947481775 .long 1802201856,2088501372 .long 2475922176,522125343 .long 1162167552,1044250686 .long 421075200,3705405660 .long 2779096320,1583218782 .long 555819264,185270283 .long 3991792896,2795896998 .long 235802112,960036921 .long 1330597632,3587506389 .long 1313754624,1566376029 .long 488447232,3654877401 .long 1701143808,1515847770 .long 2459079168,1364262993 .long 3183328512,1819017324 .long 2256963072,2341142667 .long 3099113472,2593783962 .long 2947526400,4227531003 .long 2408550144,2964324528 .long 2088532992,1953759348 .long 3958106880,724238379 .long 522133248,4042260720 .long 3469659648,2223243396 .long 1044266496,3755933919 .long 808464384,3419078859 .long 3705461760,875823156 .long 1600085760,1987444854 .long 1583242752,1835860077 .long 3318072576,2846425257 .long 185273088,3520135377 .long 437918208,67371012 .long 2795939328,336855060 .long 3789676800,976879674 .long 960051456,3739091166 .long 3402287616,286326801 .long 3587560704,842137650 .long 1195853568,2627469468 .long 1566399744,1397948499 .long 1027423488,4075946226 .long 3654932736,4278059262 .long 16843008,3486449871 .long 1515870720,3284336835 .long 3604403712,2054815866 .long 1364283648,606339108 .long 1448498688,3907518696 .long 1819044864,1616904288 .long 1296911616,1768489065 .long 2341178112,2863268010 .long 218959104,2694840480 .long 2593823232,2711683233 .long 1717986816,1650589794 .long 4227595008,1414791252 .long 3435973632,505282590 .long 2964369408,3772776672 .long 757935360,1684275300 .long 1953788928,269484048 .long 303174144,0 .long 724249344,2745368739 .long 538976256,1970602101 .long 4042321920,2324299914 .long 2981212416,3873833190 .long 2223277056,151584777 .long 2576980224,3722248413 .long 3755990784,2273771655 .long 1280068608,2206400643 .long 3419130624,3452764365 .long 3267543552,2425356432 .long 875836416,1936916595 .long 2122219008,4143317238 .long 1987474944,2644312221 .long 84215040,3216965823 .long 1835887872,1381105746 .long 3082270464,3638034648 .long 2846468352,3368550600 .long 825307392,3334865094 .long 3520188672,2172715137 .long 387389184,1869545583 .long 67372032,320012307 .long 3621246720,1667432547 .long 336860160,3924361449 .long 1482184704,2812739751 .long 976894464,2677997727 .long 1633771776,3166437564 .long 3739147776,690552873 .long 454761216,4193845497 .long 286331136,791609391 .long 471604224,3031695540 .long 842150400,2021130360 .long 252645120,101056518 .long 2627509248,3890675943 .long 370546176,1903231089 .long 1397969664,3570663636 .long 404232192,2880110763 .long 4076007936,2290614408 .long 572662272,2374828173 .long 4278124032,1920073842 .long 1145324544,3115909305 .long 3486502656,4177002744 .long 2998055424,2896953516 .long 3284386560,909508662 .long 3048584448,707395626 .long 2054846976,1010565180 .long 2442236160,4059103473 
.long 606348288,1077936192 .long 134744064,3553820883 .long 3907577856,3149594811 .long 2829625344,1128464451 .long 1616928768,353697813 .long 4244438016,2913796269 .long 1768515840,2004287607 .long 1347440640,2155872384 .long 2863311360,2189557890 .long 3503345664,3974889708 .long 2694881280,656867367 .long 2105376000,3856990437 .long 2711724288,2240086149 .long 2307492096,892665909 .long 1650614784,202113036 .long 2543294208,1094778945 .long 1414812672,4025417967 .long 1532713728,2475884691 .long 505290240,421068825 .long 2509608192,555810849 .long 3772833792,235798542 .long 4294967040,1313734734 .long 1684300800,1701118053 .long 3537031680,3183280317 .long 269488128,3099066552 .long 3301229568,2408513679 .long 0,3958046955 .long 1212696576,3469607118 .long 2745410304,808452144 .long 4160222976,1600061535 .long 1970631936,3318022341 .long 3688618752,437911578 .long 2324335104,3789619425 .long 50529024,3402236106 .long 3873891840,1195835463 .long 3671775744,1027407933 .long 151587072,16842753 .long 1061109504,3604349142 .long 3722304768,1448476758 .long 2492765184,1296891981 .long 2273806080,218955789 .long 1549556736,1717960806 .long 2206434048,3435921612 .long 33686016,757923885 .long 3452816640,303169554 .long 1246382592,538968096 .long 2425393152,2981167281 .long 858993408,2576941209 .long 1936945920,1280049228 .long 1734829824,3267494082 .long 4143379968,2122186878 .long 4092850944,84213765 .long 2644352256,3082223799 .long 2139062016,825294897 .long 3217014528,387383319 .long 3806519808,3621191895 .long 1381126656,1482162264 .long 2610666240,1633747041 .long 3638089728,454754331 .long 640034304,471597084 .long 3368601600,252641295 .long 926365440,370540566 .long 3334915584,404226072 .long 993737472,572653602 .long 2172748032,1145307204 .long 2526451200,2998010034 .long 1869573888,3048538293 .long 1263225600,2442199185 .long 320017152,134742024 .long 3200171520,2829582504 .long 1667457792,4244373756 .long 774778368,1347420240 .long 3924420864,3503292624 .long 2038003968,2105344125 .long 2812782336,2307457161 .long 2358021120,2543255703 .long 2678038272,1532690523 .long 1852730880,2509570197 .long 3166485504,4294902015 .long 2391707136,3536978130 .long 690563328,3301179588 .long 4126536960,1212678216 .long 4193908992,4160159991 .long 3065427456,3688562907 .long 791621376,50528259 .long 4261281024,3671720154 .long 3031741440,1061093439 .long 1499027712,2492727444 .long 2021160960,1549533276 .long 2560137216,33685506 .long 101058048,1246363722 .long 1785358848,858980403 .long 3890734848,1734803559 .long 1179010560,4092788979 .long 1903259904,2139029631 .long 3132799488,3806462178 .long 3570717696,2610626715 .long 623191296,640024614 .long 2880154368,926351415 .long 1111638528,993722427 .long 2290649088,2526412950 .long 2728567296,1263206475 .long 2374864128,3200123070 .long 4210752000,774766638 .long 1920102912,2037973113 .long 117901056,2357985420 .long 3115956480,1852702830 .long 1431655680,2391670926 .long 4177065984,4126474485 .long 4008635904,3065381046 .long 2896997376,4261216509 .long 168430080,1499005017 .long 909522432,2560098456 .long 1229539584,1785331818 .long 707406336,1178992710 .long 1751672832,3132752058 .long 1010580480,623181861 .long 943208448,1111621698 .long 4059164928,2728525986 .long 2762253312,4210688250 .long 1077952512,117899271 .long 673720320,1431634005 .long 3553874688,4008575214 .long 2071689984,168427530 .long 3149642496,1229520969 .long 3385444608,1751646312 .long 1128481536,943194168 .long 3250700544,2762211492 .long 353703168,673710120 .long 
3823362816,2071658619 .long 2913840384,3385393353 .long 4109693952,3250651329 .long 2004317952,3823304931 .long 3351758592,4109631732 .long 2155905024,3351707847 .long 2661195264,2661154974 .long 14737632,939538488 .long 328965,1090535745 .long 5789784,369104406 .long 14277081,1979741814 .long 6776679,3640711641 .long 5131854,2466288531 .long 8487297,1610637408 .long 13355979,4060148466 .long 13224393,1912631922 .long 723723,3254829762 .long 11447982,2868947883 .long 6974058,2583730842 .long 14013909,1962964341 .long 1579032,100664838 .long 6118749,1459640151 .long 8553090,2684395680 .long 4605510,2432733585 .long 14671839,4144035831 .long 14079702,3036722613 .long 2565927,3372272073 .long 9079434,2717950626 .long 3289650,2348846220 .long 4934475,3523269330 .long 4342338,2415956112 .long 14408667,4127258358 .long 1842204,117442311 .long 10395294,2801837991 .long 10263708,654321447 .long 3815994,2382401166 .long 13290186,2986390194 .long 2434341,1224755529 .long 8092539,3724599006 .long 855309,1124090691 .long 7434609,1543527516 .long 6250335,3607156695 .long 2039583,3338717127 .long 16316664,1040203326 .long 14145495,4110480885 .long 4079166,2399178639 .long 10329501,1728079719 .long 8158332,520101663 .long 6316128,402659352 .long 12171705,1845522030 .long 12500670,2936057775 .long 12369084,788541231 .long 9145227,3791708898 .long 1447446,2231403909 .long 3421236,218107149 .long 5066061,1392530259 .long 12829635,4026593520 .long 7500402,2617285788 .long 9803157,1694524773 .long 11250603,3925928682 .long 9342606,2734728099 .long 12237498,2919280302 .long 8026746,2650840734 .long 11776947,3959483628 .long 131586,2147516544 .long 11842740,754986285 .long 11382189,1795189611 .long 10658466,2818615464 .long 11316396,721431339 .long 14211288,905983542 .long 10132122,2785060518 .long 1513239,3305162181 .long 1710618,2248181382 .long 3487029,1291865421 .long 13421772,855651123 .long 16250871,4244700669 .long 10066329,1711302246 .long 6381921,1476417624 .long 5921370,2516620950 .long 15263976,973093434 .long 2368548,150997257 .long 5658198,2499843477 .long 4210752,268439568 .long 14803425,2013296760 .long 6513507,3623934168 .long 592137,1107313218 .long 3355443,3422604492 .long 12566463,4009816047 .long 10000536,637543974 .long 9934743,3842041317 .long 8750469,1627414881 .long 6842472,436214298 .long 16579836,1056980799 .long 15527148,989870907 .long 657930,2181071490 .long 14342874,3053500086 .long 7303023,3674266587 .long 5460819,3556824276 .long 6447714,2550175896 .long 10724259,3892373736 .long 3026478,2332068747 .long 526344,33554946 .long 11513775,3942706155 .long 2631720,167774730 .long 11579568,738208812 .long 7631988,486546717 .long 12763842,2952835248 .long 12434877,1862299503 .long 3552822,2365623693 .long 2236962,2281736328 .long 3684408,234884622 .long 6579300,419436825 .long 1973790,2264958855 .long 3750201,1308642894 .long 2894892,184552203 .long 10921638,2835392937 .long 3158064,201329676 .long 15066597,2030074233 .long 4473924,285217041 .long 16645629,2130739071 .long 8947848,570434082 .long 10461087,3875596263 .long 6645093,1493195097 .long 8882055,3774931425 .long 7039851,3657489114 .long 16053492,1023425853 .long 2302755,3355494600 .long 4737096,301994514 .long 1052688,67109892 .long 13750737,1946186868 .long 5329233,1409307732 .long 12632256,805318704 .long 16382457,2113961598 .long 13816530,3019945140 .long 10526880,671098920 .long 5592405,1426085205 .long 10592673,1744857192 .long 4276545,1342197840 .long 16448250,3187719870 .long 4408131,3489714384 .long 1250067,3288384708 
.long 12895428,822096177 .long 3092271,3405827019 .long 11053224,704653866 .long 11974326,2902502829 .long 3947580,251662095 .long 2829099,3389049546 .long 12698049,1879076976 .long 16777215,4278255615 .long 13158600,838873650 .long 10855845,1761634665 .long 2105376,134219784 .long 9013641,1644192354 .long 0,0 .long 9474192,603989028 .long 4671303,3506491857 .long 15724527,4211145723 .long 15395562,3120609978 .long 12040119,3976261101 .long 1381653,1157645637 .long 394758,2164294017 .long 13487565,1929409395 .long 11908533,1828744557 .long 1184274,2214626436 .long 8289918,2667618207 .long 12303291,3993038574 .long 2697513,1241533002 .long 986895,3271607235 .long 12105912,771763758 .long 460551,3238052289 .long 263172,16777473 .long 10197915,3858818790 .long 9737364,620766501 .long 2171169,1207978056 .long 6710886,2566953369 .long 15132390,3103832505 .long 13553358,3003167667 .long 15592941,2063629179 .long 15198183,4177590777 .long 3881787,3456159438 .long 16711422,3204497343 .long 8355711,3741376479 .long 12961221,1895854449 .long 10790052,687876393 .long 3618615,3439381965 .long 11645361,1811967084 .long 5000268,318771987 .long 9539985,1677747300 .long 7237230,2600508315 .long 9276813,1660969827 .long 7763574,2634063261 .long 197379,3221274816 .long 2960685,1258310475 .long 14606046,3070277559 .long 9868950,2768283045 .long 2500134,2298513801 .long 8224125,1593859935 .long 13027014,2969612721 .long 6052956,385881879 .long 13882323,4093703412 .long 15921906,3154164924 .long 5197647,3540046803 .long 1644825,1174423110 .long 4144959,3472936911 .long 14474460,922761015 .long 7960953,1577082462 .long 1907997,1191200583 .long 5395026,2483066004 .long 15461355,4194368250 .long 15987699,4227923196 .long 7171437,1526750043 .long 6184542,2533398423 .long 16514043,4261478142 .long 6908265,1509972570 .long 11711154,2885725356 .long 15790320,1006648380 .long 3223857,1275087948 .long 789516,50332419 .long 13948116,889206069 .long 13619151,4076925939 .long 9211020,587211555 .long 14869218,3087055032 .long 7697781,1560304989 .long 11119017,1778412138 .long 4868682,2449511058 .long 5723991,3573601749 .long 8684676,553656609 .long 1118481,1140868164 .long 4539717,1358975313 .long 1776411,3321939654 .long 16119285,2097184125 .long 15000804,956315961 .long 921102,2197848963 .long 7566195,3691044060 .long 11184810,2852170410 .long 15856113,2080406652 .long 14540253,1996519287 .long 5855577,1442862678 .long 1315860,83887365 .long 7105644,452991771 .long 9605778,2751505572 .long 5526612,352326933 .long 13684944,872428596 .long 7895160,503324190 .long 7368816,469769244 .long 14935011,4160813304 .long 4802889,1375752786 .long 8421504,536879136 .long 5263440,335549460 .long 10987431,3909151209 .long 16185078,3170942397 .long 7829367,3707821533 .long 9671571,3825263844 .long 8816262,2701173153 .long 8618883,3758153952 .long 2763306,2315291274 .long 13092807,4043370993 .long 5987163,3590379222 .long 15329769,2046851706 .long 15658734,3137387451 .long 9408399,3808486371 .long 65793,1073758272 .long 4013373,1325420367 .globl Camellia_cbc_encrypt .type Camellia_cbc_encrypt,@function .align 16 Camellia_cbc_encrypt: .L_Camellia_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ecx cmpl $0,%ecx je .L016enc_out pushfl cld movl 24(%esp),%eax movl 28(%esp),%ebx movl 36(%esp),%edx movl 40(%esp),%ebp leal -64(%esp),%esi andl $-64,%esi leal -127(%edx),%edi subl %esi,%edi negl %edi andl $960,%edi subl %edi,%esi movl 44(%esp),%edi xchgl %esi,%esp addl $4,%esp movl %esi,20(%esp) movl %eax,24(%esp) movl 
%ebx,28(%esp) movl %ecx,32(%esp) movl %edx,36(%esp) movl %ebp,40(%esp) call .L017pic_point .L017pic_point: popl %ebp leal .LCamellia_SBOX-.L017pic_point(%ebp),%ebp movl $32,%esi .align 4 .L018prefetch_sbox: movl (%ebp),%eax movl 32(%ebp),%ebx movl 64(%ebp),%ecx movl 96(%ebp),%edx leal 128(%ebp),%ebp decl %esi jnz .L018prefetch_sbox movl 36(%esp),%eax subl $4096,%ebp movl 24(%esp),%esi movl 272(%eax),%edx cmpl $0,%edi je .L019DECRYPT movl 32(%esp),%ecx movl 40(%esp),%edi shll $6,%edx leal (%eax,%edx,1),%edx movl %edx,16(%esp) testl $4294967280,%ecx jz .L020enc_tail movl (%edi),%eax movl 4(%edi),%ebx .align 4 .L021enc_loop: movl 8(%edi),%ecx movl 12(%edi),%edx xorl (%esi),%eax xorl 4(%esi),%ebx xorl 8(%esi),%ecx bswap %eax xorl 12(%esi),%edx bswap %ebx movl 36(%esp),%edi bswap %ecx bswap %edx call _x86_Camellia_encrypt movl 24(%esp),%esi movl 28(%esp),%edi bswap %eax bswap %ebx bswap %ecx movl %eax,(%edi) bswap %edx movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 32(%esp),%ecx leal 16(%esi),%esi movl %esi,24(%esp) leal 16(%edi),%edx movl %edx,28(%esp) subl $16,%ecx testl $4294967280,%ecx movl %ecx,32(%esp) jnz .L021enc_loop testl $15,%ecx jnz .L020enc_tail movl 40(%esp),%esi movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) movl 20(%esp),%esp popfl .L016enc_out: popl %edi popl %esi popl %ebx popl %ebp ret pushfl .align 4 .L020enc_tail: movl %edi,%eax movl 28(%esp),%edi pushl %eax movl $16,%ebx subl %ecx,%ebx cmpl %esi,%edi je .L022enc_in_place .align 4 .long 2767451785 jmp .L023enc_skip_in_place .L022enc_in_place: leal (%edi,%ecx,1),%edi .L023enc_skip_in_place: movl %ebx,%ecx xorl %eax,%eax .align 4 .long 2868115081 popl %edi movl 28(%esp),%esi movl (%edi),%eax movl 4(%edi),%ebx movl $16,32(%esp) jmp .L021enc_loop .align 16 .L019DECRYPT: shll $6,%edx leal (%eax,%edx,1),%edx movl %eax,16(%esp) movl %edx,36(%esp) cmpl 28(%esp),%esi je .L024dec_in_place movl 40(%esp),%edi movl %edi,44(%esp) .align 4 .L025dec_loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx bswap %eax movl 12(%esi),%edx bswap %ebx movl 36(%esp),%edi bswap %ecx bswap %edx call _x86_Camellia_decrypt movl 44(%esp),%edi movl 32(%esp),%esi bswap %eax bswap %ebx bswap %ecx xorl (%edi),%eax bswap %edx xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx subl $16,%esi jc .L026dec_partial movl %esi,32(%esp) movl 24(%esp),%esi movl 28(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl %esi,44(%esp) leal 16(%esi),%esi movl %esi,24(%esp) leal 16(%edi),%edi movl %edi,28(%esp) jnz .L025dec_loop movl 44(%esp),%edi .L027dec_end: movl 40(%esp),%esi movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) movl %edx,12(%esi) jmp .L028dec_out .align 4 .L026dec_partial: leal 44(%esp),%edi movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) leal 16(%esi),%ecx movl %edi,%esi movl 28(%esp),%edi .long 2767451785 movl 24(%esp),%edi jmp .L027dec_end .align 4 .L024dec_in_place: .L029dec_in_place_loop: leal 44(%esp),%edi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) bswap %eax movl %edx,12(%edi) bswap %ebx movl 36(%esp),%edi bswap %ecx bswap %edx call _x86_Camellia_decrypt movl 40(%esp),%edi movl 28(%esp),%esi bswap %eax bswap %ebx bswap %ecx xorl (%edi),%eax bswap %edx xorl 4(%edi),%ebx xorl 8(%edi),%ecx xorl 12(%edi),%edx movl %eax,(%esi) movl %ebx,4(%esi) movl %ecx,8(%esi) 
movl %edx,12(%esi) leal 16(%esi),%esi movl %esi,28(%esp) leal 44(%esp),%esi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl %eax,(%edi) movl %ebx,4(%edi) movl %ecx,8(%edi) movl %edx,12(%edi) movl 24(%esp),%esi leal 16(%esi),%esi movl %esi,24(%esp) movl 32(%esp),%ecx subl $16,%ecx jc .L030dec_in_place_partial movl %ecx,32(%esp) jnz .L029dec_in_place_loop jmp .L028dec_out .align 4 .L030dec_in_place_partial: movl 28(%esp),%edi leal 44(%esp),%esi leal (%edi,%ecx,1),%edi leal 16(%esi,%ecx,1),%esi negl %ecx .long 2767451785 .align 4 .L028dec_out: movl 20(%esp),%esp popfl popl %edi popl %esi popl %ebx popl %ebp ret .size Camellia_cbc_encrypt,.-.L_Camellia_cbc_encrypt_begin .byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 #endif Index: head/secure/lib/libcrypto/i386/co-586.S =================================================================== --- head/secure/lib/libcrypto/i386/co-586.S (revision 299480) +++ head/secure/lib/libcrypto/i386/co-586.S (revision 299481) @@ -1,2512 +1,2513 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from co-586.pl. #ifdef PIC .file "co-586.S" .text .globl bn_mul_comba8 .type bn_mul_comba8,@function .align 16 bn_mul_comba8: .L_bn_mul_comba8_begin: pushl %esi movl 12(%esp),%esi pushl %edi movl 20(%esp),%edi pushl %ebp pushl %ebx xorl %ebx,%ebx movl (%esi),%eax xorl %ecx,%ecx movl (%edi),%edx xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl (%edi),%edx adcl $0,%ebp movl %ebx,(%eax) movl 4(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl (%esi),%eax adcl %edx,%ebp movl 4(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl (%edi),%edx adcl $0,%ebx movl %ecx,4(%eax) movl 8(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 4(%esi),%eax adcl %edx,%ebx movl 4(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl (%esi),%eax adcl %edx,%ebx movl 8(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl (%edi),%edx adcl $0,%ecx movl %ebp,8(%eax) movl 12(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 8(%esi),%eax adcl %edx,%ecx movl 4(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 4(%esi),%eax adcl %edx,%ecx movl 8(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl (%esi),%eax adcl %edx,%ecx movl 12(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl (%edi),%edx adcl $0,%ebp movl %ebx,12(%eax) movl 16(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl 12(%esi),%eax adcl %edx,%ebp movl 4(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 8(%esi),%eax adcl %edx,%ebp movl 8(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 4(%esi),%eax adcl %edx,%ebp movl 12(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl (%esi),%eax adcl %edx,%ebp movl 16(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl (%edi),%edx adcl $0,%ebx movl %ecx,16(%eax) movl 20(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 16(%esi),%eax adcl %edx,%ebx movl 4(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 12(%esi),%eax adcl %edx,%ebx movl 8(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 8(%esi),%eax adcl %edx,%ebx movl 12(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 4(%esi),%eax adcl %edx,%ebx movl 16(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl (%esi),%eax adcl %edx,%ebx movl 20(%edi),%edx adcl 
$0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl (%edi),%edx adcl $0,%ecx movl %ebp,20(%eax) movl 24(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 20(%esi),%eax adcl %edx,%ecx movl 4(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 16(%esi),%eax adcl %edx,%ecx movl 8(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 12(%esi),%eax adcl %edx,%ecx movl 12(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 8(%esi),%eax adcl %edx,%ecx movl 16(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 4(%esi),%eax adcl %edx,%ecx movl 20(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl (%esi),%eax adcl %edx,%ecx movl 24(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl (%edi),%edx adcl $0,%ebp movl %ebx,24(%eax) movl 28(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl 24(%esi),%eax adcl %edx,%ebp movl 4(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esi),%eax adcl %edx,%ebp movl 8(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 16(%esi),%eax adcl %edx,%ebp movl 12(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 12(%esi),%eax adcl %edx,%ebp movl 16(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 8(%esi),%eax adcl %edx,%ebp movl 20(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 4(%esi),%eax adcl %edx,%ebp movl 24(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl (%esi),%eax adcl %edx,%ebp movl 28(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl 4(%edi),%edx adcl $0,%ebx movl %ecx,28(%eax) movl 28(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 24(%esi),%eax adcl %edx,%ebx movl 8(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esi),%eax adcl %edx,%ebx movl 12(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 16(%esi),%eax adcl %edx,%ebx movl 16(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 12(%esi),%eax adcl %edx,%ebx movl 20(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 8(%esi),%eax adcl %edx,%ebx movl 24(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 4(%esi),%eax adcl %edx,%ebx movl 28(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl 8(%edi),%edx adcl $0,%ecx movl %ebp,32(%eax) movl 28(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 24(%esi),%eax adcl %edx,%ecx movl 12(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esi),%eax adcl %edx,%ecx movl 16(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 16(%esi),%eax adcl %edx,%ecx movl 20(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 12(%esi),%eax adcl %edx,%ecx movl 24(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 8(%esi),%eax adcl %edx,%ecx movl 28(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl 12(%edi),%edx adcl $0,%ebp movl %ebx,36(%eax) movl 28(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl 24(%esi),%eax adcl %edx,%ebp movl 16(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esi),%eax adcl %edx,%ebp movl 20(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 16(%esi),%eax adcl %edx,%ebp movl 24(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 12(%esi),%eax adcl %edx,%ebp movl 28(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl 16(%edi),%edx adcl $0,%ebx movl %ecx,40(%eax) movl 28(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 24(%esi),%eax adcl %edx,%ebx movl 20(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esi),%eax adcl %edx,%ebx movl 
24(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 16(%esi),%eax adcl %edx,%ebx movl 28(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl 20(%edi),%edx adcl $0,%ecx movl %ebp,44(%eax) movl 28(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 24(%esi),%eax adcl %edx,%ecx movl 24(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esi),%eax adcl %edx,%ecx movl 28(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl 24(%edi),%edx adcl $0,%ebp movl %ebx,48(%eax) movl 28(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl 24(%esi),%eax adcl %edx,%ebp movl 28(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl 28(%edi),%edx adcl $0,%ebx movl %ecx,52(%eax) movl 28(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx adcl $0,%ecx movl %ebp,56(%eax) movl %ebx,60(%eax) popl %ebx popl %ebp popl %edi popl %esi ret .size bn_mul_comba8,.-.L_bn_mul_comba8_begin .globl bn_mul_comba4 .type bn_mul_comba4,@function .align 16 bn_mul_comba4: .L_bn_mul_comba4_begin: pushl %esi movl 12(%esp),%esi pushl %edi movl 20(%esp),%edi pushl %ebp pushl %ebx xorl %ebx,%ebx movl (%esi),%eax xorl %ecx,%ecx movl (%edi),%edx xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl (%edi),%edx adcl $0,%ebp movl %ebx,(%eax) movl 4(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl (%esi),%eax adcl %edx,%ebp movl 4(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl (%edi),%edx adcl $0,%ebx movl %ecx,4(%eax) movl 8(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 4(%esi),%eax adcl %edx,%ebx movl 4(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl (%esi),%eax adcl %edx,%ebx movl 8(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl (%edi),%edx adcl $0,%ecx movl %ebp,8(%eax) movl 12(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 8(%esi),%eax adcl %edx,%ecx movl 4(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 4(%esi),%eax adcl %edx,%ecx movl 8(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl (%esi),%eax adcl %edx,%ecx movl 12(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl 4(%edi),%edx adcl $0,%ebp movl %ebx,12(%eax) movl 12(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl 8(%esi),%eax adcl %edx,%ebp movl 8(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 4(%esi),%eax adcl %edx,%ebp movl 12(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl 8(%edi),%edx adcl $0,%ebx movl %ecx,16(%eax) movl 12(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 8(%esi),%eax adcl %edx,%ebx movl 12(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl 12(%edi),%edx adcl $0,%ecx movl %ebp,20(%eax) movl 12(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx adcl $0,%ebp movl %ebx,24(%eax) movl %ecx,28(%eax) popl %ebx popl %ebp popl %edi popl %esi ret .size bn_mul_comba4,.-.L_bn_mul_comba4_begin .globl bn_sqr_comba8 .type bn_sqr_comba8,@function .align 16 bn_sqr_comba8: .L_bn_sqr_comba8_begin: pushl %esi pushl %edi pushl %ebp pushl %ebx movl 20(%esp),%edi movl 24(%esp),%esi xorl %ebx,%ebx xorl %ecx,%ecx movl (%esi),%eax xorl %ebp,%ebp mull %eax addl %eax,%ebx adcl %edx,%ecx movl (%esi),%edx adcl $0,%ebp movl %ebx,(%edi) movl 4(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl 
%eax,%ecx adcl %edx,%ebp movl 8(%esi),%eax adcl $0,%ebx movl %ecx,4(%edi) movl (%esi),%edx xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 4(%esi),%eax adcl $0,%ecx mull %eax addl %eax,%ebp adcl %edx,%ebx movl (%esi),%edx adcl $0,%ecx movl %ebp,8(%edi) movl 12(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 8(%esi),%eax adcl $0,%ebp movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 16(%esi),%eax adcl $0,%ebp movl %ebx,12(%edi) movl (%esi),%edx xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 12(%esi),%eax adcl $0,%ebx movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 8(%esi),%eax adcl $0,%ebx mull %eax addl %eax,%ecx adcl %edx,%ebp movl (%esi),%edx adcl $0,%ebx movl %ecx,16(%edi) movl 20(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 16(%esi),%eax adcl $0,%ecx movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 12(%esi),%eax adcl $0,%ecx movl 8(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 24(%esi),%eax adcl $0,%ecx movl %ebp,20(%edi) movl (%esi),%edx xorl %ebp,%ebp mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 20(%esi),%eax adcl $0,%ebp movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 16(%esi),%eax adcl $0,%ebp movl 8(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 12(%esi),%eax adcl $0,%ebp mull %eax addl %eax,%ebx adcl %edx,%ecx movl (%esi),%edx adcl $0,%ebp movl %ebx,24(%edi) movl 28(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 24(%esi),%eax adcl $0,%ebx movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 20(%esi),%eax adcl $0,%ebx movl 8(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 16(%esi),%eax adcl $0,%ebx movl 12(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 28(%esi),%eax adcl $0,%ebx movl %ecx,28(%edi) movl 4(%esi),%edx xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 24(%esi),%eax adcl $0,%ecx movl 8(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 20(%esi),%eax adcl $0,%ecx movl 12(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 16(%esi),%eax adcl $0,%ecx mull %eax addl %eax,%ebp adcl %edx,%ebx movl 8(%esi),%edx adcl $0,%ecx movl %ebp,32(%edi) movl 28(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 24(%esi),%eax adcl $0,%ebp movl 12(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 20(%esi),%eax adcl $0,%ebp movl 16(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 28(%esi),%eax adcl $0,%ebp movl %ebx,36(%edi) movl 12(%esi),%edx xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 
24(%esi),%eax adcl $0,%ebx movl 16(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 20(%esi),%eax adcl $0,%ebx mull %eax addl %eax,%ecx adcl %edx,%ebp movl 16(%esi),%edx adcl $0,%ebx movl %ecx,40(%edi) movl 28(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 24(%esi),%eax adcl $0,%ecx movl 20(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 28(%esi),%eax adcl $0,%ecx movl %ebp,44(%edi) movl 20(%esi),%edx xorl %ebp,%ebp mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 24(%esi),%eax adcl $0,%ebp mull %eax addl %eax,%ebx adcl %edx,%ecx movl 24(%esi),%edx adcl $0,%ebp movl %ebx,48(%edi) movl 28(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 28(%esi),%eax adcl $0,%ebx movl %ecx,52(%edi) xorl %ecx,%ecx mull %eax addl %eax,%ebp adcl %edx,%ebx adcl $0,%ecx movl %ebp,56(%edi) movl %ebx,60(%edi) popl %ebx popl %ebp popl %edi popl %esi ret .size bn_sqr_comba8,.-.L_bn_sqr_comba8_begin .globl bn_sqr_comba4 .type bn_sqr_comba4,@function .align 16 bn_sqr_comba4: .L_bn_sqr_comba4_begin: pushl %esi pushl %edi pushl %ebp pushl %ebx movl 20(%esp),%edi movl 24(%esp),%esi xorl %ebx,%ebx xorl %ecx,%ecx movl (%esi),%eax xorl %ebp,%ebp mull %eax addl %eax,%ebx adcl %edx,%ecx movl (%esi),%edx adcl $0,%ebp movl %ebx,(%edi) movl 4(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 8(%esi),%eax adcl $0,%ebx movl %ecx,4(%edi) movl (%esi),%edx xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 4(%esi),%eax adcl $0,%ecx mull %eax addl %eax,%ebp adcl %edx,%ebx movl (%esi),%edx adcl $0,%ecx movl %ebp,8(%edi) movl 12(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 8(%esi),%eax adcl $0,%ebp movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 12(%esi),%eax adcl $0,%ebp movl %ebx,12(%edi) movl 4(%esi),%edx xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 8(%esi),%eax adcl $0,%ebx mull %eax addl %eax,%ecx adcl %edx,%ebp movl 8(%esi),%edx adcl $0,%ebx movl %ecx,16(%edi) movl 12(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 12(%esi),%eax adcl $0,%ecx movl %ebp,20(%edi) xorl %ebp,%ebp mull %eax addl %eax,%ebx adcl %edx,%ecx adcl $0,%ebp movl %ebx,24(%edi) movl %ecx,28(%edi) popl %ebx popl %ebp popl %edi popl %esi ret .size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin #else .file "co-586.S" .text .globl bn_mul_comba8 .type bn_mul_comba8,@function .align 16 bn_mul_comba8: .L_bn_mul_comba8_begin: pushl %esi movl 12(%esp),%esi pushl %edi movl 20(%esp),%edi pushl %ebp pushl %ebx xorl %ebx,%ebx movl (%esi),%eax xorl %ecx,%ecx movl (%edi),%edx xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl (%edi),%edx adcl $0,%ebp movl %ebx,(%eax) movl 4(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl (%esi),%eax adcl %edx,%ebp movl 4(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl (%edi),%edx adcl $0,%ebx movl %ecx,4(%eax) movl 8(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 4(%esi),%eax adcl %edx,%ebx movl 4(%edi),%edx adcl $0,%ecx mull %edx addl 
%eax,%ebp movl (%esi),%eax adcl %edx,%ebx movl 8(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl (%edi),%edx adcl $0,%ecx movl %ebp,8(%eax) movl 12(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 8(%esi),%eax adcl %edx,%ecx movl 4(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 4(%esi),%eax adcl %edx,%ecx movl 8(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl (%esi),%eax adcl %edx,%ecx movl 12(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl (%edi),%edx adcl $0,%ebp movl %ebx,12(%eax) movl 16(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl 12(%esi),%eax adcl %edx,%ebp movl 4(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 8(%esi),%eax adcl %edx,%ebp movl 8(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 4(%esi),%eax adcl %edx,%ebp movl 12(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl (%esi),%eax adcl %edx,%ebp movl 16(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl (%edi),%edx adcl $0,%ebx movl %ecx,16(%eax) movl 20(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 16(%esi),%eax adcl %edx,%ebx movl 4(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 12(%esi),%eax adcl %edx,%ebx movl 8(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 8(%esi),%eax adcl %edx,%ebx movl 12(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 4(%esi),%eax adcl %edx,%ebx movl 16(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl (%esi),%eax adcl %edx,%ebx movl 20(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl (%edi),%edx adcl $0,%ecx movl %ebp,20(%eax) movl 24(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 20(%esi),%eax adcl %edx,%ecx movl 4(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 16(%esi),%eax adcl %edx,%ecx movl 8(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 12(%esi),%eax adcl %edx,%ecx movl 12(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 8(%esi),%eax adcl %edx,%ecx movl 16(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 4(%esi),%eax adcl %edx,%ecx movl 20(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl (%esi),%eax adcl %edx,%ecx movl 24(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl (%edi),%edx adcl $0,%ebp movl %ebx,24(%eax) movl 28(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl 24(%esi),%eax adcl %edx,%ebp movl 4(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esi),%eax adcl %edx,%ebp movl 8(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 16(%esi),%eax adcl %edx,%ebp movl 12(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 12(%esi),%eax adcl %edx,%ebp movl 16(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 8(%esi),%eax adcl %edx,%ebp movl 20(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 4(%esi),%eax adcl %edx,%ebp movl 24(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl (%esi),%eax adcl %edx,%ebp movl 28(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl 4(%edi),%edx adcl $0,%ebx movl %ecx,28(%eax) movl 28(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 24(%esi),%eax adcl %edx,%ebx movl 8(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esi),%eax adcl %edx,%ebx movl 12(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 16(%esi),%eax adcl %edx,%ebx movl 16(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 12(%esi),%eax adcl %edx,%ebx movl 20(%edi),%edx adcl $0,%ecx mull %edx addl 
%eax,%ebp movl 8(%esi),%eax adcl %edx,%ebx movl 24(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 4(%esi),%eax adcl %edx,%ebx movl 28(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl 8(%edi),%edx adcl $0,%ecx movl %ebp,32(%eax) movl 28(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 24(%esi),%eax adcl %edx,%ecx movl 12(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esi),%eax adcl %edx,%ecx movl 16(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 16(%esi),%eax adcl %edx,%ecx movl 20(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 12(%esi),%eax adcl %edx,%ecx movl 24(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 8(%esi),%eax adcl %edx,%ecx movl 28(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl 12(%edi),%edx adcl $0,%ebp movl %ebx,36(%eax) movl 28(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl 24(%esi),%eax adcl %edx,%ebp movl 16(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esi),%eax adcl %edx,%ebp movl 20(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 16(%esi),%eax adcl %edx,%ebp movl 24(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 12(%esi),%eax adcl %edx,%ebp movl 28(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl 16(%edi),%edx adcl $0,%ebx movl %ecx,40(%eax) movl 28(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 24(%esi),%eax adcl %edx,%ebx movl 20(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esi),%eax adcl %edx,%ebx movl 24(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 16(%esi),%eax adcl %edx,%ebx movl 28(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl 20(%edi),%edx adcl $0,%ecx movl %ebp,44(%eax) movl 28(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 24(%esi),%eax adcl %edx,%ecx movl 24(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esi),%eax adcl %edx,%ecx movl 28(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl 24(%edi),%edx adcl $0,%ebp movl %ebx,48(%eax) movl 28(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl 24(%esi),%eax adcl %edx,%ebp movl 28(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl 28(%edi),%edx adcl $0,%ebx movl %ecx,52(%eax) movl 28(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx adcl $0,%ecx movl %ebp,56(%eax) movl %ebx,60(%eax) popl %ebx popl %ebp popl %edi popl %esi ret .size bn_mul_comba8,.-.L_bn_mul_comba8_begin .globl bn_mul_comba4 .type bn_mul_comba4,@function .align 16 bn_mul_comba4: .L_bn_mul_comba4_begin: pushl %esi movl 12(%esp),%esi pushl %edi movl 20(%esp),%edi pushl %ebp pushl %ebx xorl %ebx,%ebx movl (%esi),%eax xorl %ecx,%ecx movl (%edi),%edx xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl (%edi),%edx adcl $0,%ebp movl %ebx,(%eax) movl 4(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl (%esi),%eax adcl %edx,%ebp movl 4(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl (%edi),%edx adcl $0,%ebx movl %ecx,4(%eax) movl 8(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 4(%esi),%eax adcl %edx,%ebx movl 4(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl (%esi),%eax adcl %edx,%ebx movl 8(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl (%edi),%edx adcl $0,%ecx movl %ebp,8(%eax) movl 12(%esi),%eax xorl %ebp,%ebp mull %edx addl 
%eax,%ebx movl 8(%esi),%eax adcl %edx,%ecx movl 4(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 4(%esi),%eax adcl %edx,%ecx movl 8(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl (%esi),%eax adcl %edx,%ecx movl 12(%edi),%edx adcl $0,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx movl 4(%edi),%edx adcl $0,%ebp movl %ebx,12(%eax) movl 12(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%ecx movl 8(%esi),%eax adcl %edx,%ebp movl 8(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 4(%esi),%eax adcl %edx,%ebp movl 12(%edi),%edx adcl $0,%ebx mull %edx addl %eax,%ecx movl 20(%esp),%eax adcl %edx,%ebp movl 8(%edi),%edx adcl $0,%ebx movl %ecx,16(%eax) movl 12(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%ebp movl 8(%esi),%eax adcl %edx,%ebx movl 12(%edi),%edx adcl $0,%ecx mull %edx addl %eax,%ebp movl 20(%esp),%eax adcl %edx,%ebx movl 12(%edi),%edx adcl $0,%ecx movl %ebp,20(%eax) movl 12(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%ebx movl 20(%esp),%eax adcl %edx,%ecx adcl $0,%ebp movl %ebx,24(%eax) movl %ecx,28(%eax) popl %ebx popl %ebp popl %edi popl %esi ret .size bn_mul_comba4,.-.L_bn_mul_comba4_begin .globl bn_sqr_comba8 .type bn_sqr_comba8,@function .align 16 bn_sqr_comba8: .L_bn_sqr_comba8_begin: pushl %esi pushl %edi pushl %ebp pushl %ebx movl 20(%esp),%edi movl 24(%esp),%esi xorl %ebx,%ebx xorl %ecx,%ecx movl (%esi),%eax xorl %ebp,%ebp mull %eax addl %eax,%ebx adcl %edx,%ecx movl (%esi),%edx adcl $0,%ebp movl %ebx,(%edi) movl 4(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 8(%esi),%eax adcl $0,%ebx movl %ecx,4(%edi) movl (%esi),%edx xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 4(%esi),%eax adcl $0,%ecx mull %eax addl %eax,%ebp adcl %edx,%ebx movl (%esi),%edx adcl $0,%ecx movl %ebp,8(%edi) movl 12(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 8(%esi),%eax adcl $0,%ebp movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 16(%esi),%eax adcl $0,%ebp movl %ebx,12(%edi) movl (%esi),%edx xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 12(%esi),%eax adcl $0,%ebx movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 8(%esi),%eax adcl $0,%ebx mull %eax addl %eax,%ecx adcl %edx,%ebp movl (%esi),%edx adcl $0,%ebx movl %ecx,16(%edi) movl 20(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 16(%esi),%eax adcl $0,%ecx movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 12(%esi),%eax adcl $0,%ecx movl 8(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 24(%esi),%eax adcl $0,%ecx movl %ebp,20(%edi) movl (%esi),%edx xorl %ebp,%ebp mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 20(%esi),%eax adcl $0,%ebp movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 16(%esi),%eax adcl $0,%ebp movl 8(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 12(%esi),%eax adcl $0,%ebp mull %eax addl %eax,%ebx adcl %edx,%ecx movl (%esi),%edx adcl $0,%ebp movl %ebx,24(%edi) movl 28(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%eax adcl 
%edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 24(%esi),%eax adcl $0,%ebx movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 20(%esi),%eax adcl $0,%ebx movl 8(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 16(%esi),%eax adcl $0,%ebx movl 12(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 28(%esi),%eax adcl $0,%ebx movl %ecx,28(%edi) movl 4(%esi),%edx xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 24(%esi),%eax adcl $0,%ecx movl 8(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 20(%esi),%eax adcl $0,%ecx movl 12(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 16(%esi),%eax adcl $0,%ecx mull %eax addl %eax,%ebp adcl %edx,%ebx movl 8(%esi),%edx adcl $0,%ecx movl %ebp,32(%edi) movl 28(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 24(%esi),%eax adcl $0,%ebp movl 12(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 20(%esi),%eax adcl $0,%ebp movl 16(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 28(%esi),%eax adcl $0,%ebp movl %ebx,36(%edi) movl 12(%esi),%edx xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 24(%esi),%eax adcl $0,%ebx movl 16(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 20(%esi),%eax adcl $0,%ebx mull %eax addl %eax,%ecx adcl %edx,%ebp movl 16(%esi),%edx adcl $0,%ebx movl %ecx,40(%edi) movl 28(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 24(%esi),%eax adcl $0,%ecx movl 20(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 28(%esi),%eax adcl $0,%ecx movl %ebp,44(%edi) movl 20(%esi),%edx xorl %ebp,%ebp mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 24(%esi),%eax adcl $0,%ebp mull %eax addl %eax,%ebx adcl %edx,%ecx movl 24(%esi),%edx adcl $0,%ebp movl %ebx,48(%edi) movl 28(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 28(%esi),%eax adcl $0,%ebx movl %ecx,52(%edi) xorl %ecx,%ecx mull %eax addl %eax,%ebp adcl %edx,%ebx adcl $0,%ecx movl %ebp,56(%edi) movl %ebx,60(%edi) popl %ebx popl %ebp popl %edi popl %esi ret .size bn_sqr_comba8,.-.L_bn_sqr_comba8_begin .globl bn_sqr_comba4 .type bn_sqr_comba4,@function .align 16 bn_sqr_comba4: .L_bn_sqr_comba4_begin: pushl %esi pushl %edi pushl %ebp pushl %ebx movl 20(%esp),%edi movl 24(%esp),%esi xorl %ebx,%ebx xorl %ecx,%ecx movl (%esi),%eax xorl %ebp,%ebp mull %eax addl %eax,%ebx adcl %edx,%ecx movl (%esi),%edx adcl $0,%ebp movl %ebx,(%edi) movl 4(%esi),%eax xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 8(%esi),%eax adcl $0,%ebx movl %ecx,4(%edi) movl (%esi),%edx xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 4(%esi),%eax adcl $0,%ecx mull %eax addl %eax,%ebp adcl %edx,%ebx movl (%esi),%edx adcl $0,%ecx movl %ebp,8(%edi) movl 12(%esi),%eax xorl %ebp,%ebp mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl 
%eax,%ebx adcl %edx,%ecx movl 8(%esi),%eax adcl $0,%ebp movl 4(%esi),%edx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebp addl %eax,%ebx adcl %edx,%ecx movl 12(%esi),%eax adcl $0,%ebp movl %ebx,12(%edi) movl 4(%esi),%edx xorl %ebx,%ebx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ebx addl %eax,%ecx adcl %edx,%ebp movl 8(%esi),%eax adcl $0,%ebx mull %eax addl %eax,%ecx adcl %edx,%ebp movl 8(%esi),%edx adcl $0,%ebx movl %ecx,16(%edi) movl 12(%esi),%eax xorl %ecx,%ecx mull %edx addl %eax,%eax adcl %edx,%edx adcl $0,%ecx addl %eax,%ebp adcl %edx,%ebx movl 12(%esi),%eax adcl $0,%ecx movl %ebp,20(%edi) xorl %ebp,%ebp mull %eax addl %eax,%ebx adcl %edx,%ecx adcl $0,%ebp movl %ebx,24(%edi) movl %ecx,28(%edi) popl %ebx popl %ebp popl %edi popl %esi ret .size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin #endif
Index: head/secure/lib/libcrypto/i386/crypt586.S
===================================================================
--- head/secure/lib/libcrypto/i386/crypt586.S (revision 299480)
+++ head/secure/lib/libcrypto/i386/crypt586.S (revision 299481)
@@ -1,1758 +1,1759 @@
- # $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from crypt586.pl.
#ifdef PIC .file "crypt586.S" .text .globl fcrypt_body .type fcrypt_body,@function .align 16 fcrypt_body: .L_fcrypt_body_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %edi,%edi xorl %esi,%esi call .L000PIC_me_up .L000PIC_me_up: popl %edx leal _GLOBAL_OFFSET_TABLE_+[.-.L000PIC_me_up](%edx),%edx movl DES_SPtrans@GOT(%edx),%edx pushl %edx movl 28(%esp),%ebp pushl $25 .L001start: movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl (%ebp),%ebx xorl %ebx,%eax movl 4(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 8(%ebp),%ebx xorl %ebx,%eax movl 12(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 16(%ebp),%ebx xorl %ebx,%eax movl 20(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl
$0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 24(%ebp),%ebx xorl %ebx,%eax movl 28(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 32(%ebp),%ebx xorl %ebx,%eax movl 36(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 40(%ebp),%ebx xorl %ebx,%eax movl 44(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 48(%ebp),%ebx xorl %ebx,%eax movl 52(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl 
xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 56(%ebp),%ebx xorl %ebx,%eax movl 60(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 64(%ebp),%ebx xorl %ebx,%eax movl 68(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 72(%ebp),%ebx xorl %ebx,%eax movl 76(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 80(%ebp),%ebx xorl %ebx,%eax movl 84(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl 
$0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 88(%ebp),%ebx xorl %ebx,%eax movl 92(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 96(%ebp),%ebx xorl %ebx,%eax movl 100(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 104(%ebp),%ebx xorl %ebx,%eax movl 108(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 112(%ebp),%ebx xorl %ebx,%eax movl 116(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi 
movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 120(%ebp),%ebx xorl %ebx,%eax movl 124(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl (%esp),%ebx movl %edi,%eax decl %ebx movl %esi,%edi movl %eax,%esi movl %ebx,(%esp) jnz .L001start movl 28(%esp),%edx rorl $1,%edi movl %esi,%eax xorl %edi,%esi andl $0xaaaaaaaa,%esi xorl %esi,%eax xorl %esi,%edi roll $23,%eax movl %eax,%esi xorl %edi,%eax andl $0x03fc03fc,%eax xorl %eax,%esi xorl %eax,%edi roll $10,%esi movl %esi,%eax xorl %edi,%esi andl $0x33333333,%esi xorl %esi,%eax xorl %esi,%edi roll $18,%edi movl %edi,%esi xorl %eax,%edi andl $0xfff0000f,%edi xorl %edi,%esi xorl %edi,%eax roll $12,%esi movl %esi,%edi xorl %eax,%esi andl $0xf0f0f0f0,%esi xorl %esi,%edi xorl %esi,%eax rorl $4,%eax movl %eax,(%edx) movl %edi,4(%edx) addl $8,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size fcrypt_body,.-.L_fcrypt_body_begin #else .file "crypt586.S" .text .globl fcrypt_body .type fcrypt_body,@function .align 16 fcrypt_body: .L_fcrypt_body_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %edi,%edi xorl %esi,%esi leal DES_SPtrans,%edx pushl %edx movl 28(%esp),%ebp pushl $25 .L000start: movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl (%ebp),%ebx xorl %ebx,%eax movl 4(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 8(%ebp),%ebx xorl %ebx,%eax movl 12(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl 
%ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 16(%ebp),%ebx xorl %ebx,%eax movl 20(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 24(%ebp),%ebx xorl %ebx,%eax movl 28(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 32(%ebp),%ebx xorl %ebx,%eax movl 36(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 40(%ebp),%ebx xorl %ebx,%eax movl 44(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl 
%edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 48(%ebp),%ebx xorl %ebx,%eax movl 52(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 56(%ebp),%ebx xorl %ebx,%eax movl 60(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 64(%ebp),%ebx xorl %ebx,%eax movl 68(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 72(%ebp),%ebx xorl %ebx,%eax movl 76(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 80(%ebp),%ebx xorl %ebx,%eax movl 84(%ebp),%ecx 
xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 88(%ebp),%ebx xorl %ebx,%eax movl 92(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 96(%ebp),%ebx xorl %ebx,%eax movl 100(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 104(%ebp),%ebx xorl %ebx,%eax movl 108(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %esi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 112(%ebp),%ebx xorl %ebx,%eax movl 116(%ebp),%ecx xorl %esi,%eax xorl %esi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx 
movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%edi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%edi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%edi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%edi movl 32(%esp),%ebp movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx movl 40(%esp),%ecx xorl %edi,%edx andl %edx,%eax andl %ecx,%edx movl %eax,%ebx shll $16,%ebx movl %edx,%ecx shll $16,%ecx xorl %ebx,%eax xorl %ecx,%edx movl 120(%ebp),%ebx xorl %ebx,%eax movl 124(%ebp),%ecx xorl %edi,%eax xorl %edi,%edx xorl %ecx,%edx andl $0xfcfcfcfc,%eax xorl %ebx,%ebx andl $0xcfcfcfcf,%edx xorl %ecx,%ecx movb %al,%bl movb %ah,%cl rorl $4,%edx movl 4(%esp),%ebp xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx movl 0x600(%ebp,%ebx,1),%ebx xorl %ebx,%esi movl 0x700(%ebp,%ecx,1),%ebx xorl %ebx,%esi movl 0x400(%ebp,%eax,1),%ebx xorl %ebx,%esi movl 0x500(%ebp,%edx,1),%ebx xorl %ebx,%esi movl 32(%esp),%ebp movl (%esp),%ebx movl %edi,%eax decl %ebx movl %esi,%edi movl %eax,%esi movl %ebx,(%esp) jnz .L000start movl 28(%esp),%edx rorl $1,%edi movl %esi,%eax xorl %edi,%esi andl $0xaaaaaaaa,%esi xorl %esi,%eax xorl %esi,%edi roll $23,%eax movl %eax,%esi xorl %edi,%eax andl $0x03fc03fc,%eax xorl %eax,%esi xorl %eax,%edi roll $10,%esi movl %esi,%eax xorl %edi,%esi andl $0x33333333,%esi xorl %esi,%eax xorl %esi,%edi roll $18,%edi movl %edi,%esi xorl %eax,%edi andl $0xfff0000f,%edi xorl %edi,%esi xorl %edi,%eax roll $12,%esi movl %esi,%edi xorl %eax,%esi andl $0xf0f0f0f0,%esi xorl %esi,%edi xorl %esi,%eax rorl $4,%eax movl %eax,(%edx) movl %edi,4(%edx) addl $8,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size fcrypt_body,.-.L_fcrypt_body_begin #endif
Index: head/secure/lib/libcrypto/i386/des-586.S
===================================================================
--- head/secure/lib/libcrypto/i386/des-586.S (revision 299480)
+++ head/secure/lib/libcrypto/i386/des-586.S (revision 299481)
@@ -1,3680 +1,3681 @@
- # $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from des-586.pl.
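For orientation (an illustrative sketch, not part of this diff or of the generated file): the _x86_DES_encrypt/_x86_DES_decrypt routines that follow unroll sixteen Feistel rounds, each folding one half of the block into the other through eight lookups into the DES_SPtrans tables, which the assembly addresses as eight 64-entry tables of 32-bit words at byte offsets 0x000-0x700. A minimal C sketch of one such round is shown here under that table-layout assumption; the helper name des_round is hypothetical.

/* Sketch of one DES Feistel round as performed by the table lookups
 * below; assumes DES_SPtrans is 8 tables of 64 32-bit words. */
#include <stdint.h>

extern const uint32_t DES_SPtrans[8][64];   /* combined S-box/P tables */

static uint32_t ror32(uint32_t v, unsigned n)
{
    return (v >> n) | (v << (32 - n));
}

/* Mix R into *L under the two 32-bit subkey words k0 and k1.
 * The assembly masks with 0xfcfcfcfc/0xcfcfcfcf and uses each byte
 * (already a multiple of 4) directly as a byte offset into the tables;
 * the shift-by-2,10,18,26 form below selects the same six bits per
 * S-box, expressed as word indices. */
static void des_round(uint32_t *L, uint32_t R, uint32_t k0, uint32_t k1)
{
    uint32_t u = R ^ k0;            /* feeds tables 0, 2, 4, 6 */
    uint32_t t = ror32(R ^ k1, 4);  /* feeds tables 1, 3, 5, 7 */

    *L ^= DES_SPtrans[0][(u >>  2) & 0x3f] ^
          DES_SPtrans[2][(u >> 10) & 0x3f] ^
          DES_SPtrans[4][(u >> 18) & 0x3f] ^
          DES_SPtrans[6][(u >> 26) & 0x3f] ^
          DES_SPtrans[1][(t >>  2) & 0x3f] ^
          DES_SPtrans[3][(t >> 10) & 0x3f] ^
          DES_SPtrans[5][(t >> 18) & 0x3f] ^
          DES_SPtrans[7][(t >> 26) & 0x3f];
}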
#ifdef PIC .file "des-586.S" .text .globl DES_SPtrans .type _x86_DES_encrypt,@function .align 16 _x86_DES_encrypt: pushl %ecx movl (%ecx),%eax xorl %ebx,%ebx movl 4(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 8(%ecx),%eax xorl %ebx,%ebx movl 12(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 16(%ecx),%eax xorl %ebx,%ebx movl 20(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 24(%ecx),%eax xorl %ebx,%ebx movl 28(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 32(%ecx),%eax xorl %ebx,%ebx movl 36(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 40(%ecx),%eax xorl %ebx,%ebx movl 44(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 48(%ecx),%eax xorl %ebx,%ebx movl 52(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb 
%dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 56(%ecx),%eax xorl %ebx,%ebx movl 60(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 64(%ecx),%eax xorl %ebx,%ebx movl 68(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 72(%ecx),%eax xorl %ebx,%ebx movl 76(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 80(%ecx),%eax xorl %ebx,%ebx movl 84(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 88(%ecx),%eax xorl %ebx,%ebx movl 92(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 96(%ecx),%eax xorl %ebx,%ebx movl 100(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 
104(%ecx),%eax xorl %ebx,%ebx movl 108(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 112(%ecx),%eax xorl %ebx,%ebx movl 116(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 120(%ecx),%eax xorl %ebx,%ebx movl 124(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi addl $4,%esp ret .size _x86_DES_encrypt,.-_x86_DES_encrypt .type _x86_DES_decrypt,@function .align 16 _x86_DES_decrypt: pushl %ecx movl 120(%ecx),%eax xorl %ebx,%ebx movl 124(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 112(%ecx),%eax xorl %ebx,%ebx movl 116(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 104(%ecx),%eax xorl %ebx,%ebx movl 108(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 96(%ecx),%eax xorl %ebx,%ebx movl 100(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl 
(%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 88(%ecx),%eax xorl %ebx,%ebx movl 92(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 80(%ecx),%eax xorl %ebx,%ebx movl 84(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 72(%ecx),%eax xorl %ebx,%ebx movl 76(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 64(%ecx),%eax xorl %ebx,%ebx movl 68(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 56(%ecx),%eax xorl %ebx,%ebx movl 60(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 48(%ecx),%eax xorl %ebx,%ebx movl 52(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 
0x500(%ebp,%edx,1),%esi movl 40(%ecx),%eax xorl %ebx,%ebx movl 44(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 32(%ecx),%eax xorl %ebx,%ebx movl 36(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 24(%ecx),%eax xorl %ebx,%ebx movl 28(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 16(%ecx),%eax xorl %ebx,%ebx movl 20(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 8(%ecx),%eax xorl %ebx,%ebx movl 12(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl (%ecx),%eax xorl %ebx,%ebx movl 4(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi addl $4,%esp ret .size _x86_DES_decrypt,.-_x86_DES_decrypt .globl DES_encrypt1 .type DES_encrypt1,@function .align 16 DES_encrypt1: .L_DES_encrypt1_begin: pushl %esi pushl %edi movl 12(%esp),%esi xorl %ecx,%ecx pushl %ebx pushl %ebp movl (%esi),%eax movl 28(%esp),%ebx movl 4(%esi),%edi roll $4,%eax movl 
%eax,%esi xorl %edi,%eax andl $0xf0f0f0f0,%eax xorl %eax,%esi xorl %eax,%edi roll $20,%edi movl %edi,%eax xorl %esi,%edi andl $0xfff0000f,%edi xorl %edi,%eax xorl %edi,%esi roll $14,%eax movl %eax,%edi xorl %esi,%eax andl $0x33333333,%eax xorl %eax,%edi xorl %eax,%esi roll $22,%esi movl %esi,%eax xorl %edi,%esi andl $0x03fc03fc,%esi xorl %esi,%eax xorl %esi,%edi roll $9,%eax movl %eax,%esi xorl %edi,%eax andl $0xaaaaaaaa,%eax xorl %eax,%esi xorl %eax,%edi roll $1,%edi call .L000pic_point .L000pic_point: popl %ebp leal .Ldes_sptrans-.L000pic_point(%ebp),%ebp movl 24(%esp),%ecx cmpl $0,%ebx je .L001decrypt call _x86_DES_encrypt jmp .L002done .L001decrypt: call _x86_DES_decrypt .L002done: movl 20(%esp),%edx rorl $1,%esi movl %edi,%eax xorl %esi,%edi andl $0xaaaaaaaa,%edi xorl %edi,%eax xorl %edi,%esi roll $23,%eax movl %eax,%edi xorl %esi,%eax andl $0x03fc03fc,%eax xorl %eax,%edi xorl %eax,%esi roll $10,%edi movl %edi,%eax xorl %esi,%edi andl $0x33333333,%edi xorl %edi,%eax xorl %edi,%esi roll $18,%esi movl %esi,%edi xorl %eax,%esi andl $0xfff0000f,%esi xorl %esi,%edi xorl %esi,%eax roll $12,%edi movl %edi,%esi xorl %eax,%edi andl $0xf0f0f0f0,%edi xorl %edi,%esi xorl %edi,%eax rorl $4,%eax movl %eax,(%edx) movl %esi,4(%edx) popl %ebp popl %ebx popl %edi popl %esi ret .size DES_encrypt1,.-.L_DES_encrypt1_begin .globl DES_encrypt2 .type DES_encrypt2,@function .align 16 DES_encrypt2: .L_DES_encrypt2_begin: pushl %esi pushl %edi movl 12(%esp),%eax xorl %ecx,%ecx pushl %ebx pushl %ebp movl (%eax),%esi movl 28(%esp),%ebx roll $3,%esi movl 4(%eax),%edi roll $3,%edi call .L003pic_point .L003pic_point: popl %ebp leal .Ldes_sptrans-.L003pic_point(%ebp),%ebp movl 24(%esp),%ecx cmpl $0,%ebx je .L004decrypt call _x86_DES_encrypt jmp .L005done .L004decrypt: call _x86_DES_decrypt .L005done: rorl $3,%edi movl 20(%esp),%eax rorl $3,%esi movl %edi,(%eax) movl %esi,4(%eax) popl %ebp popl %ebx popl %edi popl %esi ret .size DES_encrypt2,.-.L_DES_encrypt2_begin .globl DES_encrypt3 .type DES_encrypt3,@function .align 16 DES_encrypt3: .L_DES_encrypt3_begin: pushl %ebx movl 8(%esp),%ebx pushl %ebp pushl %esi pushl %edi movl (%ebx),%edi movl 4(%ebx),%esi subl $12,%esp roll $4,%edi movl %edi,%edx xorl %esi,%edi andl $0xf0f0f0f0,%edi xorl %edi,%edx xorl %edi,%esi roll $20,%esi movl %esi,%edi xorl %edx,%esi andl $0xfff0000f,%esi xorl %esi,%edi xorl %esi,%edx roll $14,%edi movl %edi,%esi xorl %edx,%edi andl $0x33333333,%edi xorl %edi,%esi xorl %edi,%edx roll $22,%edx movl %edx,%edi xorl %esi,%edx andl $0x03fc03fc,%edx xorl %edx,%edi xorl %edx,%esi roll $9,%edi movl %edi,%edx xorl %esi,%edi andl $0xaaaaaaaa,%edi xorl %edi,%edx xorl %edi,%esi rorl $3,%edx rorl $2,%esi movl %esi,4(%ebx) movl 36(%esp),%eax movl %edx,(%ebx) movl 40(%esp),%edi movl 44(%esp),%esi movl $1,8(%esp) movl %eax,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin movl $0,8(%esp) movl %edi,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin movl $1,8(%esp) movl %esi,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin addl $12,%esp movl (%ebx),%edi movl 4(%ebx),%esi roll $2,%esi roll $3,%edi movl %edi,%eax xorl %esi,%edi andl $0xaaaaaaaa,%edi xorl %edi,%eax xorl %edi,%esi roll $23,%eax movl %eax,%edi xorl %esi,%eax andl $0x03fc03fc,%eax xorl %eax,%edi xorl %eax,%esi roll $10,%edi movl %edi,%eax xorl %esi,%edi andl $0x33333333,%edi xorl %edi,%eax xorl %edi,%esi roll $18,%esi movl %esi,%edi xorl %eax,%esi andl $0xfff0000f,%esi xorl %esi,%edi xorl %esi,%eax roll $12,%edi movl %edi,%esi xorl %eax,%edi andl $0xf0f0f0f0,%edi xorl %edi,%esi xorl %edi,%eax rorl 
$4,%eax movl %eax,(%ebx) movl %esi,4(%ebx) popl %edi popl %esi popl %ebp popl %ebx ret .size DES_encrypt3,.-.L_DES_encrypt3_begin .globl DES_decrypt3 .type DES_decrypt3,@function .align 16 DES_decrypt3: .L_DES_decrypt3_begin: pushl %ebx movl 8(%esp),%ebx pushl %ebp pushl %esi pushl %edi movl (%ebx),%edi movl 4(%ebx),%esi subl $12,%esp roll $4,%edi movl %edi,%edx xorl %esi,%edi andl $0xf0f0f0f0,%edi xorl %edi,%edx xorl %edi,%esi roll $20,%esi movl %esi,%edi xorl %edx,%esi andl $0xfff0000f,%esi xorl %esi,%edi xorl %esi,%edx roll $14,%edi movl %edi,%esi xorl %edx,%edi andl $0x33333333,%edi xorl %edi,%esi xorl %edi,%edx roll $22,%edx movl %edx,%edi xorl %esi,%edx andl $0x03fc03fc,%edx xorl %edx,%edi xorl %edx,%esi roll $9,%edi movl %edi,%edx xorl %esi,%edi andl $0xaaaaaaaa,%edi xorl %edi,%edx xorl %edi,%esi rorl $3,%edx rorl $2,%esi movl %esi,4(%ebx) movl 36(%esp),%esi movl %edx,(%ebx) movl 40(%esp),%edi movl 44(%esp),%eax movl $0,8(%esp) movl %eax,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin movl $1,8(%esp) movl %edi,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin movl $0,8(%esp) movl %esi,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin addl $12,%esp movl (%ebx),%edi movl 4(%ebx),%esi roll $2,%esi roll $3,%edi movl %edi,%eax xorl %esi,%edi andl $0xaaaaaaaa,%edi xorl %edi,%eax xorl %edi,%esi roll $23,%eax movl %eax,%edi xorl %esi,%eax andl $0x03fc03fc,%eax xorl %eax,%edi xorl %eax,%esi roll $10,%edi movl %edi,%eax xorl %esi,%edi andl $0x33333333,%edi xorl %edi,%eax xorl %edi,%esi roll $18,%esi movl %esi,%edi xorl %eax,%esi andl $0xfff0000f,%esi xorl %esi,%edi xorl %esi,%eax roll $12,%edi movl %edi,%esi xorl %eax,%edi andl $0xf0f0f0f0,%edi xorl %edi,%esi xorl %edi,%eax rorl $4,%eax movl %eax,(%ebx) movl %esi,4(%ebx) popl %edi popl %esi popl %ebp popl %ebx ret .size DES_decrypt3,.-.L_DES_decrypt3_begin .globl DES_ncbc_encrypt .type DES_ncbc_encrypt,@function .align 16 DES_ncbc_encrypt: .L_DES_ncbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi pushl %edi pushl %esi pushl %edi pushl %esi movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi movl 56(%esp),%ecx pushl %ecx movl 52(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx jz .L006decrypt andl $4294967288,%ebp movl 12(%esp),%eax movl 16(%esp),%ebx jz .L007encrypt_finish .L008encrypt_loop: movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax xorl %edx,%ebx movl %eax,12(%esp) movl %ebx,16(%esp) call .L_DES_encrypt1_begin movl 12(%esp),%eax movl 16(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L008encrypt_loop .L007encrypt_finish: movl 56(%esp),%ebp andl $7,%ebp jz .L009finish call .L010PIC_point .L010PIC_point: popl %edx leal .L011cbc_enc_jmp_table-.L010PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp .L012ej7: movb 6(%esi),%dh shll $8,%edx .L013ej6: movb 5(%esi),%dh .L014ej5: movb 4(%esi),%dl .L015ej4: movl (%esi),%ecx jmp .L016ejend .L017ej3: movb 2(%esi),%ch shll $8,%ecx .L018ej2: movb 1(%esi),%ch .L019ej1: movb (%esi),%cl .L016ejend: xorl %ecx,%eax xorl %edx,%ebx movl %eax,12(%esp) movl %ebx,16(%esp) call .L_DES_encrypt1_begin movl 12(%esp),%eax movl 16(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) jmp .L009finish .L006decrypt: andl $4294967288,%ebp movl 20(%esp),%eax movl 24(%esp),%ebx jz .L020decrypt_finish .L021decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx movl %eax,12(%esp) movl %ebx,16(%esp) call .L_DES_encrypt1_begin movl 12(%esp),%eax movl 
16(%esp),%ebx movl 20(%esp),%ecx movl 24(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx movl %ecx,(%edi) movl %edx,4(%edi) movl %eax,20(%esp) movl %ebx,24(%esp) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L021decrypt_loop .L020decrypt_finish: movl 56(%esp),%ebp andl $7,%ebp jz .L009finish movl (%esi),%eax movl 4(%esi),%ebx movl %eax,12(%esp) movl %ebx,16(%esp) call .L_DES_encrypt1_begin movl 12(%esp),%eax movl 16(%esp),%ebx movl 20(%esp),%ecx movl 24(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx .L022dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx .L023dj6: movb %dh,5(%edi) .L024dj5: movb %dl,4(%edi) .L025dj4: movl %ecx,(%edi) jmp .L026djend .L027dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx .L028dj2: movb %ch,1(%esi) .L029dj1: movb %cl,(%esi) .L026djend: jmp .L009finish .L009finish: movl 64(%esp),%ecx addl $28,%esp movl %eax,(%ecx) movl %ebx,4(%ecx) popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L011cbc_enc_jmp_table: .long 0 .long .L019ej1-.L010PIC_point .long .L018ej2-.L010PIC_point .long .L017ej3-.L010PIC_point .long .L015ej4-.L010PIC_point .long .L014ej5-.L010PIC_point .long .L013ej6-.L010PIC_point .long .L012ej7-.L010PIC_point .align 64 .size DES_ncbc_encrypt,.-.L_DES_ncbc_encrypt_begin .globl DES_ede3_cbc_encrypt .type DES_ede3_cbc_encrypt,@function .align 16 DES_ede3_cbc_encrypt: .L_DES_ede3_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp movl 44(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi pushl %edi pushl %esi pushl %edi pushl %esi movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi movl 64(%esp),%ecx movl 56(%esp),%eax pushl %eax movl 56(%esp),%eax pushl %eax movl 56(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx jz .L030decrypt andl $4294967288,%ebp movl 16(%esp),%eax movl 20(%esp),%ebx jz .L031encrypt_finish .L032encrypt_loop: movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax xorl %edx,%ebx movl %eax,16(%esp) movl %ebx,20(%esp) call .L_DES_encrypt3_begin movl 16(%esp),%eax movl 20(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L032encrypt_loop .L031encrypt_finish: movl 60(%esp),%ebp andl $7,%ebp jz .L033finish call .L034PIC_point .L034PIC_point: popl %edx leal .L035cbc_enc_jmp_table-.L034PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp .L036ej7: movb 6(%esi),%dh shll $8,%edx .L037ej6: movb 5(%esi),%dh .L038ej5: movb 4(%esi),%dl .L039ej4: movl (%esi),%ecx jmp .L040ejend .L041ej3: movb 2(%esi),%ch shll $8,%ecx .L042ej2: movb 1(%esi),%ch .L043ej1: movb (%esi),%cl .L040ejend: xorl %ecx,%eax xorl %edx,%ebx movl %eax,16(%esp) movl %ebx,20(%esp) call .L_DES_encrypt3_begin movl 16(%esp),%eax movl 20(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) jmp .L033finish .L030decrypt: andl $4294967288,%ebp movl 24(%esp),%eax movl 28(%esp),%ebx jz .L044decrypt_finish .L045decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx movl %eax,16(%esp) movl %ebx,20(%esp) call .L_DES_decrypt3_begin movl 16(%esp),%eax movl 20(%esp),%ebx movl 24(%esp),%ecx movl 28(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx movl %ecx,(%edi) movl %edx,4(%edi) movl %eax,24(%esp) movl %ebx,28(%esp) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L045decrypt_loop .L044decrypt_finish: movl 60(%esp),%ebp andl $7,%ebp jz .L033finish movl (%esi),%eax movl 4(%esi),%ebx movl %eax,16(%esp) movl %ebx,20(%esp) call .L_DES_decrypt3_begin movl 16(%esp),%eax movl 20(%esp),%ebx movl 24(%esp),%ecx movl 28(%esp),%edx xorl 
%eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx .L046dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx .L047dj6: movb %dh,5(%edi) .L048dj5: movb %dl,4(%edi) .L049dj4: movl %ecx,(%edi) jmp .L050djend .L051dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx .L052dj2: movb %ch,1(%esi) .L053dj1: movb %cl,(%esi) .L050djend: jmp .L033finish .L033finish: movl 76(%esp),%ecx addl $32,%esp movl %eax,(%ecx) movl %ebx,4(%ecx) popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L035cbc_enc_jmp_table: .long 0 .long .L043ej1-.L034PIC_point .long .L042ej2-.L034PIC_point .long .L041ej3-.L034PIC_point .long .L039ej4-.L034PIC_point .long .L038ej5-.L034PIC_point .long .L037ej6-.L034PIC_point .long .L036ej7-.L034PIC_point .align 64 .size DES_ede3_cbc_encrypt,.-.L_DES_ede3_cbc_encrypt_begin .align 64 DES_SPtrans: .Ldes_sptrans: .long 34080768,524288,33554434,34080770 .long 33554432,526338,524290,33554434 .long 526338,34080768,34078720,2050 .long 33556482,33554432,0,524290 .long 524288,2,33556480,526336 .long 34080770,34078720,2050,33556480 .long 2,2048,526336,34078722 .long 2048,33556482,34078722,0 .long 0,34080770,33556480,524290 .long 34080768,524288,2050,33556480 .long 34078722,2048,526336,33554434 .long 526338,2,33554434,34078720 .long 34080770,526336,34078720,33556482 .long 33554432,2050,524290,0 .long 524288,33554432,33556482,34080768 .long 2,34078722,2048,526338 .long 1074823184,0,1081344,1074790400 .long 1073741840,32784,1073774592,1081344 .long 32768,1074790416,16,1073774592 .long 1048592,1074823168,1074790400,16 .long 1048576,1073774608,1074790416,32768 .long 1081360,1073741824,0,1048592 .long 1073774608,1081360,1074823168,1073741840 .long 1073741824,1048576,32784,1074823184 .long 1048592,1074823168,1073774592,1081360 .long 1074823184,1048592,1073741840,0 .long 1073741824,32784,1048576,1074790416 .long 32768,1073741824,1081360,1073774608 .long 1074823168,32768,0,1073741840 .long 16,1074823184,1081344,1074790400 .long 1074790416,1048576,32784,1073774592 .long 1073774608,16,1074790400,1081344 .long 67108865,67371264,256,67109121 .long 262145,67108864,67109121,262400 .long 67109120,262144,67371008,1 .long 67371265,257,1,67371009 .long 0,262145,67371264,256 .long 257,67371265,262144,67108865 .long 67371009,67109120,262401,67371008 .long 262400,0,67108864,262401 .long 67371264,256,1,262144 .long 257,262145,67371008,67109121 .long 0,67371264,262400,67371009 .long 262145,67108864,67371265,1 .long 262401,67108865,67108864,67371265 .long 262144,67109120,67109121,262400 .long 67109120,0,67371009,257 .long 67108865,262401,256,67371008 .long 4198408,268439552,8,272633864 .long 0,272629760,268439560,4194312 .long 272633856,268435464,268435456,4104 .long 268435464,4198408,4194304,268435456 .long 272629768,4198400,4096,8 .long 4198400,268439560,272629760,4096 .long 4104,0,4194312,272633856 .long 268439552,272629768,272633864,4194304 .long 272629768,4104,4194304,268435464 .long 4198400,268439552,8,272629760 .long 268439560,0,4096,4194312 .long 0,272629768,272633856,4096 .long 268435456,272633864,4198408,4194304 .long 272633864,8,268439552,4198408 .long 4194312,4198400,272629760,268439560 .long 4104,268435456,268435464,272633856 .long 134217728,65536,1024,134284320 .long 134283296,134218752,66592,134283264 .long 65536,32,134217760,66560 .long 134218784,134283296,134284288,0 .long 66560,134217728,65568,1056 .long 134218752,66592,0,134217760 .long 32,134218784,134284320,65568 .long 134283264,1024,1056,134284288 .long 134284288,134218784,65568,134283264 .long 65536,32,134217760,134218752 .long 
134217728,66560,134284320,0 .long 66592,134217728,1024,65568 .long 134218784,1024,0,134284320 .long 134283296,134284288,1056,65536 .long 66560,134283296,134218752,1056 .long 32,66592,134283264,134217760 .long 2147483712,2097216,0,2149588992 .long 2097216,8192,2147491904,2097152 .long 8256,2149589056,2105344,2147483648 .long 2147491840,2147483712,2149580800,2105408 .long 2097152,2147491904,2149580864,0 .long 8192,64,2149588992,2149580864 .long 2149589056,2149580800,2147483648,8256 .long 64,2105344,2105408,2147491840 .long 8256,2147483648,2147491840,2105408 .long 2149588992,2097216,0,2147491840 .long 2147483648,8192,2149580864,2097152 .long 2097216,2149589056,2105344,64 .long 2149589056,2105344,2097152,2147491904 .long 2147483712,2149580800,2105408,0 .long 8192,2147483712,2147491904,2149588992 .long 2149580800,8256,64,2149580864 .long 16384,512,16777728,16777220 .long 16794116,16388,16896,0 .long 16777216,16777732,516,16793600 .long 4,16794112,16793600,516 .long 16777732,16384,16388,16794116 .long 0,16777728,16777220,16896 .long 16793604,16900,16794112,4 .long 16900,16793604,512,16777216 .long 16900,16793600,16793604,516 .long 16384,512,16777216,16793604 .long 16777732,16900,16896,0 .long 512,16777220,4,16777728 .long 0,16777732,16777728,16896 .long 516,16384,16794116,16777216 .long 16794112,4,16388,16794116 .long 16777220,16794112,16793600,16388 .long 545259648,545390592,131200,0 .long 537001984,8388736,545259520,545390720 .long 128,536870912,8519680,131200 .long 8519808,537002112,536871040,545259520 .long 131072,8519808,8388736,537001984 .long 545390720,536871040,0,8519680 .long 536870912,8388608,537002112,545259648 .long 8388608,131072,545390592,128 .long 8388608,131072,536871040,545390720 .long 131200,536870912,0,8519680 .long 545259648,537002112,537001984,8388736 .long 545390592,128,8388736,537001984 .long 545390720,8388608,545259520,536871040 .long 8519680,131200,537002112,545259520 .long 128,545390592,8519808,0 .long 536870912,545259648,131072,8519808 #else .file "des-586.S" .text .globl DES_SPtrans .type _x86_DES_encrypt,@function .align 16 _x86_DES_encrypt: pushl %ecx movl (%ecx),%eax xorl %ebx,%ebx movl 4(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 8(%ecx),%eax xorl %ebx,%ebx movl 12(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 16(%ecx),%eax xorl %ebx,%ebx movl 20(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl 
andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 24(%ecx),%eax xorl %ebx,%ebx movl 28(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 32(%ecx),%eax xorl %ebx,%ebx movl 36(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 40(%ecx),%eax xorl %ebx,%ebx movl 44(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 48(%ecx),%eax xorl %ebx,%ebx movl 52(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 56(%ecx),%eax xorl %ebx,%ebx movl 60(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 64(%ecx),%eax xorl %ebx,%ebx movl 68(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 72(%ecx),%eax xorl %ebx,%ebx movl 76(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl 
rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 80(%ecx),%eax xorl %ebx,%ebx movl 84(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 88(%ecx),%eax xorl %ebx,%ebx movl 92(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 96(%ecx),%eax xorl %ebx,%ebx movl 100(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 104(%ecx),%eax xorl %ebx,%ebx movl 108(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 112(%ecx),%eax xorl %ebx,%ebx movl 116(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 120(%ecx),%eax xorl %ebx,%ebx movl 124(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 
0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi addl $4,%esp ret .size _x86_DES_encrypt,.-_x86_DES_encrypt .type _x86_DES_decrypt,@function .align 16 _x86_DES_decrypt: pushl %ecx movl 120(%ecx),%eax xorl %ebx,%ebx movl 124(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 112(%ecx),%eax xorl %ebx,%ebx movl 116(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 104(%ecx),%eax xorl %ebx,%ebx movl 108(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 96(%ecx),%eax xorl %ebx,%ebx movl 100(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 88(%ecx),%eax xorl %ebx,%ebx movl 92(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 80(%ecx),%eax xorl %ebx,%ebx movl 84(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 72(%ecx),%eax xorl %ebx,%ebx movl 76(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx 
movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 64(%ecx),%eax xorl %ebx,%ebx movl 68(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 56(%ecx),%eax xorl %ebx,%ebx movl 60(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 48(%ecx),%eax xorl %ebx,%ebx movl 52(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 40(%ecx),%eax xorl %ebx,%ebx movl 44(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 32(%ecx),%eax xorl %ebx,%ebx movl 36(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 24(%ecx),%eax xorl %ebx,%ebx movl 28(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl 
(%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl 16(%ecx),%eax xorl %ebx,%ebx movl 20(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi movl 8(%ecx),%eax xorl %ebx,%ebx movl 12(%ecx),%edx xorl %esi,%eax xorl %ecx,%ecx xorl %esi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%edi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%edi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%edi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%edi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%edi xorl 0x700(%ebp,%ecx,1),%edi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi movl (%ecx),%eax xorl %ebx,%ebx movl 4(%ecx),%edx xorl %edi,%eax xorl %ecx,%ecx xorl %edi,%edx andl $0xfcfcfcfc,%eax andl $0xcfcfcfcf,%edx movb %al,%bl movb %ah,%cl rorl $4,%edx xorl (%ebp,%ebx,1),%esi movb %dl,%bl xorl 0x200(%ebp,%ecx,1),%esi movb %dh,%cl shrl $16,%eax xorl 0x100(%ebp,%ebx,1),%esi movb %ah,%bl shrl $16,%edx xorl 0x300(%ebp,%ecx,1),%esi movb %dh,%cl andl $0xff,%eax andl $0xff,%edx xorl 0x600(%ebp,%ebx,1),%esi xorl 0x700(%ebp,%ecx,1),%esi movl (%esp),%ecx xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi addl $4,%esp ret .size _x86_DES_decrypt,.-_x86_DES_decrypt .globl DES_encrypt1 .type DES_encrypt1,@function .align 16 DES_encrypt1: .L_DES_encrypt1_begin: pushl %esi pushl %edi movl 12(%esp),%esi xorl %ecx,%ecx pushl %ebx pushl %ebp movl (%esi),%eax movl 28(%esp),%ebx movl 4(%esi),%edi roll $4,%eax movl %eax,%esi xorl %edi,%eax andl $0xf0f0f0f0,%eax xorl %eax,%esi xorl %eax,%edi roll $20,%edi movl %edi,%eax xorl %esi,%edi andl $0xfff0000f,%edi xorl %edi,%eax xorl %edi,%esi roll $14,%eax movl %eax,%edi xorl %esi,%eax andl $0x33333333,%eax xorl %eax,%edi xorl %eax,%esi roll $22,%esi movl %esi,%eax xorl %edi,%esi andl $0x03fc03fc,%esi xorl %esi,%eax xorl %esi,%edi roll $9,%eax movl %eax,%esi xorl %edi,%eax andl $0xaaaaaaaa,%eax xorl %eax,%esi xorl %eax,%edi roll $1,%edi call .L000pic_point .L000pic_point: popl %ebp leal .Ldes_sptrans-.L000pic_point(%ebp),%ebp movl 24(%esp),%ecx cmpl $0,%ebx je .L001decrypt call _x86_DES_encrypt jmp .L002done .L001decrypt: call _x86_DES_decrypt .L002done: movl 20(%esp),%edx rorl $1,%esi movl %edi,%eax xorl %esi,%edi andl $0xaaaaaaaa,%edi xorl %edi,%eax xorl %edi,%esi roll $23,%eax movl %eax,%edi xorl %esi,%eax andl $0x03fc03fc,%eax xorl %eax,%edi xorl %eax,%esi roll $10,%edi movl %edi,%eax xorl %esi,%edi andl $0x33333333,%edi xorl %edi,%eax xorl %edi,%esi roll $18,%esi movl %esi,%edi xorl %eax,%esi andl $0xfff0000f,%esi xorl %esi,%edi xorl %esi,%eax roll $12,%edi movl %edi,%esi xorl %eax,%edi andl $0xf0f0f0f0,%edi xorl %edi,%esi xorl %edi,%eax rorl $4,%eax movl %eax,(%edx) movl %esi,4(%edx) popl %ebp popl %ebx popl %edi popl %esi ret .size DES_encrypt1,.-.L_DES_encrypt1_begin .globl DES_encrypt2 .type DES_encrypt2,@function .align 16 DES_encrypt2: .L_DES_encrypt2_begin: pushl %esi pushl %edi movl 12(%esp),%eax xorl %ecx,%ecx pushl %ebx pushl %ebp movl (%eax),%esi movl 28(%esp),%ebx roll $3,%esi movl 4(%eax),%edi 
roll $3,%edi call .L003pic_point .L003pic_point: popl %ebp leal .Ldes_sptrans-.L003pic_point(%ebp),%ebp movl 24(%esp),%ecx cmpl $0,%ebx je .L004decrypt call _x86_DES_encrypt jmp .L005done .L004decrypt: call _x86_DES_decrypt .L005done: rorl $3,%edi movl 20(%esp),%eax rorl $3,%esi movl %edi,(%eax) movl %esi,4(%eax) popl %ebp popl %ebx popl %edi popl %esi ret .size DES_encrypt2,.-.L_DES_encrypt2_begin .globl DES_encrypt3 .type DES_encrypt3,@function .align 16 DES_encrypt3: .L_DES_encrypt3_begin: pushl %ebx movl 8(%esp),%ebx pushl %ebp pushl %esi pushl %edi movl (%ebx),%edi movl 4(%ebx),%esi subl $12,%esp roll $4,%edi movl %edi,%edx xorl %esi,%edi andl $0xf0f0f0f0,%edi xorl %edi,%edx xorl %edi,%esi roll $20,%esi movl %esi,%edi xorl %edx,%esi andl $0xfff0000f,%esi xorl %esi,%edi xorl %esi,%edx roll $14,%edi movl %edi,%esi xorl %edx,%edi andl $0x33333333,%edi xorl %edi,%esi xorl %edi,%edx roll $22,%edx movl %edx,%edi xorl %esi,%edx andl $0x03fc03fc,%edx xorl %edx,%edi xorl %edx,%esi roll $9,%edi movl %edi,%edx xorl %esi,%edi andl $0xaaaaaaaa,%edi xorl %edi,%edx xorl %edi,%esi rorl $3,%edx rorl $2,%esi movl %esi,4(%ebx) movl 36(%esp),%eax movl %edx,(%ebx) movl 40(%esp),%edi movl 44(%esp),%esi movl $1,8(%esp) movl %eax,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin movl $0,8(%esp) movl %edi,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin movl $1,8(%esp) movl %esi,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin addl $12,%esp movl (%ebx),%edi movl 4(%ebx),%esi roll $2,%esi roll $3,%edi movl %edi,%eax xorl %esi,%edi andl $0xaaaaaaaa,%edi xorl %edi,%eax xorl %edi,%esi roll $23,%eax movl %eax,%edi xorl %esi,%eax andl $0x03fc03fc,%eax xorl %eax,%edi xorl %eax,%esi roll $10,%edi movl %edi,%eax xorl %esi,%edi andl $0x33333333,%edi xorl %edi,%eax xorl %edi,%esi roll $18,%esi movl %esi,%edi xorl %eax,%esi andl $0xfff0000f,%esi xorl %esi,%edi xorl %esi,%eax roll $12,%edi movl %edi,%esi xorl %eax,%edi andl $0xf0f0f0f0,%edi xorl %edi,%esi xorl %edi,%eax rorl $4,%eax movl %eax,(%ebx) movl %esi,4(%ebx) popl %edi popl %esi popl %ebp popl %ebx ret .size DES_encrypt3,.-.L_DES_encrypt3_begin .globl DES_decrypt3 .type DES_decrypt3,@function .align 16 DES_decrypt3: .L_DES_decrypt3_begin: pushl %ebx movl 8(%esp),%ebx pushl %ebp pushl %esi pushl %edi movl (%ebx),%edi movl 4(%ebx),%esi subl $12,%esp roll $4,%edi movl %edi,%edx xorl %esi,%edi andl $0xf0f0f0f0,%edi xorl %edi,%edx xorl %edi,%esi roll $20,%esi movl %esi,%edi xorl %edx,%esi andl $0xfff0000f,%esi xorl %esi,%edi xorl %esi,%edx roll $14,%edi movl %edi,%esi xorl %edx,%edi andl $0x33333333,%edi xorl %edi,%esi xorl %edi,%edx roll $22,%edx movl %edx,%edi xorl %esi,%edx andl $0x03fc03fc,%edx xorl %edx,%edi xorl %edx,%esi roll $9,%edi movl %edi,%edx xorl %esi,%edi andl $0xaaaaaaaa,%edi xorl %edi,%edx xorl %edi,%esi rorl $3,%edx rorl $2,%esi movl %esi,4(%ebx) movl 36(%esp),%esi movl %edx,(%ebx) movl 40(%esp),%edi movl 44(%esp),%eax movl $0,8(%esp) movl %eax,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin movl $1,8(%esp) movl %edi,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin movl $0,8(%esp) movl %esi,4(%esp) movl %ebx,(%esp) call .L_DES_encrypt2_begin addl $12,%esp movl (%ebx),%edi movl 4(%ebx),%esi roll $2,%esi roll $3,%edi movl %edi,%eax xorl %esi,%edi andl $0xaaaaaaaa,%edi xorl %edi,%eax xorl %edi,%esi roll $23,%eax movl %eax,%edi xorl %esi,%eax andl $0x03fc03fc,%eax xorl %eax,%edi xorl %eax,%esi roll $10,%edi movl %edi,%eax xorl %esi,%edi andl $0x33333333,%edi xorl %edi,%eax xorl %edi,%esi roll $18,%esi movl %esi,%edi xorl %eax,%esi andl 
$0xfff0000f,%esi xorl %esi,%edi xorl %esi,%eax roll $12,%edi movl %edi,%esi xorl %eax,%edi andl $0xf0f0f0f0,%edi xorl %edi,%esi xorl %edi,%eax rorl $4,%eax movl %eax,(%ebx) movl %esi,4(%ebx) popl %edi popl %esi popl %ebp popl %ebx ret .size DES_decrypt3,.-.L_DES_decrypt3_begin .globl DES_ncbc_encrypt .type DES_ncbc_encrypt,@function .align 16 DES_ncbc_encrypt: .L_DES_ncbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi pushl %edi pushl %esi pushl %edi pushl %esi movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi movl 56(%esp),%ecx pushl %ecx movl 52(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx jz .L006decrypt andl $4294967288,%ebp movl 12(%esp),%eax movl 16(%esp),%ebx jz .L007encrypt_finish .L008encrypt_loop: movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax xorl %edx,%ebx movl %eax,12(%esp) movl %ebx,16(%esp) call .L_DES_encrypt1_begin movl 12(%esp),%eax movl 16(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L008encrypt_loop .L007encrypt_finish: movl 56(%esp),%ebp andl $7,%ebp jz .L009finish call .L010PIC_point .L010PIC_point: popl %edx leal .L011cbc_enc_jmp_table-.L010PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp .L012ej7: movb 6(%esi),%dh shll $8,%edx .L013ej6: movb 5(%esi),%dh .L014ej5: movb 4(%esi),%dl .L015ej4: movl (%esi),%ecx jmp .L016ejend .L017ej3: movb 2(%esi),%ch shll $8,%ecx .L018ej2: movb 1(%esi),%ch .L019ej1: movb (%esi),%cl .L016ejend: xorl %ecx,%eax xorl %edx,%ebx movl %eax,12(%esp) movl %ebx,16(%esp) call .L_DES_encrypt1_begin movl 12(%esp),%eax movl 16(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) jmp .L009finish .L006decrypt: andl $4294967288,%ebp movl 20(%esp),%eax movl 24(%esp),%ebx jz .L020decrypt_finish .L021decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx movl %eax,12(%esp) movl %ebx,16(%esp) call .L_DES_encrypt1_begin movl 12(%esp),%eax movl 16(%esp),%ebx movl 20(%esp),%ecx movl 24(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx movl %ecx,(%edi) movl %edx,4(%edi) movl %eax,20(%esp) movl %ebx,24(%esp) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L021decrypt_loop .L020decrypt_finish: movl 56(%esp),%ebp andl $7,%ebp jz .L009finish movl (%esi),%eax movl 4(%esi),%ebx movl %eax,12(%esp) movl %ebx,16(%esp) call .L_DES_encrypt1_begin movl 12(%esp),%eax movl 16(%esp),%ebx movl 20(%esp),%ecx movl 24(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx .L022dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx .L023dj6: movb %dh,5(%edi) .L024dj5: movb %dl,4(%edi) .L025dj4: movl %ecx,(%edi) jmp .L026djend .L027dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx .L028dj2: movb %ch,1(%esi) .L029dj1: movb %cl,(%esi) .L026djend: jmp .L009finish .L009finish: movl 64(%esp),%ecx addl $28,%esp movl %eax,(%ecx) movl %ebx,4(%ecx) popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L011cbc_enc_jmp_table: .long 0 .long .L019ej1-.L010PIC_point .long .L018ej2-.L010PIC_point .long .L017ej3-.L010PIC_point .long .L015ej4-.L010PIC_point .long .L014ej5-.L010PIC_point .long .L013ej6-.L010PIC_point .long .L012ej7-.L010PIC_point .align 64 .size DES_ncbc_encrypt,.-.L_DES_ncbc_encrypt_begin .globl DES_ede3_cbc_encrypt .type DES_ede3_cbc_encrypt,@function .align 16 DES_ede3_cbc_encrypt: .L_DES_ede3_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp movl 44(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi pushl %edi pushl %esi pushl %edi pushl 
%esi movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi movl 64(%esp),%ecx movl 56(%esp),%eax pushl %eax movl 56(%esp),%eax pushl %eax movl 56(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx jz .L030decrypt andl $4294967288,%ebp movl 16(%esp),%eax movl 20(%esp),%ebx jz .L031encrypt_finish .L032encrypt_loop: movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax xorl %edx,%ebx movl %eax,16(%esp) movl %ebx,20(%esp) call .L_DES_encrypt3_begin movl 16(%esp),%eax movl 20(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L032encrypt_loop .L031encrypt_finish: movl 60(%esp),%ebp andl $7,%ebp jz .L033finish call .L034PIC_point .L034PIC_point: popl %edx leal .L035cbc_enc_jmp_table-.L034PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp .L036ej7: movb 6(%esi),%dh shll $8,%edx .L037ej6: movb 5(%esi),%dh .L038ej5: movb 4(%esi),%dl .L039ej4: movl (%esi),%ecx jmp .L040ejend .L041ej3: movb 2(%esi),%ch shll $8,%ecx .L042ej2: movb 1(%esi),%ch .L043ej1: movb (%esi),%cl .L040ejend: xorl %ecx,%eax xorl %edx,%ebx movl %eax,16(%esp) movl %ebx,20(%esp) call .L_DES_encrypt3_begin movl 16(%esp),%eax movl 20(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) jmp .L033finish .L030decrypt: andl $4294967288,%ebp movl 24(%esp),%eax movl 28(%esp),%ebx jz .L044decrypt_finish .L045decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx movl %eax,16(%esp) movl %ebx,20(%esp) call .L_DES_decrypt3_begin movl 16(%esp),%eax movl 20(%esp),%ebx movl 24(%esp),%ecx movl 28(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx movl %ecx,(%edi) movl %edx,4(%edi) movl %eax,24(%esp) movl %ebx,28(%esp) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L045decrypt_loop .L044decrypt_finish: movl 60(%esp),%ebp andl $7,%ebp jz .L033finish movl (%esi),%eax movl 4(%esi),%ebx movl %eax,16(%esp) movl %ebx,20(%esp) call .L_DES_decrypt3_begin movl 16(%esp),%eax movl 20(%esp),%ebx movl 24(%esp),%ecx movl 28(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx .L046dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx .L047dj6: movb %dh,5(%edi) .L048dj5: movb %dl,4(%edi) .L049dj4: movl %ecx,(%edi) jmp .L050djend .L051dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx .L052dj2: movb %ch,1(%esi) .L053dj1: movb %cl,(%esi) .L050djend: jmp .L033finish .L033finish: movl 76(%esp),%ecx addl $32,%esp movl %eax,(%ecx) movl %ebx,4(%ecx) popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L035cbc_enc_jmp_table: .long 0 .long .L043ej1-.L034PIC_point .long .L042ej2-.L034PIC_point .long .L041ej3-.L034PIC_point .long .L039ej4-.L034PIC_point .long .L038ej5-.L034PIC_point .long .L037ej6-.L034PIC_point .long .L036ej7-.L034PIC_point .align 64 .size DES_ede3_cbc_encrypt,.-.L_DES_ede3_cbc_encrypt_begin .align 64 DES_SPtrans: .Ldes_sptrans: .long 34080768,524288,33554434,34080770 .long 33554432,526338,524290,33554434 .long 526338,34080768,34078720,2050 .long 33556482,33554432,0,524290 .long 524288,2,33556480,526336 .long 34080770,34078720,2050,33556480 .long 2,2048,526336,34078722 .long 2048,33556482,34078722,0 .long 0,34080770,33556480,524290 .long 34080768,524288,2050,33556480 .long 34078722,2048,526336,33554434 .long 526338,2,33554434,34078720 .long 34080770,526336,34078720,33556482 .long 33554432,2050,524290,0 .long 524288,33554432,33556482,34080768 .long 2,34078722,2048,526338 .long 1074823184,0,1081344,1074790400 .long 1073741840,32784,1073774592,1081344 .long 32768,1074790416,16,1073774592 .long 1048592,1074823168,1074790400,16 .long 
1048576,1073774608,1074790416,32768 .long 1081360,1073741824,0,1048592 .long 1073774608,1081360,1074823168,1073741840 .long 1073741824,1048576,32784,1074823184 .long 1048592,1074823168,1073774592,1081360 .long 1074823184,1048592,1073741840,0 .long 1073741824,32784,1048576,1074790416 .long 32768,1073741824,1081360,1073774608 .long 1074823168,32768,0,1073741840 .long 16,1074823184,1081344,1074790400 .long 1074790416,1048576,32784,1073774592 .long 1073774608,16,1074790400,1081344 .long 67108865,67371264,256,67109121 .long 262145,67108864,67109121,262400 .long 67109120,262144,67371008,1 .long 67371265,257,1,67371009 .long 0,262145,67371264,256 .long 257,67371265,262144,67108865 .long 67371009,67109120,262401,67371008 .long 262400,0,67108864,262401 .long 67371264,256,1,262144 .long 257,262145,67371008,67109121 .long 0,67371264,262400,67371009 .long 262145,67108864,67371265,1 .long 262401,67108865,67108864,67371265 .long 262144,67109120,67109121,262400 .long 67109120,0,67371009,257 .long 67108865,262401,256,67371008 .long 4198408,268439552,8,272633864 .long 0,272629760,268439560,4194312 .long 272633856,268435464,268435456,4104 .long 268435464,4198408,4194304,268435456 .long 272629768,4198400,4096,8 .long 4198400,268439560,272629760,4096 .long 4104,0,4194312,272633856 .long 268439552,272629768,272633864,4194304 .long 272629768,4104,4194304,268435464 .long 4198400,268439552,8,272629760 .long 268439560,0,4096,4194312 .long 0,272629768,272633856,4096 .long 268435456,272633864,4198408,4194304 .long 272633864,8,268439552,4198408 .long 4194312,4198400,272629760,268439560 .long 4104,268435456,268435464,272633856 .long 134217728,65536,1024,134284320 .long 134283296,134218752,66592,134283264 .long 65536,32,134217760,66560 .long 134218784,134283296,134284288,0 .long 66560,134217728,65568,1056 .long 134218752,66592,0,134217760 .long 32,134218784,134284320,65568 .long 134283264,1024,1056,134284288 .long 134284288,134218784,65568,134283264 .long 65536,32,134217760,134218752 .long 134217728,66560,134284320,0 .long 66592,134217728,1024,65568 .long 134218784,1024,0,134284320 .long 134283296,134284288,1056,65536 .long 66560,134283296,134218752,1056 .long 32,66592,134283264,134217760 .long 2147483712,2097216,0,2149588992 .long 2097216,8192,2147491904,2097152 .long 8256,2149589056,2105344,2147483648 .long 2147491840,2147483712,2149580800,2105408 .long 2097152,2147491904,2149580864,0 .long 8192,64,2149588992,2149580864 .long 2149589056,2149580800,2147483648,8256 .long 64,2105344,2105408,2147491840 .long 8256,2147483648,2147491840,2105408 .long 2149588992,2097216,0,2147491840 .long 2147483648,8192,2149580864,2097152 .long 2097216,2149589056,2105344,64 .long 2149589056,2105344,2097152,2147491904 .long 2147483712,2149580800,2105408,0 .long 8192,2147483712,2147491904,2149588992 .long 2149580800,8256,64,2149580864 .long 16384,512,16777728,16777220 .long 16794116,16388,16896,0 .long 16777216,16777732,516,16793600 .long 4,16794112,16793600,516 .long 16777732,16384,16388,16794116 .long 0,16777728,16777220,16896 .long 16793604,16900,16794112,4 .long 16900,16793604,512,16777216 .long 16900,16793600,16793604,516 .long 16384,512,16777216,16793604 .long 16777732,16900,16896,0 .long 512,16777220,4,16777728 .long 0,16777732,16777728,16896 .long 516,16384,16794116,16777216 .long 16794112,4,16388,16794116 .long 16777220,16794112,16793600,16388 .long 545259648,545390592,131200,0 .long 537001984,8388736,545259520,545390720 .long 128,536870912,8519680,131200 .long 8519808,537002112,536871040,545259520 .long 
131072,8519808,8388736,537001984 .long 545390720,536871040,0,8519680 .long 536870912,8388608,537002112,545259648 .long 8388608,131072,545390592,128 .long 8388608,131072,536871040,545390720 .long 131200,536870912,0,8519680 .long 545259648,537002112,537001984,8388736 .long 545390592,128,8388736,537001984 .long 545390720,8388608,545259520,536871040 .long 8519680,131200,537002112,545259520 .long 128,545390592,8519808,0 .long 536870912,545259648,131072,8519808
#endif
Index: head/secure/lib/libcrypto/i386/ghash-x86.S
===================================================================
--- head/secure/lib/libcrypto/i386/ghash-x86.S (revision 299480)
+++ head/secure/lib/libcrypto/i386/ghash-x86.S (revision 299481)
@@ -1,2534 +1,2535 @@
- # $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from ghash-x86.pl.
#ifdef PIC
.file "ghash-x86.S" .text .globl gcm_gmult_4bit_x86 .type gcm_gmult_4bit_x86,@function .align 16 gcm_gmult_4bit_x86: .L_gcm_gmult_4bit_x86_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi subl $84,%esp movl 104(%esp),%edi movl 108(%esp),%esi movl (%edi),%ebp movl 4(%edi),%edx movl 8(%edi),%ecx movl 12(%edi),%ebx movl $0,16(%esp) movl $471859200,20(%esp) movl $943718400,24(%esp) movl $610271232,28(%esp) movl $1887436800,32(%esp) movl $1822425088,36(%esp) movl $1220542464,40(%esp) movl $1423966208,44(%esp) movl $3774873600,48(%esp) movl $4246732800,52(%esp) movl $3644850176,56(%esp) movl $3311403008,60(%esp) movl $2441084928,64(%esp) movl $2376073216,68(%esp) movl $2847932416,72(%esp) movl $3051356160,76(%esp) movl %ebp,(%esp) movl %edx,4(%esp) movl %ecx,8(%esp) movl %ebx,12(%esp) shrl $20,%ebx andl $240,%ebx movl 4(%esi,%ebx,1),%ebp movl (%esi,%ebx,1),%edx movl 12(%esi,%ebx,1),%ecx movl 8(%esi,%ebx,1),%ebx xorl %eax,%eax movl $15,%edi jmp .L000x86_loop .align 16 .L000x86_loop: movb %bl,%al shrdl $4,%ecx,%ebx andb $15,%al shrdl $4,%edx,%ecx shrdl $4,%ebp,%edx shrl $4,%ebp xorl 16(%esp,%eax,4),%ebp movb (%esp,%edi,1),%al andb $240,%al xorl 8(%esi,%eax,1),%ebx xorl 12(%esi,%eax,1),%ecx xorl (%esi,%eax,1),%edx xorl 4(%esi,%eax,1),%ebp decl %edi js .L001x86_break movb %bl,%al shrdl $4,%ecx,%ebx andb $15,%al shrdl $4,%edx,%ecx shrdl $4,%ebp,%edx shrl $4,%ebp xorl 16(%esp,%eax,4),%ebp movb (%esp,%edi,1),%al shlb $4,%al xorl 8(%esi,%eax,1),%ebx xorl 12(%esi,%eax,1),%ecx xorl (%esi,%eax,1),%edx xorl 4(%esi,%eax,1),%ebp jmp .L000x86_loop .align 16 .L001x86_break: bswap %ebx bswap %ecx bswap %edx bswap %ebp movl 104(%esp),%edi movl %ebx,12(%edi) movl %ecx,8(%edi) movl %edx,4(%edi) movl %ebp,(%edi) addl $84,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin .globl gcm_ghash_4bit_x86 .type gcm_ghash_4bit_x86,@function .align 16 gcm_ghash_4bit_x86: .L_gcm_ghash_4bit_x86_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi subl $84,%esp movl 104(%esp),%ebx movl 108(%esp),%esi movl 112(%esp),%edi movl 116(%esp),%ecx addl %edi,%ecx movl %ecx,116(%esp) movl (%ebx),%ebp movl 4(%ebx),%edx movl 8(%ebx),%ecx movl 12(%ebx),%ebx movl $0,16(%esp) movl $471859200,20(%esp) movl $943718400,24(%esp) movl $610271232,28(%esp) movl $1887436800,32(%esp) movl $1822425088,36(%esp) movl $1220542464,40(%esp) movl $1423966208,44(%esp) movl $3774873600,48(%esp) movl $4246732800,52(%esp) movl $3644850176,56(%esp) movl $3311403008,60(%esp) movl $2441084928,64(%esp) movl $2376073216,68(%esp) movl $2847932416,72(%esp) movl $3051356160,76(%esp) .align 16 .L002x86_outer_loop: xorl 12(%edi),%ebx xorl 8(%edi),%ecx xorl 4(%edi),%edx xorl (%edi),%ebp movl
%ebx,12(%esp) movl %ecx,8(%esp) movl %edx,4(%esp) movl %ebp,(%esp) shrl $20,%ebx andl $240,%ebx movl 4(%esi,%ebx,1),%ebp movl (%esi,%ebx,1),%edx movl 12(%esi,%ebx,1),%ecx movl 8(%esi,%ebx,1),%ebx xorl %eax,%eax movl $15,%edi jmp .L003x86_loop .align 16 .L003x86_loop: movb %bl,%al shrdl $4,%ecx,%ebx andb $15,%al shrdl $4,%edx,%ecx shrdl $4,%ebp,%edx shrl $4,%ebp xorl 16(%esp,%eax,4),%ebp movb (%esp,%edi,1),%al andb $240,%al xorl 8(%esi,%eax,1),%ebx xorl 12(%esi,%eax,1),%ecx xorl (%esi,%eax,1),%edx xorl 4(%esi,%eax,1),%ebp decl %edi js .L004x86_break movb %bl,%al shrdl $4,%ecx,%ebx andb $15,%al shrdl $4,%edx,%ecx shrdl $4,%ebp,%edx shrl $4,%ebp xorl 16(%esp,%eax,4),%ebp movb (%esp,%edi,1),%al shlb $4,%al xorl 8(%esi,%eax,1),%ebx xorl 12(%esi,%eax,1),%ecx xorl (%esi,%eax,1),%edx xorl 4(%esi,%eax,1),%ebp jmp .L003x86_loop .align 16 .L004x86_break: bswap %ebx bswap %ecx bswap %edx bswap %ebp movl 112(%esp),%edi leal 16(%edi),%edi cmpl 116(%esp),%edi movl %edi,112(%esp) jb .L002x86_outer_loop movl 104(%esp),%edi movl %ebx,12(%edi) movl %ecx,8(%edi) movl %edx,4(%edi) movl %ebp,(%edi) addl $84,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin .globl gcm_gmult_4bit_mmx .type gcm_gmult_4bit_mmx,@function .align 16 gcm_gmult_4bit_mmx: .L_gcm_gmult_4bit_mmx_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%edi movl 24(%esp),%esi call .L005pic_point .L005pic_point: popl %eax leal .Lrem_4bit-.L005pic_point(%eax),%eax movzbl 15(%edi),%ebx xorl %ecx,%ecx movl %ebx,%edx movb %dl,%cl movl $14,%ebp shlb $4,%cl andl $240,%edx movq 8(%esi,%ecx,1),%mm0 movq (%esi,%ecx,1),%mm1 movd %mm0,%ebx jmp .L006mmx_loop .align 16 .L006mmx_loop: psrlq $4,%mm0 andl $15,%ebx movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%edx,1),%mm0 movb (%edi,%ebp,1),%cl psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 decl %ebp movd %mm0,%ebx pxor (%esi,%edx,1),%mm1 movl %ecx,%edx pxor %mm2,%mm0 js .L007mmx_break shlb $4,%cl andl $15,%ebx psrlq $4,%mm0 andl $240,%edx movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%ecx,1),%mm0 psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 movd %mm0,%ebx pxor (%esi,%ecx,1),%mm1 pxor %mm2,%mm0 jmp .L006mmx_loop .align 16 .L007mmx_break: shlb $4,%cl andl $15,%ebx psrlq $4,%mm0 andl $240,%edx movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%ecx,1),%mm0 psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 movd %mm0,%ebx pxor (%esi,%ecx,1),%mm1 pxor %mm2,%mm0 psrlq $4,%mm0 andl $15,%ebx movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%edx,1),%mm0 psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 movd %mm0,%ebx pxor (%esi,%edx,1),%mm1 pxor %mm2,%mm0 psrlq $32,%mm0 movd %mm1,%edx psrlq $32,%mm1 movd %mm0,%ecx movd %mm1,%ebp bswap %ebx bswap %edx bswap %ecx bswap %ebp emms movl %ebx,12(%edi) movl %edx,4(%edi) movl %ecx,8(%edi) movl %ebp,(%edi) popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin .globl gcm_ghash_4bit_mmx .type gcm_ghash_4bit_mmx,@function .align 16 gcm_ghash_4bit_mmx: .L_gcm_ghash_4bit_mmx_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl 24(%esp),%ebx movl 28(%esp),%ecx movl 32(%esp),%edx movl %esp,%ebp call .L008pic_point .L008pic_point: popl %esi leal .Lrem_8bit-.L008pic_point(%esi),%esi subl $544,%esp andl $-64,%esp subl $16,%esp addl %ecx,%edx movl %eax,544(%esp) movl %edx,552(%esp) movl %ebp,556(%esp) addl $128,%ebx leal 144(%esp),%edi leal 400(%esp),%ebp movl -120(%ebx),%edx movq -120(%ebx),%mm0 movq -128(%ebx),%mm3 shll $4,%edx movb %dl,(%esp) movl -104(%ebx),%edx movq -104(%ebx),%mm2 movq -112(%ebx),%mm5 movq 
%mm0,-128(%edi) psrlq $4,%mm0 movq %mm3,(%edi) movq %mm3,%mm7 psrlq $4,%mm3 shll $4,%edx movb %dl,1(%esp) movl -88(%ebx),%edx movq -88(%ebx),%mm1 psllq $60,%mm7 movq -96(%ebx),%mm4 por %mm7,%mm0 movq %mm2,-120(%edi) psrlq $4,%mm2 movq %mm5,8(%edi) movq %mm5,%mm6 movq %mm0,-128(%ebp) psrlq $4,%mm5 movq %mm3,(%ebp) shll $4,%edx movb %dl,2(%esp) movl -72(%ebx),%edx movq -72(%ebx),%mm0 psllq $60,%mm6 movq -80(%ebx),%mm3 por %mm6,%mm2 movq %mm1,-112(%edi) psrlq $4,%mm1 movq %mm4,16(%edi) movq %mm4,%mm7 movq %mm2,-120(%ebp) psrlq $4,%mm4 movq %mm5,8(%ebp) shll $4,%edx movb %dl,3(%esp) movl -56(%ebx),%edx movq -56(%ebx),%mm2 psllq $60,%mm7 movq -64(%ebx),%mm5 por %mm7,%mm1 movq %mm0,-104(%edi) psrlq $4,%mm0 movq %mm3,24(%edi) movq %mm3,%mm6 movq %mm1,-112(%ebp) psrlq $4,%mm3 movq %mm4,16(%ebp) shll $4,%edx movb %dl,4(%esp) movl -40(%ebx),%edx movq -40(%ebx),%mm1 psllq $60,%mm6 movq -48(%ebx),%mm4 por %mm6,%mm0 movq %mm2,-96(%edi) psrlq $4,%mm2 movq %mm5,32(%edi) movq %mm5,%mm7 movq %mm0,-104(%ebp) psrlq $4,%mm5 movq %mm3,24(%ebp) shll $4,%edx movb %dl,5(%esp) movl -24(%ebx),%edx movq -24(%ebx),%mm0 psllq $60,%mm7 movq -32(%ebx),%mm3 por %mm7,%mm2 movq %mm1,-88(%edi) psrlq $4,%mm1 movq %mm4,40(%edi) movq %mm4,%mm6 movq %mm2,-96(%ebp) psrlq $4,%mm4 movq %mm5,32(%ebp) shll $4,%edx movb %dl,6(%esp) movl -8(%ebx),%edx movq -8(%ebx),%mm2 psllq $60,%mm6 movq -16(%ebx),%mm5 por %mm6,%mm1 movq %mm0,-80(%edi) psrlq $4,%mm0 movq %mm3,48(%edi) movq %mm3,%mm7 movq %mm1,-88(%ebp) psrlq $4,%mm3 movq %mm4,40(%ebp) shll $4,%edx movb %dl,7(%esp) movl 8(%ebx),%edx movq 8(%ebx),%mm1 psllq $60,%mm7 movq (%ebx),%mm4 por %mm7,%mm0 movq %mm2,-72(%edi) psrlq $4,%mm2 movq %mm5,56(%edi) movq %mm5,%mm6 movq %mm0,-80(%ebp) psrlq $4,%mm5 movq %mm3,48(%ebp) shll $4,%edx movb %dl,8(%esp) movl 24(%ebx),%edx movq 24(%ebx),%mm0 psllq $60,%mm6 movq 16(%ebx),%mm3 por %mm6,%mm2 movq %mm1,-64(%edi) psrlq $4,%mm1 movq %mm4,64(%edi) movq %mm4,%mm7 movq %mm2,-72(%ebp) psrlq $4,%mm4 movq %mm5,56(%ebp) shll $4,%edx movb %dl,9(%esp) movl 40(%ebx),%edx movq 40(%ebx),%mm2 psllq $60,%mm7 movq 32(%ebx),%mm5 por %mm7,%mm1 movq %mm0,-56(%edi) psrlq $4,%mm0 movq %mm3,72(%edi) movq %mm3,%mm6 movq %mm1,-64(%ebp) psrlq $4,%mm3 movq %mm4,64(%ebp) shll $4,%edx movb %dl,10(%esp) movl 56(%ebx),%edx movq 56(%ebx),%mm1 psllq $60,%mm6 movq 48(%ebx),%mm4 por %mm6,%mm0 movq %mm2,-48(%edi) psrlq $4,%mm2 movq %mm5,80(%edi) movq %mm5,%mm7 movq %mm0,-56(%ebp) psrlq $4,%mm5 movq %mm3,72(%ebp) shll $4,%edx movb %dl,11(%esp) movl 72(%ebx),%edx movq 72(%ebx),%mm0 psllq $60,%mm7 movq 64(%ebx),%mm3 por %mm7,%mm2 movq %mm1,-40(%edi) psrlq $4,%mm1 movq %mm4,88(%edi) movq %mm4,%mm6 movq %mm2,-48(%ebp) psrlq $4,%mm4 movq %mm5,80(%ebp) shll $4,%edx movb %dl,12(%esp) movl 88(%ebx),%edx movq 88(%ebx),%mm2 psllq $60,%mm6 movq 80(%ebx),%mm5 por %mm6,%mm1 movq %mm0,-32(%edi) psrlq $4,%mm0 movq %mm3,96(%edi) movq %mm3,%mm7 movq %mm1,-40(%ebp) psrlq $4,%mm3 movq %mm4,88(%ebp) shll $4,%edx movb %dl,13(%esp) movl 104(%ebx),%edx movq 104(%ebx),%mm1 psllq $60,%mm7 movq 96(%ebx),%mm4 por %mm7,%mm0 movq %mm2,-24(%edi) psrlq $4,%mm2 movq %mm5,104(%edi) movq %mm5,%mm6 movq %mm0,-32(%ebp) psrlq $4,%mm5 movq %mm3,96(%ebp) shll $4,%edx movb %dl,14(%esp) movl 120(%ebx),%edx movq 120(%ebx),%mm0 psllq $60,%mm6 movq 112(%ebx),%mm3 por %mm6,%mm2 movq %mm1,-16(%edi) psrlq $4,%mm1 movq %mm4,112(%edi) movq %mm4,%mm7 movq %mm2,-24(%ebp) psrlq $4,%mm4 movq %mm5,104(%ebp) shll $4,%edx movb %dl,15(%esp) psllq $60,%mm7 por %mm7,%mm1 movq %mm0,-8(%edi) psrlq $4,%mm0 movq %mm3,120(%edi) movq %mm3,%mm6 movq 
%mm1,-16(%ebp) psrlq $4,%mm3 movq %mm4,112(%ebp) psllq $60,%mm6 por %mm6,%mm0 movq %mm0,-8(%ebp) movq %mm3,120(%ebp) movq (%eax),%mm6 movl 8(%eax),%ebx movl 12(%eax),%edx .align 16 .L009outer: xorl 12(%ecx),%edx xorl 8(%ecx),%ebx pxor (%ecx),%mm6 leal 16(%ecx),%ecx movl %ebx,536(%esp) movq %mm6,528(%esp) movl %ecx,548(%esp) xorl %eax,%eax roll $8,%edx movb %dl,%al movl %eax,%ebp andb $15,%al shrl $4,%ebp pxor %mm0,%mm0 roll $8,%edx pxor %mm1,%mm1 pxor %mm2,%mm2 movq 16(%esp,%eax,8),%mm7 movq 144(%esp,%eax,8),%mm6 movb %dl,%al movd %mm7,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 shrl $4,%edi pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm2 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movl 536(%esp),%edx movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm2,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm1 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm1,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm0 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm0,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm2 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm2,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm1 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movl 532(%esp),%edx movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm1,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm0 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm0,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm2 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm2,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm1 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 
400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm1,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm0 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movl 528(%esp),%edx movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm0,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm2 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm2,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm1 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm1,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm0 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm0,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm2 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movl 524(%esp),%edx movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm2,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm1 pxor 16(%esp,%eax,8),%mm7 pxor 144(%esp,%eax,8),%mm6 xorb (%esp,%ebp,1),%bl pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 movzbl %bl,%ebx pxor %mm2,%mm2 psllq $4,%mm1 movd %mm7,%ecx psrlq $4,%mm7 movq %mm6,%mm3 psrlq $4,%mm6 shll $4,%ecx pxor 16(%esp,%edi,8),%mm7 psllq $60,%mm3 movzbl %cl,%ecx pxor %mm3,%mm7 pxor 144(%esp,%edi,8),%mm6 pinsrw $2,(%esi,%ebx,2),%mm0 pxor %mm1,%mm6 movd %mm7,%edx pinsrw $3,(%esi,%ecx,2),%mm2 psllq $12,%mm0 pxor %mm0,%mm6 psrlq $32,%mm7 pxor %mm2,%mm6 movl 548(%esp),%ecx movd %mm7,%ebx movq %mm6,%mm3 psllw $8,%mm6 psrlw $8,%mm3 por %mm3,%mm6 bswap %edx pshufw $27,%mm6,%mm6 bswap %ebx cmpl 552(%esp),%ecx jne .L009outer movl 544(%esp),%eax movl %edx,12(%eax) movl %ebx,8(%eax) movq %mm6,(%eax) movl 556(%esp),%esp emms popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin .globl gcm_init_clmul .type gcm_init_clmul,@function .align 16 gcm_init_clmul: .L_gcm_init_clmul_begin: movl 4(%esp),%edx movl 8(%esp),%eax call .L010pic .L010pic: popl %ecx leal .Lbswap-.L010pic(%ecx),%ecx movdqu (%eax),%xmm2 pshufd $78,%xmm2,%xmm2 pshufd $255,%xmm2,%xmm4 movdqa %xmm2,%xmm3 psllq $1,%xmm2 pxor %xmm5,%xmm5 psrlq $63,%xmm3 pcmpgtd %xmm4,%xmm5 pslldq $8,%xmm3 por %xmm3,%xmm2 pand 16(%ecx),%xmm5 pxor %xmm5,%xmm2 movdqa %xmm2,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 xorps %xmm0,%xmm3 xorps %xmm1,%xmm3 movdqa %xmm3,%xmm4 
psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 pshufd $78,%xmm2,%xmm3 pshufd $78,%xmm0,%xmm4 pxor %xmm2,%xmm3 movdqu %xmm2,(%edx) pxor %xmm0,%xmm4 movdqu %xmm0,16(%edx) .byte 102,15,58,15,227,8 movdqu %xmm4,32(%edx) ret .size gcm_init_clmul,.-.L_gcm_init_clmul_begin .globl gcm_gmult_clmul .type gcm_gmult_clmul,@function .align 16 gcm_gmult_clmul: .L_gcm_gmult_clmul_begin: movl 4(%esp),%eax movl 8(%esp),%edx call .L011pic .L011pic: popl %ecx leal .Lbswap-.L011pic(%ecx),%ecx movdqu (%eax),%xmm0 movdqa (%ecx),%xmm5 movups (%edx),%xmm2 .byte 102,15,56,0,197 movups 32(%edx),%xmm4 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 xorps %xmm0,%xmm3 xorps %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%eax) ret .size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin .globl gcm_ghash_clmul .type gcm_ghash_clmul,@function .align 16 gcm_ghash_clmul: .L_gcm_ghash_clmul_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl 24(%esp),%edx movl 28(%esp),%esi movl 32(%esp),%ebx call .L012pic .L012pic: popl %ecx leal .Lbswap-.L012pic(%ecx),%ecx movdqu (%eax),%xmm0 movdqa (%ecx),%xmm5 movdqu (%edx),%xmm2 .byte 102,15,56,0,197 subl $16,%ebx jz .L013odd_tail movdqu (%esi),%xmm3 movdqu 16(%esi),%xmm6 .byte 102,15,56,0,221 .byte 102,15,56,0,245 movdqu 32(%edx),%xmm5 pxor %xmm3,%xmm0 pshufd $78,%xmm6,%xmm3 movdqa %xmm6,%xmm7 pxor %xmm6,%xmm3 leal 32(%esi),%esi .byte 102,15,58,68,242,0 .byte 102,15,58,68,250,17 .byte 102,15,58,68,221,0 movups 16(%edx),%xmm2 nop subl $32,%ebx jbe .L014even_tail jmp .L015mod_loop .align 32 .L015mod_loop: pshufd $78,%xmm0,%xmm4 movdqa %xmm0,%xmm1 pxor %xmm0,%xmm4 nop .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,229,16 movups (%edx),%xmm2 xorps %xmm6,%xmm0 movdqa (%ecx),%xmm5 xorps %xmm7,%xmm1 movdqu (%esi),%xmm7 pxor %xmm0,%xmm3 movdqu 16(%esi),%xmm6 pxor %xmm1,%xmm3 .byte 102,15,56,0,253 pxor %xmm3,%xmm4 movdqa %xmm4,%xmm3 psrldq $8,%xmm4 pslldq $8,%xmm3 pxor %xmm4,%xmm1 pxor %xmm3,%xmm0 .byte 102,15,56,0,245 pxor %xmm7,%xmm1 movdqa %xmm6,%xmm7 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 .byte 102,15,58,68,242,0 movups 32(%edx),%xmm5 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 pshufd $78,%xmm7,%xmm3 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm7,%xmm3 pxor %xmm4,%xmm1 .byte 102,15,58,68,250,17 movups 16(%edx),%xmm2 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 .byte 102,15,58,68,221,0 leal 32(%esi),%esi subl $32,%ebx ja .L015mod_loop .L014even_tail: pshufd $78,%xmm0,%xmm4 movdqa %xmm0,%xmm1 pxor %xmm0,%xmm4 .byte 102,15,58,68,194,0 .byte 
102,15,58,68,202,17 .byte 102,15,58,68,229,16 movdqa (%ecx),%xmm5 xorps %xmm6,%xmm0 xorps %xmm7,%xmm1 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 pxor %xmm3,%xmm4 movdqa %xmm4,%xmm3 psrldq $8,%xmm4 pslldq $8,%xmm3 pxor %xmm4,%xmm1 pxor %xmm3,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 testl %ebx,%ebx jnz .L016done movups (%edx),%xmm2 .L013odd_tail: movdqu (%esi),%xmm3 .byte 102,15,56,0,221 pxor %xmm3,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 xorps %xmm0,%xmm3 xorps %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 .L016done: .byte 102,15,56,0,197 movdqu %xmm0,(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin .align 64 .Lbswap: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 .align 64 .Lrem_8bit: .value 0,450,900,582,1800,1738,1164,1358 .value 3600,4050,3476,3158,2328,2266,2716,2910 .value 7200,7650,8100,7782,6952,6890,6316,6510 .value 4656,5106,4532,4214,5432,5370,5820,6014 .value 14400,14722,15300,14854,16200,16010,15564,15630 .value 13904,14226,13780,13334,12632,12442,13020,13086 .value 9312,9634,10212,9766,9064,8874,8428,8494 .value 10864,11186,10740,10294,11640,11450,12028,12094 .value 28800,28994,29444,29382,30600,30282,29708,30158 .value 32400,32594,32020,31958,31128,30810,31260,31710 .value 27808,28002,28452,28390,27560,27242,26668,27118 .value 25264,25458,24884,24822,26040,25722,26172,26622 .value 18624,18690,19268,19078,20424,19978,19532,19854 .value 18128,18194,17748,17558,16856,16410,16988,17310 .value 21728,21794,22372,22182,21480,21034,20588,20910 .value 23280,23346,22900,22710,24056,23610,24188,24510 .value 57600,57538,57988,58182,58888,59338,58764,58446 .value 61200,61138,60564,60758,59416,59866,60316,59998 .value 64800,64738,65188,65382,64040,64490,63916,63598 .value 62256,62194,61620,61814,62520,62970,63420,63102 .value 55616,55426,56004,56070,56904,57226,56780,56334 .value 55120,54930,54484,54550,53336,53658,54236,53790 .value 50528,50338,50916,50982,49768,50090,49644,49198 .value 52080,51890,51444,51510,52344,52666,53244,52798 .value 37248,36930,37380,37830,38536,38730,38156,38094 .value 40848,40530,39956,40406,39064,39258,39708,39646 .value 36256,35938,36388,36838,35496,35690,35116,35054 .value 33712,33394,32820,33270,33976,34170,34620,34558 .value 43456,43010,43588,43910,44744,44810,44364,44174 .value 42960,42514,42068,42390,41176,41242,41820,41630 .value 46560,46114,46692,47014,45800,45866,45420,45230 .value 48112,47666,47220,47542,48376,48442,49020,48830 .align 64 .Lrem_4bit: .long 0,0,0,471859200,0,943718400,0,610271232 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 .long 
0,2441084928,0,2376073216,0,2847932416,0,3051356160 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 .byte 0 #else .file "ghash-x86.S" .text .globl gcm_gmult_4bit_x86 .type gcm_gmult_4bit_x86,@function .align 16 gcm_gmult_4bit_x86: .L_gcm_gmult_4bit_x86_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi subl $84,%esp movl 104(%esp),%edi movl 108(%esp),%esi movl (%edi),%ebp movl 4(%edi),%edx movl 8(%edi),%ecx movl 12(%edi),%ebx movl $0,16(%esp) movl $471859200,20(%esp) movl $943718400,24(%esp) movl $610271232,28(%esp) movl $1887436800,32(%esp) movl $1822425088,36(%esp) movl $1220542464,40(%esp) movl $1423966208,44(%esp) movl $3774873600,48(%esp) movl $4246732800,52(%esp) movl $3644850176,56(%esp) movl $3311403008,60(%esp) movl $2441084928,64(%esp) movl $2376073216,68(%esp) movl $2847932416,72(%esp) movl $3051356160,76(%esp) movl %ebp,(%esp) movl %edx,4(%esp) movl %ecx,8(%esp) movl %ebx,12(%esp) shrl $20,%ebx andl $240,%ebx movl 4(%esi,%ebx,1),%ebp movl (%esi,%ebx,1),%edx movl 12(%esi,%ebx,1),%ecx movl 8(%esi,%ebx,1),%ebx xorl %eax,%eax movl $15,%edi jmp .L000x86_loop .align 16 .L000x86_loop: movb %bl,%al shrdl $4,%ecx,%ebx andb $15,%al shrdl $4,%edx,%ecx shrdl $4,%ebp,%edx shrl $4,%ebp xorl 16(%esp,%eax,4),%ebp movb (%esp,%edi,1),%al andb $240,%al xorl 8(%esi,%eax,1),%ebx xorl 12(%esi,%eax,1),%ecx xorl (%esi,%eax,1),%edx xorl 4(%esi,%eax,1),%ebp decl %edi js .L001x86_break movb %bl,%al shrdl $4,%ecx,%ebx andb $15,%al shrdl $4,%edx,%ecx shrdl $4,%ebp,%edx shrl $4,%ebp xorl 16(%esp,%eax,4),%ebp movb (%esp,%edi,1),%al shlb $4,%al xorl 8(%esi,%eax,1),%ebx xorl 12(%esi,%eax,1),%ecx xorl (%esi,%eax,1),%edx xorl 4(%esi,%eax,1),%ebp jmp .L000x86_loop .align 16 .L001x86_break: bswap %ebx bswap %ecx bswap %edx bswap %ebp movl 104(%esp),%edi movl %ebx,12(%edi) movl %ecx,8(%edi) movl %edx,4(%edi) movl %ebp,(%edi) addl $84,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin .globl gcm_ghash_4bit_x86 .type gcm_ghash_4bit_x86,@function .align 16 gcm_ghash_4bit_x86: .L_gcm_ghash_4bit_x86_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi subl $84,%esp movl 104(%esp),%ebx movl 108(%esp),%esi movl 112(%esp),%edi movl 116(%esp),%ecx addl %edi,%ecx movl %ecx,116(%esp) movl (%ebx),%ebp movl 4(%ebx),%edx movl 8(%ebx),%ecx movl 12(%ebx),%ebx movl $0,16(%esp) movl $471859200,20(%esp) movl $943718400,24(%esp) movl $610271232,28(%esp) movl $1887436800,32(%esp) movl $1822425088,36(%esp) movl $1220542464,40(%esp) movl $1423966208,44(%esp) movl $3774873600,48(%esp) movl $4246732800,52(%esp) movl $3644850176,56(%esp) movl $3311403008,60(%esp) movl $2441084928,64(%esp) movl $2376073216,68(%esp) movl $2847932416,72(%esp) movl $3051356160,76(%esp) .align 16 .L002x86_outer_loop: xorl 12(%edi),%ebx xorl 8(%edi),%ecx xorl 4(%edi),%edx xorl (%edi),%ebp movl %ebx,12(%esp) movl %ecx,8(%esp) movl %edx,4(%esp) movl %ebp,(%esp) shrl $20,%ebx andl $240,%ebx movl 4(%esi,%ebx,1),%ebp movl (%esi,%ebx,1),%edx movl 12(%esi,%ebx,1),%ecx movl 8(%esi,%ebx,1),%ebx xorl %eax,%eax movl $15,%edi jmp .L003x86_loop .align 16 .L003x86_loop: movb %bl,%al shrdl $4,%ecx,%ebx andb $15,%al shrdl $4,%edx,%ecx shrdl $4,%ebp,%edx shrl $4,%ebp xorl 16(%esp,%eax,4),%ebp movb (%esp,%edi,1),%al andb $240,%al xorl 8(%esi,%eax,1),%ebx xorl 12(%esi,%eax,1),%ecx xorl (%esi,%eax,1),%edx xorl 4(%esi,%eax,1),%ebp decl %edi js .L004x86_break movb %bl,%al shrdl $4,%ecx,%ebx andb $15,%al 
shrdl $4,%edx,%ecx shrdl $4,%ebp,%edx shrl $4,%ebp xorl 16(%esp,%eax,4),%ebp movb (%esp,%edi,1),%al shlb $4,%al xorl 8(%esi,%eax,1),%ebx xorl 12(%esi,%eax,1),%ecx xorl (%esi,%eax,1),%edx xorl 4(%esi,%eax,1),%ebp jmp .L003x86_loop .align 16 .L004x86_break: bswap %ebx bswap %ecx bswap %edx bswap %ebp movl 112(%esp),%edi leal 16(%edi),%edi cmpl 116(%esp),%edi movl %edi,112(%esp) jb .L002x86_outer_loop movl 104(%esp),%edi movl %ebx,12(%edi) movl %ecx,8(%edi) movl %edx,4(%edi) movl %ebp,(%edi) addl $84,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin .globl gcm_gmult_4bit_mmx .type gcm_gmult_4bit_mmx,@function .align 16 gcm_gmult_4bit_mmx: .L_gcm_gmult_4bit_mmx_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%edi movl 24(%esp),%esi call .L005pic_point .L005pic_point: popl %eax leal .Lrem_4bit-.L005pic_point(%eax),%eax movzbl 15(%edi),%ebx xorl %ecx,%ecx movl %ebx,%edx movb %dl,%cl movl $14,%ebp shlb $4,%cl andl $240,%edx movq 8(%esi,%ecx,1),%mm0 movq (%esi,%ecx,1),%mm1 movd %mm0,%ebx jmp .L006mmx_loop .align 16 .L006mmx_loop: psrlq $4,%mm0 andl $15,%ebx movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%edx,1),%mm0 movb (%edi,%ebp,1),%cl psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 decl %ebp movd %mm0,%ebx pxor (%esi,%edx,1),%mm1 movl %ecx,%edx pxor %mm2,%mm0 js .L007mmx_break shlb $4,%cl andl $15,%ebx psrlq $4,%mm0 andl $240,%edx movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%ecx,1),%mm0 psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 movd %mm0,%ebx pxor (%esi,%ecx,1),%mm1 pxor %mm2,%mm0 jmp .L006mmx_loop .align 16 .L007mmx_break: shlb $4,%cl andl $15,%ebx psrlq $4,%mm0 andl $240,%edx movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%ecx,1),%mm0 psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 movd %mm0,%ebx pxor (%esi,%ecx,1),%mm1 pxor %mm2,%mm0 psrlq $4,%mm0 andl $15,%ebx movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%edx,1),%mm0 psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 movd %mm0,%ebx pxor (%esi,%edx,1),%mm1 pxor %mm2,%mm0 psrlq $32,%mm0 movd %mm1,%edx psrlq $32,%mm1 movd %mm0,%ecx movd %mm1,%ebp bswap %ebx bswap %edx bswap %ecx bswap %ebp emms movl %ebx,12(%edi) movl %edx,4(%edi) movl %ecx,8(%edi) movl %ebp,(%edi) popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin .globl gcm_ghash_4bit_mmx .type gcm_ghash_4bit_mmx,@function .align 16 gcm_ghash_4bit_mmx: .L_gcm_ghash_4bit_mmx_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl 24(%esp),%ebx movl 28(%esp),%ecx movl 32(%esp),%edx movl %esp,%ebp call .L008pic_point .L008pic_point: popl %esi leal .Lrem_8bit-.L008pic_point(%esi),%esi subl $544,%esp andl $-64,%esp subl $16,%esp addl %ecx,%edx movl %eax,544(%esp) movl %edx,552(%esp) movl %ebp,556(%esp) addl $128,%ebx leal 144(%esp),%edi leal 400(%esp),%ebp movl -120(%ebx),%edx movq -120(%ebx),%mm0 movq -128(%ebx),%mm3 shll $4,%edx movb %dl,(%esp) movl -104(%ebx),%edx movq -104(%ebx),%mm2 movq -112(%ebx),%mm5 movq %mm0,-128(%edi) psrlq $4,%mm0 movq %mm3,(%edi) movq %mm3,%mm7 psrlq $4,%mm3 shll $4,%edx movb %dl,1(%esp) movl -88(%ebx),%edx movq -88(%ebx),%mm1 psllq $60,%mm7 movq -96(%ebx),%mm4 por %mm7,%mm0 movq %mm2,-120(%edi) psrlq $4,%mm2 movq %mm5,8(%edi) movq %mm5,%mm6 movq %mm0,-128(%ebp) psrlq $4,%mm5 movq %mm3,(%ebp) shll $4,%edx movb %dl,2(%esp) movl -72(%ebx),%edx movq -72(%ebx),%mm0 psllq $60,%mm6 movq -80(%ebx),%mm3 por %mm6,%mm2 movq %mm1,-112(%edi) psrlq $4,%mm1 movq %mm4,16(%edi) movq %mm4,%mm7 movq %mm2,-120(%ebp) psrlq $4,%mm4 movq %mm5,8(%ebp) shll $4,%edx movb %dl,3(%esp) movl -56(%ebx),%edx 
movq -56(%ebx),%mm2 psllq $60,%mm7 movq -64(%ebx),%mm5 por %mm7,%mm1 movq %mm0,-104(%edi) psrlq $4,%mm0 movq %mm3,24(%edi) movq %mm3,%mm6 movq %mm1,-112(%ebp) psrlq $4,%mm3 movq %mm4,16(%ebp) shll $4,%edx movb %dl,4(%esp) movl -40(%ebx),%edx movq -40(%ebx),%mm1 psllq $60,%mm6 movq -48(%ebx),%mm4 por %mm6,%mm0 movq %mm2,-96(%edi) psrlq $4,%mm2 movq %mm5,32(%edi) movq %mm5,%mm7 movq %mm0,-104(%ebp) psrlq $4,%mm5 movq %mm3,24(%ebp) shll $4,%edx movb %dl,5(%esp) movl -24(%ebx),%edx movq -24(%ebx),%mm0 psllq $60,%mm7 movq -32(%ebx),%mm3 por %mm7,%mm2 movq %mm1,-88(%edi) psrlq $4,%mm1 movq %mm4,40(%edi) movq %mm4,%mm6 movq %mm2,-96(%ebp) psrlq $4,%mm4 movq %mm5,32(%ebp) shll $4,%edx movb %dl,6(%esp) movl -8(%ebx),%edx movq -8(%ebx),%mm2 psllq $60,%mm6 movq -16(%ebx),%mm5 por %mm6,%mm1 movq %mm0,-80(%edi) psrlq $4,%mm0 movq %mm3,48(%edi) movq %mm3,%mm7 movq %mm1,-88(%ebp) psrlq $4,%mm3 movq %mm4,40(%ebp) shll $4,%edx movb %dl,7(%esp) movl 8(%ebx),%edx movq 8(%ebx),%mm1 psllq $60,%mm7 movq (%ebx),%mm4 por %mm7,%mm0 movq %mm2,-72(%edi) psrlq $4,%mm2 movq %mm5,56(%edi) movq %mm5,%mm6 movq %mm0,-80(%ebp) psrlq $4,%mm5 movq %mm3,48(%ebp) shll $4,%edx movb %dl,8(%esp) movl 24(%ebx),%edx movq 24(%ebx),%mm0 psllq $60,%mm6 movq 16(%ebx),%mm3 por %mm6,%mm2 movq %mm1,-64(%edi) psrlq $4,%mm1 movq %mm4,64(%edi) movq %mm4,%mm7 movq %mm2,-72(%ebp) psrlq $4,%mm4 movq %mm5,56(%ebp) shll $4,%edx movb %dl,9(%esp) movl 40(%ebx),%edx movq 40(%ebx),%mm2 psllq $60,%mm7 movq 32(%ebx),%mm5 por %mm7,%mm1 movq %mm0,-56(%edi) psrlq $4,%mm0 movq %mm3,72(%edi) movq %mm3,%mm6 movq %mm1,-64(%ebp) psrlq $4,%mm3 movq %mm4,64(%ebp) shll $4,%edx movb %dl,10(%esp) movl 56(%ebx),%edx movq 56(%ebx),%mm1 psllq $60,%mm6 movq 48(%ebx),%mm4 por %mm6,%mm0 movq %mm2,-48(%edi) psrlq $4,%mm2 movq %mm5,80(%edi) movq %mm5,%mm7 movq %mm0,-56(%ebp) psrlq $4,%mm5 movq %mm3,72(%ebp) shll $4,%edx movb %dl,11(%esp) movl 72(%ebx),%edx movq 72(%ebx),%mm0 psllq $60,%mm7 movq 64(%ebx),%mm3 por %mm7,%mm2 movq %mm1,-40(%edi) psrlq $4,%mm1 movq %mm4,88(%edi) movq %mm4,%mm6 movq %mm2,-48(%ebp) psrlq $4,%mm4 movq %mm5,80(%ebp) shll $4,%edx movb %dl,12(%esp) movl 88(%ebx),%edx movq 88(%ebx),%mm2 psllq $60,%mm6 movq 80(%ebx),%mm5 por %mm6,%mm1 movq %mm0,-32(%edi) psrlq $4,%mm0 movq %mm3,96(%edi) movq %mm3,%mm7 movq %mm1,-40(%ebp) psrlq $4,%mm3 movq %mm4,88(%ebp) shll $4,%edx movb %dl,13(%esp) movl 104(%ebx),%edx movq 104(%ebx),%mm1 psllq $60,%mm7 movq 96(%ebx),%mm4 por %mm7,%mm0 movq %mm2,-24(%edi) psrlq $4,%mm2 movq %mm5,104(%edi) movq %mm5,%mm6 movq %mm0,-32(%ebp) psrlq $4,%mm5 movq %mm3,96(%ebp) shll $4,%edx movb %dl,14(%esp) movl 120(%ebx),%edx movq 120(%ebx),%mm0 psllq $60,%mm6 movq 112(%ebx),%mm3 por %mm6,%mm2 movq %mm1,-16(%edi) psrlq $4,%mm1 movq %mm4,112(%edi) movq %mm4,%mm7 movq %mm2,-24(%ebp) psrlq $4,%mm4 movq %mm5,104(%ebp) shll $4,%edx movb %dl,15(%esp) psllq $60,%mm7 por %mm7,%mm1 movq %mm0,-8(%edi) psrlq $4,%mm0 movq %mm3,120(%edi) movq %mm3,%mm6 movq %mm1,-16(%ebp) psrlq $4,%mm3 movq %mm4,112(%ebp) psllq $60,%mm6 por %mm6,%mm0 movq %mm0,-8(%ebp) movq %mm3,120(%ebp) movq (%eax),%mm6 movl 8(%eax),%ebx movl 12(%eax),%edx .align 16 .L009outer: xorl 12(%ecx),%edx xorl 8(%ecx),%ebx pxor (%ecx),%mm6 leal 16(%ecx),%ecx movl %ebx,536(%esp) movq %mm6,528(%esp) movl %ecx,548(%esp) xorl %eax,%eax roll $8,%edx movb %dl,%al movl %eax,%ebp andb $15,%al shrl $4,%ebp pxor %mm0,%mm0 roll $8,%edx pxor %mm1,%mm1 pxor %mm2,%mm2 movq 16(%esp,%eax,8),%mm7 movq 144(%esp,%eax,8),%mm6 movb %dl,%al movd %mm7,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 
pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 shrl $4,%edi pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm2 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movl 536(%esp),%edx movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm2,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm1 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm1,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm0 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm0,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm2 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm2,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm1 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movl 532(%esp),%edx movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm1,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm0 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm0,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm2 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm2,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm1 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm1,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm0 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movl 528(%esp),%edx movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm0,%mm6 shrl $4,%edi pinsrw 
$2,(%esi,%ecx,2),%mm2 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm2,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm1 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm1,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm0 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 xorb (%esp,%ebp,1),%bl movb %dl,%al movd %mm7,%ecx movzbl %bl,%ebx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%ebp psrlq $8,%mm6 pxor 272(%esp,%edi,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm0,%mm6 shrl $4,%ebp pinsrw $2,(%esi,%ebx,2),%mm2 pxor 16(%esp,%eax,8),%mm7 roll $8,%edx pxor 144(%esp,%eax,8),%mm6 pxor %mm3,%mm7 pxor 400(%esp,%edi,8),%mm6 xorb (%esp,%edi,1),%cl movb %dl,%al movl 524(%esp),%edx movd %mm7,%ebx movzbl %cl,%ecx psrlq $8,%mm7 movq %mm6,%mm3 movl %eax,%edi psrlq $8,%mm6 pxor 272(%esp,%ebp,8),%mm7 andb $15,%al psllq $56,%mm3 pxor %mm2,%mm6 shrl $4,%edi pinsrw $2,(%esi,%ecx,2),%mm1 pxor 16(%esp,%eax,8),%mm7 pxor 144(%esp,%eax,8),%mm6 xorb (%esp,%ebp,1),%bl pxor %mm3,%mm7 pxor 400(%esp,%ebp,8),%mm6 movzbl %bl,%ebx pxor %mm2,%mm2 psllq $4,%mm1 movd %mm7,%ecx psrlq $4,%mm7 movq %mm6,%mm3 psrlq $4,%mm6 shll $4,%ecx pxor 16(%esp,%edi,8),%mm7 psllq $60,%mm3 movzbl %cl,%ecx pxor %mm3,%mm7 pxor 144(%esp,%edi,8),%mm6 pinsrw $2,(%esi,%ebx,2),%mm0 pxor %mm1,%mm6 movd %mm7,%edx pinsrw $3,(%esi,%ecx,2),%mm2 psllq $12,%mm0 pxor %mm0,%mm6 psrlq $32,%mm7 pxor %mm2,%mm6 movl 548(%esp),%ecx movd %mm7,%ebx movq %mm6,%mm3 psllw $8,%mm6 psrlw $8,%mm3 por %mm3,%mm6 bswap %edx pshufw $27,%mm6,%mm6 bswap %ebx cmpl 552(%esp),%ecx jne .L009outer movl 544(%esp),%eax movl %edx,12(%eax) movl %ebx,8(%eax) movq %mm6,(%eax) movl 556(%esp),%esp emms popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin .globl gcm_init_clmul .type gcm_init_clmul,@function .align 16 gcm_init_clmul: .L_gcm_init_clmul_begin: movl 4(%esp),%edx movl 8(%esp),%eax call .L010pic .L010pic: popl %ecx leal .Lbswap-.L010pic(%ecx),%ecx movdqu (%eax),%xmm2 pshufd $78,%xmm2,%xmm2 pshufd $255,%xmm2,%xmm4 movdqa %xmm2,%xmm3 psllq $1,%xmm2 pxor %xmm5,%xmm5 psrlq $63,%xmm3 pcmpgtd %xmm4,%xmm5 pslldq $8,%xmm3 por %xmm3,%xmm2 pand 16(%ecx),%xmm5 pxor %xmm5,%xmm2 movdqa %xmm2,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 xorps %xmm0,%xmm3 xorps %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 pshufd $78,%xmm2,%xmm3 pshufd $78,%xmm0,%xmm4 pxor %xmm2,%xmm3 movdqu %xmm2,(%edx) pxor %xmm0,%xmm4 movdqu %xmm0,16(%edx) .byte 102,15,58,15,227,8 movdqu %xmm4,32(%edx) ret .size 
gcm_init_clmul,.-.L_gcm_init_clmul_begin .globl gcm_gmult_clmul .type gcm_gmult_clmul,@function .align 16 gcm_gmult_clmul: .L_gcm_gmult_clmul_begin: movl 4(%esp),%eax movl 8(%esp),%edx call .L011pic .L011pic: popl %ecx leal .Lbswap-.L011pic(%ecx),%ecx movdqu (%eax),%xmm0 movdqa (%ecx),%xmm5 movups (%edx),%xmm2 .byte 102,15,56,0,197 movups 32(%edx),%xmm4 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 xorps %xmm0,%xmm3 xorps %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%eax) ret .size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin .globl gcm_ghash_clmul .type gcm_ghash_clmul,@function .align 16 gcm_ghash_clmul: .L_gcm_ghash_clmul_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%eax movl 24(%esp),%edx movl 28(%esp),%esi movl 32(%esp),%ebx call .L012pic .L012pic: popl %ecx leal .Lbswap-.L012pic(%ecx),%ecx movdqu (%eax),%xmm0 movdqa (%ecx),%xmm5 movdqu (%edx),%xmm2 .byte 102,15,56,0,197 subl $16,%ebx jz .L013odd_tail movdqu (%esi),%xmm3 movdqu 16(%esi),%xmm6 .byte 102,15,56,0,221 .byte 102,15,56,0,245 movdqu 32(%edx),%xmm5 pxor %xmm3,%xmm0 pshufd $78,%xmm6,%xmm3 movdqa %xmm6,%xmm7 pxor %xmm6,%xmm3 leal 32(%esi),%esi .byte 102,15,58,68,242,0 .byte 102,15,58,68,250,17 .byte 102,15,58,68,221,0 movups 16(%edx),%xmm2 nop subl $32,%ebx jbe .L014even_tail jmp .L015mod_loop .align 32 .L015mod_loop: pshufd $78,%xmm0,%xmm4 movdqa %xmm0,%xmm1 pxor %xmm0,%xmm4 nop .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,229,16 movups (%edx),%xmm2 xorps %xmm6,%xmm0 movdqa (%ecx),%xmm5 xorps %xmm7,%xmm1 movdqu (%esi),%xmm7 pxor %xmm0,%xmm3 movdqu 16(%esi),%xmm6 pxor %xmm1,%xmm3 .byte 102,15,56,0,253 pxor %xmm3,%xmm4 movdqa %xmm4,%xmm3 psrldq $8,%xmm4 pslldq $8,%xmm3 pxor %xmm4,%xmm1 pxor %xmm3,%xmm0 .byte 102,15,56,0,245 pxor %xmm7,%xmm1 movdqa %xmm6,%xmm7 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 .byte 102,15,58,68,242,0 movups 32(%edx),%xmm5 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 pshufd $78,%xmm7,%xmm3 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm7,%xmm3 pxor %xmm4,%xmm1 .byte 102,15,58,68,250,17 movups 16(%edx),%xmm2 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 .byte 102,15,58,68,221,0 leal 32(%esi),%esi subl $32,%ebx ja .L015mod_loop .L014even_tail: pshufd $78,%xmm0,%xmm4 movdqa %xmm0,%xmm1 pxor %xmm0,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,229,16 movdqa (%ecx),%xmm5 xorps %xmm6,%xmm0 xorps %xmm7,%xmm1 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 pxor %xmm3,%xmm4 movdqa %xmm4,%xmm3 psrldq $8,%xmm4 pslldq $8,%xmm3 pxor %xmm4,%xmm1 pxor %xmm3,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 testl %ebx,%ebx jnz 
.L016done movups (%edx),%xmm2 .L013odd_tail: movdqu (%esi),%xmm3 .byte 102,15,56,0,221 pxor %xmm3,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 xorps %xmm0,%xmm3 xorps %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 .L016done: .byte 102,15,56,0,197 movdqu %xmm0,(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin .align 64 .Lbswap: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 .align 64 .Lrem_8bit: .value 0,450,900,582,1800,1738,1164,1358 .value 3600,4050,3476,3158,2328,2266,2716,2910 .value 7200,7650,8100,7782,6952,6890,6316,6510 .value 4656,5106,4532,4214,5432,5370,5820,6014 .value 14400,14722,15300,14854,16200,16010,15564,15630 .value 13904,14226,13780,13334,12632,12442,13020,13086 .value 9312,9634,10212,9766,9064,8874,8428,8494 .value 10864,11186,10740,10294,11640,11450,12028,12094 .value 28800,28994,29444,29382,30600,30282,29708,30158 .value 32400,32594,32020,31958,31128,30810,31260,31710 .value 27808,28002,28452,28390,27560,27242,26668,27118 .value 25264,25458,24884,24822,26040,25722,26172,26622 .value 18624,18690,19268,19078,20424,19978,19532,19854 .value 18128,18194,17748,17558,16856,16410,16988,17310 .value 21728,21794,22372,22182,21480,21034,20588,20910 .value 23280,23346,22900,22710,24056,23610,24188,24510 .value 57600,57538,57988,58182,58888,59338,58764,58446 .value 61200,61138,60564,60758,59416,59866,60316,59998 .value 64800,64738,65188,65382,64040,64490,63916,63598 .value 62256,62194,61620,61814,62520,62970,63420,63102 .value 55616,55426,56004,56070,56904,57226,56780,56334 .value 55120,54930,54484,54550,53336,53658,54236,53790 .value 50528,50338,50916,50982,49768,50090,49644,49198 .value 52080,51890,51444,51510,52344,52666,53244,52798 .value 37248,36930,37380,37830,38536,38730,38156,38094 .value 40848,40530,39956,40406,39064,39258,39708,39646 .value 36256,35938,36388,36838,35496,35690,35116,35054 .value 33712,33394,32820,33270,33976,34170,34620,34558 .value 43456,43010,43588,43910,44744,44810,44364,44174 .value 42960,42514,42068,42390,41176,41242,41820,41630 .value 46560,46114,46692,47014,45800,45866,45420,45230 .value 48112,47666,47220,47542,48376,48442,49020,48830 .align 64 .Lrem_4bit: .long 0,0,0,471859200,0,943718400,0,610271232 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 .byte 0 #endif Index: head/secure/lib/libcrypto/i386/md5-586.S =================================================================== --- head/secure/lib/libcrypto/i386/md5-586.S (revision 299480) +++ head/secure/lib/libcrypto/i386/md5-586.S (revision 299481) @@ -1,1362 +1,1363 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from md5-586.pl. 
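Before the md5-586.S body below, one note on the ghash-x86.S hunk that ends above: every GHASH routine in it (gcm_gmult_4bit_x86, gcm_ghash_4bit_mmx, gcm_gmult_clmul, gcm_ghash_clmul, and their non-PIC twins) computes the same primitive, multiplication in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1 as defined for GCM. As a reference point only, here is a minimal bit-at-a-time C sketch of that multiplication; the u128, load_be128 and gf128_mul names are invented for the illustration and are not OpenSSL's table-driven implementation.

#include <stdint.h>

/* 128-bit field element in GCM bit order: hi holds bytes 0..7,
 * lo holds bytes 8..15 of the big-endian block. */
typedef struct { uint64_t hi, lo; } u128;

static u128 load_be128(const uint8_t b[16])
{
    u128 r = { 0, 0 };
    for (int i = 0; i < 8; i++) {
        r.hi = (r.hi << 8) | b[i];
        r.lo = (r.lo << 8) | b[i + 8];
    }
    return r;
}

/* X * Y in GF(2^128), the "right shift" construction from NIST SP 800-38D:
 * walk the bits of X from the most significant end, accumulating V = Y * x^i. */
static u128 gf128_mul(u128 x, u128 y)
{
    u128 z = { 0, 0 }, v = y;

    for (int i = 0; i < 128; i++) {
        uint64_t xbit = (i < 64) ? (x.hi >> (63 - i)) & 1
                                 : (x.lo >> (127 - i)) & 1;
        if (xbit) {                        /* Z ^= V when bit i of X is set */
            z.hi ^= v.hi;
            z.lo ^= v.lo;
        }
        uint64_t carry = v.lo & 1;         /* V = V * x, reducing by        */
        v.lo = (v.lo >> 1) | (v.hi << 63); /* R = 0xe1 || 0^120 whenever a  */
        v.hi >>= 1;                        /* bit falls off the low end     */
        if (carry)
            v.hi ^= 0xe100000000000000ULL;
    }
    return z;
}

/* GHASH itself then iterates Y = (Y ^ block) * H over the input blocks. */

The hand-written paths above trade this loop for speed: the 4-bit routines process a nibble at a time out of tables of multiples of H built on the stack, and the _clmul routines replace the loop with carry-less multiplies (the .byte 102,15,58,68,... sequences encode pclmulqdq) followed by a shift-and-xor reduction.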
#ifdef PIC .file "md5-586.S" .text .globl md5_block_asm_data_order .type md5_block_asm_data_order,@function .align 16 md5_block_asm_data_order: .L_md5_block_asm_data_order_begin: pushl %esi pushl %edi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%ecx pushl %ebp shll $6,%ecx pushl %ebx addl %esi,%ecx subl $64,%ecx movl (%edi),%eax pushl %ecx movl 4(%edi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx .L000start: movl %ecx,%edi movl (%esi),%ebp xorl %edx,%edi andl %ebx,%edi leal 3614090360(%eax,%ebp,1),%eax xorl %edx,%edi addl %edi,%eax movl %ebx,%edi roll $7,%eax movl 4(%esi),%ebp addl %ebx,%eax xorl %ecx,%edi andl %eax,%edi leal 3905402710(%edx,%ebp,1),%edx xorl %ecx,%edi addl %edi,%edx movl %eax,%edi roll $12,%edx movl 8(%esi),%ebp addl %eax,%edx xorl %ebx,%edi andl %edx,%edi leal 606105819(%ecx,%ebp,1),%ecx xorl %ebx,%edi addl %edi,%ecx movl %edx,%edi roll $17,%ecx movl 12(%esi),%ebp addl %edx,%ecx xorl %eax,%edi andl %ecx,%edi leal 3250441966(%ebx,%ebp,1),%ebx xorl %eax,%edi addl %edi,%ebx movl %ecx,%edi roll $22,%ebx movl 16(%esi),%ebp addl %ecx,%ebx xorl %edx,%edi andl %ebx,%edi leal 4118548399(%eax,%ebp,1),%eax xorl %edx,%edi addl %edi,%eax movl %ebx,%edi roll $7,%eax movl 20(%esi),%ebp addl %ebx,%eax xorl %ecx,%edi andl %eax,%edi leal 1200080426(%edx,%ebp,1),%edx xorl %ecx,%edi addl %edi,%edx movl %eax,%edi roll $12,%edx movl 24(%esi),%ebp addl %eax,%edx xorl %ebx,%edi andl %edx,%edi leal 2821735955(%ecx,%ebp,1),%ecx xorl %ebx,%edi addl %edi,%ecx movl %edx,%edi roll $17,%ecx movl 28(%esi),%ebp addl %edx,%ecx xorl %eax,%edi andl %ecx,%edi leal 4249261313(%ebx,%ebp,1),%ebx xorl %eax,%edi addl %edi,%ebx movl %ecx,%edi roll $22,%ebx movl 32(%esi),%ebp addl %ecx,%ebx xorl %edx,%edi andl %ebx,%edi leal 1770035416(%eax,%ebp,1),%eax xorl %edx,%edi addl %edi,%eax movl %ebx,%edi roll $7,%eax movl 36(%esi),%ebp addl %ebx,%eax xorl %ecx,%edi andl %eax,%edi leal 2336552879(%edx,%ebp,1),%edx xorl %ecx,%edi addl %edi,%edx movl %eax,%edi roll $12,%edx movl 40(%esi),%ebp addl %eax,%edx xorl %ebx,%edi andl %edx,%edi leal 4294925233(%ecx,%ebp,1),%ecx xorl %ebx,%edi addl %edi,%ecx movl %edx,%edi roll $17,%ecx movl 44(%esi),%ebp addl %edx,%ecx xorl %eax,%edi andl %ecx,%edi leal 2304563134(%ebx,%ebp,1),%ebx xorl %eax,%edi addl %edi,%ebx movl %ecx,%edi roll $22,%ebx movl 48(%esi),%ebp addl %ecx,%ebx xorl %edx,%edi andl %ebx,%edi leal 1804603682(%eax,%ebp,1),%eax xorl %edx,%edi addl %edi,%eax movl %ebx,%edi roll $7,%eax movl 52(%esi),%ebp addl %ebx,%eax xorl %ecx,%edi andl %eax,%edi leal 4254626195(%edx,%ebp,1),%edx xorl %ecx,%edi addl %edi,%edx movl %eax,%edi roll $12,%edx movl 56(%esi),%ebp addl %eax,%edx xorl %ebx,%edi andl %edx,%edi leal 2792965006(%ecx,%ebp,1),%ecx xorl %ebx,%edi addl %edi,%ecx movl %edx,%edi roll $17,%ecx movl 60(%esi),%ebp addl %edx,%ecx xorl %eax,%edi andl %ecx,%edi leal 1236535329(%ebx,%ebp,1),%ebx xorl %eax,%edi addl %edi,%ebx movl %ecx,%edi roll $22,%ebx movl 4(%esi),%ebp addl %ecx,%ebx leal 4129170786(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi movl 24(%esi),%ebp xorl %ecx,%edi addl %edi,%eax movl %ebx,%edi roll $5,%eax addl %ebx,%eax leal 3225465664(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi movl 44(%esi),%ebp xorl %ebx,%edi addl %edi,%edx movl %eax,%edi roll $9,%edx addl %eax,%edx leal 643717713(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi movl (%esi),%ebp xorl %eax,%edi addl %edi,%ecx movl %edx,%edi roll $14,%ecx addl %edx,%ecx leal 3921069994(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi movl 20(%esi),%ebp xorl %edx,%edi addl %edi,%ebx movl %ecx,%edi roll 
$20,%ebx addl %ecx,%ebx leal 3593408605(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi movl 40(%esi),%ebp xorl %ecx,%edi addl %edi,%eax movl %ebx,%edi roll $5,%eax addl %ebx,%eax leal 38016083(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi movl 60(%esi),%ebp xorl %ebx,%edi addl %edi,%edx movl %eax,%edi roll $9,%edx addl %eax,%edx leal 3634488961(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi movl 16(%esi),%ebp xorl %eax,%edi addl %edi,%ecx movl %edx,%edi roll $14,%ecx addl %edx,%ecx leal 3889429448(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi movl 36(%esi),%ebp xorl %edx,%edi addl %edi,%ebx movl %ecx,%edi roll $20,%ebx addl %ecx,%ebx leal 568446438(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi movl 56(%esi),%ebp xorl %ecx,%edi addl %edi,%eax movl %ebx,%edi roll $5,%eax addl %ebx,%eax leal 3275163606(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi movl 12(%esi),%ebp xorl %ebx,%edi addl %edi,%edx movl %eax,%edi roll $9,%edx addl %eax,%edx leal 4107603335(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi movl 32(%esi),%ebp xorl %eax,%edi addl %edi,%ecx movl %edx,%edi roll $14,%ecx addl %edx,%ecx leal 1163531501(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi movl 52(%esi),%ebp xorl %edx,%edi addl %edi,%ebx movl %ecx,%edi roll $20,%ebx addl %ecx,%ebx leal 2850285829(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi movl 8(%esi),%ebp xorl %ecx,%edi addl %edi,%eax movl %ebx,%edi roll $5,%eax addl %ebx,%eax leal 4243563512(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi movl 28(%esi),%ebp xorl %ebx,%edi addl %edi,%edx movl %eax,%edi roll $9,%edx addl %eax,%edx leal 1735328473(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi movl 48(%esi),%ebp xorl %eax,%edi addl %edi,%ecx movl %edx,%edi roll $14,%ecx addl %edx,%ecx leal 2368359562(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi movl 20(%esi),%ebp xorl %edx,%edi addl %edi,%ebx movl %ecx,%edi roll $20,%ebx addl %ecx,%ebx xorl %edx,%edi xorl %ebx,%edi leal 4294588738(%eax,%ebp,1),%eax addl %edi,%eax roll $4,%eax movl 32(%esi),%ebp movl %ebx,%edi leal 2272392833(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi xorl %eax,%edi movl 44(%esi),%ebp addl %edi,%edx movl %eax,%edi roll $11,%edx addl %eax,%edx xorl %ebx,%edi xorl %edx,%edi leal 1839030562(%ecx,%ebp,1),%ecx addl %edi,%ecx roll $16,%ecx movl 56(%esi),%ebp movl %edx,%edi leal 4259657740(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi xorl %ecx,%edi movl 4(%esi),%ebp addl %edi,%ebx movl %ecx,%edi roll $23,%ebx addl %ecx,%ebx xorl %edx,%edi xorl %ebx,%edi leal 2763975236(%eax,%ebp,1),%eax addl %edi,%eax roll $4,%eax movl 16(%esi),%ebp movl %ebx,%edi leal 1272893353(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi xorl %eax,%edi movl 28(%esi),%ebp addl %edi,%edx movl %eax,%edi roll $11,%edx addl %eax,%edx xorl %ebx,%edi xorl %edx,%edi leal 4139469664(%ecx,%ebp,1),%ecx addl %edi,%ecx roll $16,%ecx movl 40(%esi),%ebp movl %edx,%edi leal 3200236656(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi xorl %ecx,%edi movl 52(%esi),%ebp addl %edi,%ebx movl %ecx,%edi roll $23,%ebx addl %ecx,%ebx xorl %edx,%edi xorl %ebx,%edi leal 681279174(%eax,%ebp,1),%eax addl %edi,%eax roll $4,%eax movl (%esi),%ebp movl %ebx,%edi leal 3936430074(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi xorl %eax,%edi movl 12(%esi),%ebp addl %edi,%edx movl %eax,%edi roll $11,%edx addl %eax,%edx xorl %ebx,%edi xorl %edx,%edi leal 3572445317(%ecx,%ebp,1),%ecx addl %edi,%ecx roll $16,%ecx movl 24(%esi),%ebp movl %edx,%edi leal 76029189(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi xorl %ecx,%edi movl 36(%esi),%ebp addl %edi,%ebx movl 
%ecx,%edi roll $23,%ebx addl %ecx,%ebx xorl %edx,%edi xorl %ebx,%edi leal 3654602809(%eax,%ebp,1),%eax addl %edi,%eax roll $4,%eax movl 48(%esi),%ebp movl %ebx,%edi leal 3873151461(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi xorl %eax,%edi movl 60(%esi),%ebp addl %edi,%edx movl %eax,%edi roll $11,%edx addl %eax,%edx xorl %ebx,%edi xorl %edx,%edi leal 530742520(%ecx,%ebp,1),%ecx addl %edi,%ecx roll $16,%ecx movl 8(%esi),%ebp movl %edx,%edi leal 3299628645(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi xorl %ecx,%edi movl (%esi),%ebp addl %edi,%ebx movl $-1,%edi roll $23,%ebx addl %ecx,%ebx xorl %edx,%edi orl %ebx,%edi leal 4096336452(%eax,%ebp,1),%eax xorl %ecx,%edi movl 28(%esi),%ebp addl %edi,%eax movl $-1,%edi roll $6,%eax xorl %ecx,%edi addl %ebx,%eax orl %eax,%edi leal 1126891415(%edx,%ebp,1),%edx xorl %ebx,%edi movl 56(%esi),%ebp addl %edi,%edx movl $-1,%edi roll $10,%edx xorl %ebx,%edi addl %eax,%edx orl %edx,%edi leal 2878612391(%ecx,%ebp,1),%ecx xorl %eax,%edi movl 20(%esi),%ebp addl %edi,%ecx movl $-1,%edi roll $15,%ecx xorl %eax,%edi addl %edx,%ecx orl %ecx,%edi leal 4237533241(%ebx,%ebp,1),%ebx xorl %edx,%edi movl 48(%esi),%ebp addl %edi,%ebx movl $-1,%edi roll $21,%ebx xorl %edx,%edi addl %ecx,%ebx orl %ebx,%edi leal 1700485571(%eax,%ebp,1),%eax xorl %ecx,%edi movl 12(%esi),%ebp addl %edi,%eax movl $-1,%edi roll $6,%eax xorl %ecx,%edi addl %ebx,%eax orl %eax,%edi leal 2399980690(%edx,%ebp,1),%edx xorl %ebx,%edi movl 40(%esi),%ebp addl %edi,%edx movl $-1,%edi roll $10,%edx xorl %ebx,%edi addl %eax,%edx orl %edx,%edi leal 4293915773(%ecx,%ebp,1),%ecx xorl %eax,%edi movl 4(%esi),%ebp addl %edi,%ecx movl $-1,%edi roll $15,%ecx xorl %eax,%edi addl %edx,%ecx orl %ecx,%edi leal 2240044497(%ebx,%ebp,1),%ebx xorl %edx,%edi movl 32(%esi),%ebp addl %edi,%ebx movl $-1,%edi roll $21,%ebx xorl %edx,%edi addl %ecx,%ebx orl %ebx,%edi leal 1873313359(%eax,%ebp,1),%eax xorl %ecx,%edi movl 60(%esi),%ebp addl %edi,%eax movl $-1,%edi roll $6,%eax xorl %ecx,%edi addl %ebx,%eax orl %eax,%edi leal 4264355552(%edx,%ebp,1),%edx xorl %ebx,%edi movl 24(%esi),%ebp addl %edi,%edx movl $-1,%edi roll $10,%edx xorl %ebx,%edi addl %eax,%edx orl %edx,%edi leal 2734768916(%ecx,%ebp,1),%ecx xorl %eax,%edi movl 52(%esi),%ebp addl %edi,%ecx movl $-1,%edi roll $15,%ecx xorl %eax,%edi addl %edx,%ecx orl %ecx,%edi leal 1309151649(%ebx,%ebp,1),%ebx xorl %edx,%edi movl 16(%esi),%ebp addl %edi,%ebx movl $-1,%edi roll $21,%ebx xorl %edx,%edi addl %ecx,%ebx orl %ebx,%edi leal 4149444226(%eax,%ebp,1),%eax xorl %ecx,%edi movl 44(%esi),%ebp addl %edi,%eax movl $-1,%edi roll $6,%eax xorl %ecx,%edi addl %ebx,%eax orl %eax,%edi leal 3174756917(%edx,%ebp,1),%edx xorl %ebx,%edi movl 8(%esi),%ebp addl %edi,%edx movl $-1,%edi roll $10,%edx xorl %ebx,%edi addl %eax,%edx orl %edx,%edi leal 718787259(%ecx,%ebp,1),%ecx xorl %eax,%edi movl 36(%esi),%ebp addl %edi,%ecx movl $-1,%edi roll $15,%ecx xorl %eax,%edi addl %edx,%ecx orl %ecx,%edi leal 3951481745(%ebx,%ebp,1),%ebx xorl %edx,%edi movl 24(%esp),%ebp addl %edi,%ebx addl $64,%esi roll $21,%ebx movl (%ebp),%edi addl %ecx,%ebx addl %edi,%eax movl 4(%ebp),%edi addl %edi,%ebx movl 8(%ebp),%edi addl %edi,%ecx movl 12(%ebp),%edi addl %edi,%edx movl %eax,(%ebp) movl %ebx,4(%ebp) movl (%esp),%edi movl %ecx,8(%ebp) movl %edx,12(%ebp) cmpl %esi,%edi jae .L000start popl %eax popl %ebx popl %ebp popl %edi popl %esi ret .size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin #else .file "md5-586.S" .text .globl md5_block_asm_data_order .type md5_block_asm_data_order,@function 
.align 16 md5_block_asm_data_order: .L_md5_block_asm_data_order_begin: pushl %esi pushl %edi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%ecx pushl %ebp shll $6,%ecx pushl %ebx addl %esi,%ecx subl $64,%ecx movl (%edi),%eax pushl %ecx movl 4(%edi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx .L000start: movl %ecx,%edi movl (%esi),%ebp xorl %edx,%edi andl %ebx,%edi leal 3614090360(%eax,%ebp,1),%eax xorl %edx,%edi addl %edi,%eax movl %ebx,%edi roll $7,%eax movl 4(%esi),%ebp addl %ebx,%eax xorl %ecx,%edi andl %eax,%edi leal 3905402710(%edx,%ebp,1),%edx xorl %ecx,%edi addl %edi,%edx movl %eax,%edi roll $12,%edx movl 8(%esi),%ebp addl %eax,%edx xorl %ebx,%edi andl %edx,%edi leal 606105819(%ecx,%ebp,1),%ecx xorl %ebx,%edi addl %edi,%ecx movl %edx,%edi roll $17,%ecx movl 12(%esi),%ebp addl %edx,%ecx xorl %eax,%edi andl %ecx,%edi leal 3250441966(%ebx,%ebp,1),%ebx xorl %eax,%edi addl %edi,%ebx movl %ecx,%edi roll $22,%ebx movl 16(%esi),%ebp addl %ecx,%ebx xorl %edx,%edi andl %ebx,%edi leal 4118548399(%eax,%ebp,1),%eax xorl %edx,%edi addl %edi,%eax movl %ebx,%edi roll $7,%eax movl 20(%esi),%ebp addl %ebx,%eax xorl %ecx,%edi andl %eax,%edi leal 1200080426(%edx,%ebp,1),%edx xorl %ecx,%edi addl %edi,%edx movl %eax,%edi roll $12,%edx movl 24(%esi),%ebp addl %eax,%edx xorl %ebx,%edi andl %edx,%edi leal 2821735955(%ecx,%ebp,1),%ecx xorl %ebx,%edi addl %edi,%ecx movl %edx,%edi roll $17,%ecx movl 28(%esi),%ebp addl %edx,%ecx xorl %eax,%edi andl %ecx,%edi leal 4249261313(%ebx,%ebp,1),%ebx xorl %eax,%edi addl %edi,%ebx movl %ecx,%edi roll $22,%ebx movl 32(%esi),%ebp addl %ecx,%ebx xorl %edx,%edi andl %ebx,%edi leal 1770035416(%eax,%ebp,1),%eax xorl %edx,%edi addl %edi,%eax movl %ebx,%edi roll $7,%eax movl 36(%esi),%ebp addl %ebx,%eax xorl %ecx,%edi andl %eax,%edi leal 2336552879(%edx,%ebp,1),%edx xorl %ecx,%edi addl %edi,%edx movl %eax,%edi roll $12,%edx movl 40(%esi),%ebp addl %eax,%edx xorl %ebx,%edi andl %edx,%edi leal 4294925233(%ecx,%ebp,1),%ecx xorl %ebx,%edi addl %edi,%ecx movl %edx,%edi roll $17,%ecx movl 44(%esi),%ebp addl %edx,%ecx xorl %eax,%edi andl %ecx,%edi leal 2304563134(%ebx,%ebp,1),%ebx xorl %eax,%edi addl %edi,%ebx movl %ecx,%edi roll $22,%ebx movl 48(%esi),%ebp addl %ecx,%ebx xorl %edx,%edi andl %ebx,%edi leal 1804603682(%eax,%ebp,1),%eax xorl %edx,%edi addl %edi,%eax movl %ebx,%edi roll $7,%eax movl 52(%esi),%ebp addl %ebx,%eax xorl %ecx,%edi andl %eax,%edi leal 4254626195(%edx,%ebp,1),%edx xorl %ecx,%edi addl %edi,%edx movl %eax,%edi roll $12,%edx movl 56(%esi),%ebp addl %eax,%edx xorl %ebx,%edi andl %edx,%edi leal 2792965006(%ecx,%ebp,1),%ecx xorl %ebx,%edi addl %edi,%ecx movl %edx,%edi roll $17,%ecx movl 60(%esi),%ebp addl %edx,%ecx xorl %eax,%edi andl %ecx,%edi leal 1236535329(%ebx,%ebp,1),%ebx xorl %eax,%edi addl %edi,%ebx movl %ecx,%edi roll $22,%ebx movl 4(%esi),%ebp addl %ecx,%ebx leal 4129170786(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi movl 24(%esi),%ebp xorl %ecx,%edi addl %edi,%eax movl %ebx,%edi roll $5,%eax addl %ebx,%eax leal 3225465664(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi movl 44(%esi),%ebp xorl %ebx,%edi addl %edi,%edx movl %eax,%edi roll $9,%edx addl %eax,%edx leal 643717713(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi movl (%esi),%ebp xorl %eax,%edi addl %edi,%ecx movl %edx,%edi roll $14,%ecx addl %edx,%ecx leal 3921069994(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi movl 20(%esi),%ebp xorl %edx,%edi addl %edi,%ebx movl %ecx,%edi roll $20,%ebx addl %ecx,%ebx leal 3593408605(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi movl 40(%esi),%ebp 
xorl %ecx,%edi addl %edi,%eax movl %ebx,%edi roll $5,%eax addl %ebx,%eax leal 38016083(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi movl 60(%esi),%ebp xorl %ebx,%edi addl %edi,%edx movl %eax,%edi roll $9,%edx addl %eax,%edx leal 3634488961(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi movl 16(%esi),%ebp xorl %eax,%edi addl %edi,%ecx movl %edx,%edi roll $14,%ecx addl %edx,%ecx leal 3889429448(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi movl 36(%esi),%ebp xorl %edx,%edi addl %edi,%ebx movl %ecx,%edi roll $20,%ebx addl %ecx,%ebx leal 568446438(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi movl 56(%esi),%ebp xorl %ecx,%edi addl %edi,%eax movl %ebx,%edi roll $5,%eax addl %ebx,%eax leal 3275163606(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi movl 12(%esi),%ebp xorl %ebx,%edi addl %edi,%edx movl %eax,%edi roll $9,%edx addl %eax,%edx leal 4107603335(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi movl 32(%esi),%ebp xorl %eax,%edi addl %edi,%ecx movl %edx,%edi roll $14,%ecx addl %edx,%ecx leal 1163531501(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi movl 52(%esi),%ebp xorl %edx,%edi addl %edi,%ebx movl %ecx,%edi roll $20,%ebx addl %ecx,%ebx leal 2850285829(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi movl 8(%esi),%ebp xorl %ecx,%edi addl %edi,%eax movl %ebx,%edi roll $5,%eax addl %ebx,%eax leal 4243563512(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi movl 28(%esi),%ebp xorl %ebx,%edi addl %edi,%edx movl %eax,%edi roll $9,%edx addl %eax,%edx leal 1735328473(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi movl 48(%esi),%ebp xorl %eax,%edi addl %edi,%ecx movl %edx,%edi roll $14,%ecx addl %edx,%ecx leal 2368359562(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi movl 20(%esi),%ebp xorl %edx,%edi addl %edi,%ebx movl %ecx,%edi roll $20,%ebx addl %ecx,%ebx xorl %edx,%edi xorl %ebx,%edi leal 4294588738(%eax,%ebp,1),%eax addl %edi,%eax roll $4,%eax movl 32(%esi),%ebp movl %ebx,%edi leal 2272392833(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi xorl %eax,%edi movl 44(%esi),%ebp addl %edi,%edx movl %eax,%edi roll $11,%edx addl %eax,%edx xorl %ebx,%edi xorl %edx,%edi leal 1839030562(%ecx,%ebp,1),%ecx addl %edi,%ecx roll $16,%ecx movl 56(%esi),%ebp movl %edx,%edi leal 4259657740(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi xorl %ecx,%edi movl 4(%esi),%ebp addl %edi,%ebx movl %ecx,%edi roll $23,%ebx addl %ecx,%ebx xorl %edx,%edi xorl %ebx,%edi leal 2763975236(%eax,%ebp,1),%eax addl %edi,%eax roll $4,%eax movl 16(%esi),%ebp movl %ebx,%edi leal 1272893353(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi xorl %eax,%edi movl 28(%esi),%ebp addl %edi,%edx movl %eax,%edi roll $11,%edx addl %eax,%edx xorl %ebx,%edi xorl %edx,%edi leal 4139469664(%ecx,%ebp,1),%ecx addl %edi,%ecx roll $16,%ecx movl 40(%esi),%ebp movl %edx,%edi leal 3200236656(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi xorl %ecx,%edi movl 52(%esi),%ebp addl %edi,%ebx movl %ecx,%edi roll $23,%ebx addl %ecx,%ebx xorl %edx,%edi xorl %ebx,%edi leal 681279174(%eax,%ebp,1),%eax addl %edi,%eax roll $4,%eax movl (%esi),%ebp movl %ebx,%edi leal 3936430074(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi xorl %eax,%edi movl 12(%esi),%ebp addl %edi,%edx movl %eax,%edi roll $11,%edx addl %eax,%edx xorl %ebx,%edi xorl %edx,%edi leal 3572445317(%ecx,%ebp,1),%ecx addl %edi,%ecx roll $16,%ecx movl 24(%esi),%ebp movl %edx,%edi leal 76029189(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi xorl %ecx,%edi movl 36(%esi),%ebp addl %edi,%ebx movl %ecx,%edi roll $23,%ebx addl %ecx,%ebx xorl %edx,%edi xorl %ebx,%edi leal 3654602809(%eax,%ebp,1),%eax addl 
%edi,%eax roll $4,%eax movl 48(%esi),%ebp movl %ebx,%edi leal 3873151461(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi xorl %eax,%edi movl 60(%esi),%ebp addl %edi,%edx movl %eax,%edi roll $11,%edx addl %eax,%edx xorl %ebx,%edi xorl %edx,%edi leal 530742520(%ecx,%ebp,1),%ecx addl %edi,%ecx roll $16,%ecx movl 8(%esi),%ebp movl %edx,%edi leal 3299628645(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi xorl %ecx,%edi movl (%esi),%ebp addl %edi,%ebx movl $-1,%edi roll $23,%ebx addl %ecx,%ebx xorl %edx,%edi orl %ebx,%edi leal 4096336452(%eax,%ebp,1),%eax xorl %ecx,%edi movl 28(%esi),%ebp addl %edi,%eax movl $-1,%edi roll $6,%eax xorl %ecx,%edi addl %ebx,%eax orl %eax,%edi leal 1126891415(%edx,%ebp,1),%edx xorl %ebx,%edi movl 56(%esi),%ebp addl %edi,%edx movl $-1,%edi roll $10,%edx xorl %ebx,%edi addl %eax,%edx orl %edx,%edi leal 2878612391(%ecx,%ebp,1),%ecx xorl %eax,%edi movl 20(%esi),%ebp addl %edi,%ecx movl $-1,%edi roll $15,%ecx xorl %eax,%edi addl %edx,%ecx orl %ecx,%edi leal 4237533241(%ebx,%ebp,1),%ebx xorl %edx,%edi movl 48(%esi),%ebp addl %edi,%ebx movl $-1,%edi roll $21,%ebx xorl %edx,%edi addl %ecx,%ebx orl %ebx,%edi leal 1700485571(%eax,%ebp,1),%eax xorl %ecx,%edi movl 12(%esi),%ebp addl %edi,%eax movl $-1,%edi roll $6,%eax xorl %ecx,%edi addl %ebx,%eax orl %eax,%edi leal 2399980690(%edx,%ebp,1),%edx xorl %ebx,%edi movl 40(%esi),%ebp addl %edi,%edx movl $-1,%edi roll $10,%edx xorl %ebx,%edi addl %eax,%edx orl %edx,%edi leal 4293915773(%ecx,%ebp,1),%ecx xorl %eax,%edi movl 4(%esi),%ebp addl %edi,%ecx movl $-1,%edi roll $15,%ecx xorl %eax,%edi addl %edx,%ecx orl %ecx,%edi leal 2240044497(%ebx,%ebp,1),%ebx xorl %edx,%edi movl 32(%esi),%ebp addl %edi,%ebx movl $-1,%edi roll $21,%ebx xorl %edx,%edi addl %ecx,%ebx orl %ebx,%edi leal 1873313359(%eax,%ebp,1),%eax xorl %ecx,%edi movl 60(%esi),%ebp addl %edi,%eax movl $-1,%edi roll $6,%eax xorl %ecx,%edi addl %ebx,%eax orl %eax,%edi leal 4264355552(%edx,%ebp,1),%edx xorl %ebx,%edi movl 24(%esi),%ebp addl %edi,%edx movl $-1,%edi roll $10,%edx xorl %ebx,%edi addl %eax,%edx orl %edx,%edi leal 2734768916(%ecx,%ebp,1),%ecx xorl %eax,%edi movl 52(%esi),%ebp addl %edi,%ecx movl $-1,%edi roll $15,%ecx xorl %eax,%edi addl %edx,%ecx orl %ecx,%edi leal 1309151649(%ebx,%ebp,1),%ebx xorl %edx,%edi movl 16(%esi),%ebp addl %edi,%ebx movl $-1,%edi roll $21,%ebx xorl %edx,%edi addl %ecx,%ebx orl %ebx,%edi leal 4149444226(%eax,%ebp,1),%eax xorl %ecx,%edi movl 44(%esi),%ebp addl %edi,%eax movl $-1,%edi roll $6,%eax xorl %ecx,%edi addl %ebx,%eax orl %eax,%edi leal 3174756917(%edx,%ebp,1),%edx xorl %ebx,%edi movl 8(%esi),%ebp addl %edi,%edx movl $-1,%edi roll $10,%edx xorl %ebx,%edi addl %eax,%edx orl %edx,%edi leal 718787259(%ecx,%ebp,1),%ecx xorl %eax,%edi movl 36(%esi),%ebp addl %edi,%ecx movl $-1,%edi roll $15,%ecx xorl %eax,%edi addl %edx,%ecx orl %ecx,%edi leal 3951481745(%ebx,%ebp,1),%ebx xorl %edx,%edi movl 24(%esp),%ebp addl %edi,%ebx addl $64,%esi roll $21,%ebx movl (%ebp),%edi addl %ecx,%ebx addl %edi,%eax movl 4(%ebp),%edi addl %edi,%ebx movl 8(%ebp),%edi addl %edi,%ecx movl 12(%ebp),%edi addl %edi,%edx movl %eax,(%ebp) movl %ebx,4(%ebp) movl (%esp),%edi movl %ecx,8(%ebp) movl %edx,12(%ebp) cmpl %esi,%edi jae .L000start popl %eax popl %ebx popl %ebp popl %edi popl %esi ret .size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin #endif Index: head/secure/lib/libcrypto/i386/rc4-586.S =================================================================== --- head/secure/lib/libcrypto/i386/rc4-586.S (revision 299480) +++ 
head/secure/lib/libcrypto/i386/rc4-586.S (revision 299481) @@ -1,757 +1,758 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from rc4-586.pl. #ifdef PIC .file "rc4-586.S" .text .globl RC4 .type RC4,@function .align 16 RC4: .L_RC4_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%edi movl 24(%esp),%edx movl 28(%esp),%esi movl 32(%esp),%ebp xorl %eax,%eax xorl %ebx,%ebx cmpl $0,%edx je .L000abort movb (%edi),%al movb 4(%edi),%bl addl $8,%edi leal (%esi,%edx,1),%ecx subl %esi,%ebp movl %ecx,24(%esp) incb %al cmpl $-1,256(%edi) je .L001RC4_CHAR movl (%edi,%eax,4),%ecx andl $-4,%edx jz .L002loop1 movl %ebp,32(%esp) testl $-8,%edx jz .L003go4loop4 call .L004PIC_me_up .L004PIC_me_up: popl %ebp leal OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp btl $26,(%ebp) jnc .L003go4loop4 movl 32(%esp),%ebp andl $-8,%edx leal -8(%esi,%edx,1),%edx movl %edx,-4(%edi) addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx movq (%esi),%mm0 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm2 jmp .L005loop_mmx_enter .align 16 .L006loop_mmx: addb %cl,%bl psllq $56,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movq (%esi),%mm0 movq %mm2,-8(%ebp,%esi,1) movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm2 .L005loop_mmx_enter: addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm0,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $8,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $16,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $24,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $32,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $40,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $48,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 movl %ebx,%edx xorl %ebx,%ebx movb %dl,%bl cmpl -4(%edi),%esi leal 8(%esi),%esi jb .L006loop_mmx psllq $56,%mm1 pxor %mm1,%mm2 movq %mm2,-8(%ebp,%esi,1) emms cmpl 24(%esp),%esi je .L007done jmp .L002loop1 .align 16 .L003go4loop4: leal -4(%esi,%edx,1),%edx movl %edx,28(%esp) .L008loop4: addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) addl %ecx,%edx incb %al andl $255,%edx movl (%edi,%eax,4),%ecx movl (%edi,%edx,4),%ebp addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) addl %ecx,%edx incb %al andl $255,%edx rorl 
$8,%ebp movl (%edi,%eax,4),%ecx orl (%edi,%edx,4),%ebp addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) addl %ecx,%edx incb %al andl $255,%edx rorl $8,%ebp movl (%edi,%eax,4),%ecx orl (%edi,%edx,4),%ebp addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) addl %ecx,%edx incb %al andl $255,%edx rorl $8,%ebp movl 32(%esp),%ecx orl (%edi,%edx,4),%ebp rorl $8,%ebp xorl (%esi),%ebp cmpl 28(%esp),%esi movl %ebp,(%ecx,%esi,1) leal 4(%esi),%esi movl (%edi,%eax,4),%ecx jb .L008loop4 cmpl 24(%esp),%esi je .L007done movl 32(%esp),%ebp .align 16 .L002loop1: addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) addl %ecx,%edx incb %al andl $255,%edx movl (%edi,%edx,4),%edx xorb (%esi),%dl leal 1(%esi),%esi movl (%edi,%eax,4),%ecx cmpl 24(%esp),%esi movb %dl,-1(%ebp,%esi,1) jb .L002loop1 jmp .L007done .align 16 .L001RC4_CHAR: movzbl (%edi,%eax,1),%ecx .L009cloop1: addb %cl,%bl movzbl (%edi,%ebx,1),%edx movb %cl,(%edi,%ebx,1) movb %dl,(%edi,%eax,1) addb %cl,%dl movzbl (%edi,%edx,1),%edx addb $1,%al xorb (%esi),%dl leal 1(%esi),%esi movzbl (%edi,%eax,1),%ecx cmpl 24(%esp),%esi movb %dl,-1(%ebp,%esi,1) jb .L009cloop1 .L007done: decb %al movl %ebx,-4(%edi) movb %al,-8(%edi) .L000abort: popl %edi popl %esi popl %ebx popl %ebp ret .size RC4,.-.L_RC4_begin .globl private_RC4_set_key .type private_RC4_set_key,@function .align 16 private_RC4_set_key: .L_private_RC4_set_key_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%edi movl 24(%esp),%ebp movl 28(%esp),%esi call .L010PIC_me_up .L010PIC_me_up: popl %edx leal OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx leal 8(%edi),%edi leal (%esi,%ebp,1),%esi negl %ebp xorl %eax,%eax movl %ebp,-4(%edi) btl $20,(%edx) jc .L011c1stloop .align 16 .L012w1stloop: movl %eax,(%edi,%eax,4) addb $1,%al jnc .L012w1stloop xorl %ecx,%ecx xorl %edx,%edx .align 16 .L013w2ndloop: movl (%edi,%ecx,4),%eax addb (%esi,%ebp,1),%dl addb %al,%dl addl $1,%ebp movl (%edi,%edx,4),%ebx jnz .L014wnowrap movl -4(%edi),%ebp .L014wnowrap: movl %eax,(%edi,%edx,4) movl %ebx,(%edi,%ecx,4) addb $1,%cl jnc .L013w2ndloop jmp .L015exit .align 16 .L011c1stloop: movb %al,(%edi,%eax,1) addb $1,%al jnc .L011c1stloop xorl %ecx,%ecx xorl %edx,%edx xorl %ebx,%ebx .align 16 .L016c2ndloop: movb (%edi,%ecx,1),%al addb (%esi,%ebp,1),%dl addb %al,%dl addl $1,%ebp movb (%edi,%edx,1),%bl jnz .L017cnowrap movl -4(%edi),%ebp .L017cnowrap: movb %al,(%edi,%edx,1) movb %bl,(%edi,%ecx,1) addb $1,%cl jnc .L016c2ndloop movl $-1,256(%edi) .L015exit: xorl %eax,%eax movl %eax,-8(%edi) movl %eax,-4(%edi) popl %edi popl %esi popl %ebx popl %ebp ret .size private_RC4_set_key,.-.L_private_RC4_set_key_begin .globl RC4_options .type RC4_options,@function .align 16 RC4_options: .L_RC4_options_begin: call .L018pic_point .L018pic_point: popl %eax leal .L019opts-.L018pic_point(%eax),%eax call .L020PIC_me_up .L020PIC_me_up: popl %edx leal OPENSSL_ia32cap_P-.L020PIC_me_up(%edx),%edx movl (%edx),%edx btl $20,%edx jc .L0211xchar btl $26,%edx jnc .L022ret addl $25,%eax ret .L0211xchar: addl $12,%eax .L022ret: ret .align 64 .L019opts: .byte 114,99,52,40,52,120,44,105,110,116,41,0 .byte 114,99,52,40,49,120,44,99,104,97,114,41,0 .byte 114,99,52,40,56,120,44,109,109,120,41,0 .byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 .size RC4_options,.-.L_RC4_options_begin .comm OPENSSL_ia32cap_P,16,4 #else .file 
"rc4-586.S" .text .globl RC4 .type RC4,@function .align 16 RC4: .L_RC4_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%edi movl 24(%esp),%edx movl 28(%esp),%esi movl 32(%esp),%ebp xorl %eax,%eax xorl %ebx,%ebx cmpl $0,%edx je .L000abort movb (%edi),%al movb 4(%edi),%bl addl $8,%edi leal (%esi,%edx,1),%ecx subl %esi,%ebp movl %ecx,24(%esp) incb %al cmpl $-1,256(%edi) je .L001RC4_CHAR movl (%edi,%eax,4),%ecx andl $-4,%edx jz .L002loop1 movl %ebp,32(%esp) testl $-8,%edx jz .L003go4loop4 leal OPENSSL_ia32cap_P,%ebp btl $26,(%ebp) jnc .L003go4loop4 movl 32(%esp),%ebp andl $-8,%edx leal -8(%esi,%edx,1),%edx movl %edx,-4(%edi) addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx movq (%esi),%mm0 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm2 jmp .L004loop_mmx_enter .align 16 .L005loop_mmx: addb %cl,%bl psllq $56,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movq (%esi),%mm0 movq %mm2,-8(%ebp,%esi,1) movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm2 .L004loop_mmx_enter: addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm0,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $8,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $16,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $24,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $32,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $40,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 addb %cl,%bl psllq $48,%mm1 movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) incl %eax addl %ecx,%edx movzbl %al,%eax movzbl %dl,%edx pxor %mm1,%mm2 movl (%edi,%eax,4),%ecx movd (%edi,%edx,4),%mm1 movl %ebx,%edx xorl %ebx,%ebx movb %dl,%bl cmpl -4(%edi),%esi leal 8(%esi),%esi jb .L005loop_mmx psllq $56,%mm1 pxor %mm1,%mm2 movq %mm2,-8(%ebp,%esi,1) emms cmpl 24(%esp),%esi je .L006done jmp .L002loop1 .align 16 .L003go4loop4: leal -4(%esi,%edx,1),%edx movl %edx,28(%esp) .L007loop4: addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) addl %ecx,%edx incb %al andl $255,%edx movl (%edi,%eax,4),%ecx movl (%edi,%edx,4),%ebp addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) addl %ecx,%edx incb %al andl $255,%edx rorl $8,%ebp movl (%edi,%eax,4),%ecx orl (%edi,%edx,4),%ebp addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) addl %ecx,%edx incb %al andl $255,%edx rorl $8,%ebp movl (%edi,%eax,4),%ecx orl (%edi,%edx,4),%ebp addb %cl,%bl movl 
(%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) addl %ecx,%edx incb %al andl $255,%edx rorl $8,%ebp movl 32(%esp),%ecx orl (%edi,%edx,4),%ebp rorl $8,%ebp xorl (%esi),%ebp cmpl 28(%esp),%esi movl %ebp,(%ecx,%esi,1) leal 4(%esi),%esi movl (%edi,%eax,4),%ecx jb .L007loop4 cmpl 24(%esp),%esi je .L006done movl 32(%esp),%ebp .align 16 .L002loop1: addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) movl %edx,(%edi,%eax,4) addl %ecx,%edx incb %al andl $255,%edx movl (%edi,%edx,4),%edx xorb (%esi),%dl leal 1(%esi),%esi movl (%edi,%eax,4),%ecx cmpl 24(%esp),%esi movb %dl,-1(%ebp,%esi,1) jb .L002loop1 jmp .L006done .align 16 .L001RC4_CHAR: movzbl (%edi,%eax,1),%ecx .L008cloop1: addb %cl,%bl movzbl (%edi,%ebx,1),%edx movb %cl,(%edi,%ebx,1) movb %dl,(%edi,%eax,1) addb %cl,%dl movzbl (%edi,%edx,1),%edx addb $1,%al xorb (%esi),%dl leal 1(%esi),%esi movzbl (%edi,%eax,1),%ecx cmpl 24(%esp),%esi movb %dl,-1(%ebp,%esi,1) jb .L008cloop1 .L006done: decb %al movl %ebx,-4(%edi) movb %al,-8(%edi) .L000abort: popl %edi popl %esi popl %ebx popl %ebp ret .size RC4,.-.L_RC4_begin .globl private_RC4_set_key .type private_RC4_set_key,@function .align 16 private_RC4_set_key: .L_private_RC4_set_key_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%edi movl 24(%esp),%ebp movl 28(%esp),%esi leal OPENSSL_ia32cap_P,%edx leal 8(%edi),%edi leal (%esi,%ebp,1),%esi negl %ebp xorl %eax,%eax movl %ebp,-4(%edi) btl $20,(%edx) jc .L009c1stloop .align 16 .L010w1stloop: movl %eax,(%edi,%eax,4) addb $1,%al jnc .L010w1stloop xorl %ecx,%ecx xorl %edx,%edx .align 16 .L011w2ndloop: movl (%edi,%ecx,4),%eax addb (%esi,%ebp,1),%dl addb %al,%dl addl $1,%ebp movl (%edi,%edx,4),%ebx jnz .L012wnowrap movl -4(%edi),%ebp .L012wnowrap: movl %eax,(%edi,%edx,4) movl %ebx,(%edi,%ecx,4) addb $1,%cl jnc .L011w2ndloop jmp .L013exit .align 16 .L009c1stloop: movb %al,(%edi,%eax,1) addb $1,%al jnc .L009c1stloop xorl %ecx,%ecx xorl %edx,%edx xorl %ebx,%ebx .align 16 .L014c2ndloop: movb (%edi,%ecx,1),%al addb (%esi,%ebp,1),%dl addb %al,%dl addl $1,%ebp movb (%edi,%edx,1),%bl jnz .L015cnowrap movl -4(%edi),%ebp .L015cnowrap: movb %al,(%edi,%edx,1) movb %bl,(%edi,%ecx,1) addb $1,%cl jnc .L014c2ndloop movl $-1,256(%edi) .L013exit: xorl %eax,%eax movl %eax,-8(%edi) movl %eax,-4(%edi) popl %edi popl %esi popl %ebx popl %ebp ret .size private_RC4_set_key,.-.L_private_RC4_set_key_begin .globl RC4_options .type RC4_options,@function .align 16 RC4_options: .L_RC4_options_begin: call .L016pic_point .L016pic_point: popl %eax leal .L017opts-.L016pic_point(%eax),%eax leal OPENSSL_ia32cap_P,%edx movl (%edx),%edx btl $20,%edx jc .L0181xchar btl $26,%edx jnc .L019ret addl $25,%eax ret .L0181xchar: addl $12,%eax .L019ret: ret .align 64 .L017opts: .byte 114,99,52,40,52,120,44,105,110,116,41,0 .byte 114,99,52,40,49,120,44,99,104,97,114,41,0 .byte 114,99,52,40,56,120,44,109,109,120,41,0 .byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 .size RC4_options,.-.L_RC4_options_begin .comm OPENSSL_ia32cap_P,16,4 #endif Index: head/secure/lib/libcrypto/i386/rc5-586.S =================================================================== --- head/secure/lib/libcrypto/i386/rc5-586.S (revision 299480) +++ head/secure/lib/libcrypto/i386/rc5-586.S (revision 299481) @@ -1,1132 +1,1133 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from rc5-586.pl. 
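For readers skimming these auto-generated files, here are two minimal C reference sketches of what the routines in this part of the diff compute. They are illustrative only: the helper names (rc4_xor, rc5_rotl, rc5_encrypt_block) are not OpenSSL API, and this is not the code that rc4-586.pl or rc5-586.pl emit. The first sketch is the per-byte RC4 update that the 4x-int, 1x-int and MMX paths of RC4 above all implement; the second is the data-dependent-rotate round unrolled in RC5_32_encrypt below.

#include <stddef.h>
#include <stdint.h>

/* RC4: one keystream byte per iteration, as in the .L002loop1 / .L008cloop1
 * tails above (the 4x and MMX paths batch the same update 4 or 8 at a time). */
static void rc4_xor(unsigned char *S, unsigned char *x, unsigned char *y,
                    const unsigned char *in, unsigned char *out, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        unsigned char tx, ty;
        *x = (unsigned char)(*x + 1);
        tx = S[*x];
        *y = (unsigned char)(*y + tx);
        ty = S[*y];
        S[*y] = tx;                          /* swap S[x] and S[y] */
        S[*x] = ty;
        out[i] = in[i] ^ S[(unsigned char)(tx + ty)];
    }
}

/* RC5-32: encrypt one 64-bit block with expanded key S[0..2r+1],
 * r in {8, 12, 16} -- the three unrolled cases handled below. */
static uint32_t rc5_rotl(uint32_t v, uint32_t n)
{
    n &= 31;                                 /* "roll %cl" uses %cl mod 32 */
    return n ? (v << n) | (v >> (32 - n)) : v;
}

static void rc5_encrypt_block(uint32_t d[2], const uint32_t *S, int rounds)
{
    uint32_t A = d[0] + S[0];
    uint32_t B = d[1] + S[1];
    for (int i = 1; i <= rounds; i++) {
        A = rc5_rotl(A ^ B, B) + S[2 * i];   /* xorl / roll %cl / addl pair */
        B = rc5_rotl(B ^ A, A) + S[2 * i + 1];
    }
    d[0] = A;
    d[1] = B;
}

In the assembly, S[] corresponds to the words starting at offset 4 of the key argument, while offset 0 holds the round count that is compared against 8 and 12 to pick the exit point.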
#ifdef PIC .file "rc5-586.S" .text .globl RC5_32_encrypt .type RC5_32_encrypt,@function .align 16 RC5_32_encrypt: .L_RC5_32_encrypt_begin: pushl %ebp pushl %esi pushl %edi movl 16(%esp),%edx movl 20(%esp),%ebp movl (%edx),%edi movl 4(%edx),%esi pushl %ebx movl (%ebp),%ebx addl 4(%ebp),%edi addl 8(%ebp),%esi xorl %esi,%edi movl 12(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 16(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 20(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 24(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 28(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 32(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 36(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 40(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 44(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 48(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 52(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 56(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 60(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 64(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 68(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 72(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi cmpl $8,%ebx je .L000rc5_exit xorl %esi,%edi movl 76(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 80(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 84(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 88(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 92(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 96(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 100(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 104(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi cmpl $12,%ebx je .L000rc5_exit xorl %esi,%edi movl 108(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 112(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 116(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 120(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 124(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 128(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 132(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 136(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi .L000rc5_exit: movl %edi,(%edx) movl %esi,4(%edx) popl %ebx popl %edi popl %esi popl %ebp ret .size RC5_32_encrypt,.-.L_RC5_32_encrypt_begin .globl RC5_32_decrypt .type RC5_32_decrypt,@function .align 16 RC5_32_decrypt: .L_RC5_32_decrypt_begin: pushl %ebp pushl %esi pushl %edi movl 16(%esp),%edx movl 20(%esp),%ebp movl (%edx),%edi movl 4(%edx),%esi pushl %ebx movl (%ebp),%ebx cmpl $12,%ebx je .L001rc5_dec_12 cmpl $8,%ebx je .L002rc5_dec_8 movl 136(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 132(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 128(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl 
%edi,%esi movl 124(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 120(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 116(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 112(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 108(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi .L001rc5_dec_12: movl 104(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 100(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 96(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 92(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 88(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 84(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 80(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 76(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi .L002rc5_dec_8: movl 72(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 68(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 64(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 60(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 56(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 52(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 48(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 44(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 40(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 36(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 32(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 28(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 24(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 20(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 16(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 12(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi subl 8(%ebp),%esi subl 4(%ebp),%edi .L003rc5_exit: movl %edi,(%edx) movl %esi,4(%edx) popl %ebx popl %edi popl %esi popl %ebp ret .size RC5_32_decrypt,.-.L_RC5_32_decrypt_begin .globl RC5_32_cbc_encrypt .type RC5_32_cbc_encrypt,@function .align 16 RC5_32_cbc_encrypt: .L_RC5_32_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi pushl %edi pushl %esi pushl %edi pushl %esi movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi movl 56(%esp),%ecx movl 48(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx jz .L004decrypt andl $4294967288,%ebp movl 8(%esp),%eax movl 12(%esp),%ebx jz .L005encrypt_finish .L006encrypt_loop: movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax xorl %edx,%ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_RC5_32_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L006encrypt_loop .L005encrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L007finish call .L008PIC_point .L008PIC_point: popl %edx leal .L009cbc_enc_jmp_table-.L008PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp .L010ej7: movb 6(%esi),%dh 
shll $8,%edx .L011ej6: movb 5(%esi),%dh .L012ej5: movb 4(%esi),%dl .L013ej4: movl (%esi),%ecx jmp .L014ejend .L015ej3: movb 2(%esi),%ch shll $8,%ecx .L016ej2: movb 1(%esi),%ch .L017ej1: movb (%esi),%cl .L014ejend: xorl %ecx,%eax xorl %edx,%ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_RC5_32_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) jmp .L007finish .L004decrypt: andl $4294967288,%ebp movl 16(%esp),%eax movl 20(%esp),%ebx jz .L018decrypt_finish .L019decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_RC5_32_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx movl %ecx,(%edi) movl %edx,4(%edi) movl %eax,16(%esp) movl %ebx,20(%esp) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L019decrypt_loop .L018decrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L007finish movl (%esi),%eax movl 4(%esi),%ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_RC5_32_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx .L020dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx .L021dj6: movb %dh,5(%edi) .L022dj5: movb %dl,4(%edi) .L023dj4: movl %ecx,(%edi) jmp .L024djend .L025dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx .L026dj2: movb %ch,1(%esi) .L027dj1: movb %cl,(%esi) .L024djend: jmp .L007finish .L007finish: movl 60(%esp),%ecx addl $24,%esp movl %eax,(%ecx) movl %ebx,4(%ecx) popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L009cbc_enc_jmp_table: .long 0 .long .L017ej1-.L008PIC_point .long .L016ej2-.L008PIC_point .long .L015ej3-.L008PIC_point .long .L013ej4-.L008PIC_point .long .L012ej5-.L008PIC_point .long .L011ej6-.L008PIC_point .long .L010ej7-.L008PIC_point .align 64 .size RC5_32_cbc_encrypt,.-.L_RC5_32_cbc_encrypt_begin #else .file "rc5-586.S" .text .globl RC5_32_encrypt .type RC5_32_encrypt,@function .align 16 RC5_32_encrypt: .L_RC5_32_encrypt_begin: pushl %ebp pushl %esi pushl %edi movl 16(%esp),%edx movl 20(%esp),%ebp movl (%edx),%edi movl 4(%edx),%esi pushl %ebx movl (%ebp),%ebx addl 4(%ebp),%edi addl 8(%ebp),%esi xorl %esi,%edi movl 12(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 16(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 20(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 24(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 28(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 32(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 36(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 40(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 44(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 48(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 52(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 56(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 60(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 64(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 68(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 72(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi cmpl $8,%ebx je .L000rc5_exit xorl %esi,%edi movl 76(%ebp),%eax movl 
%esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 80(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 84(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 88(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 92(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 96(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 100(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 104(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi cmpl $12,%ebx je .L000rc5_exit xorl %esi,%edi movl 108(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 112(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 116(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 120(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 124(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 128(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi xorl %esi,%edi movl 132(%ebp),%eax movl %esi,%ecx roll %cl,%edi addl %eax,%edi xorl %edi,%esi movl 136(%ebp),%eax movl %edi,%ecx roll %cl,%esi addl %eax,%esi .L000rc5_exit: movl %edi,(%edx) movl %esi,4(%edx) popl %ebx popl %edi popl %esi popl %ebp ret .size RC5_32_encrypt,.-.L_RC5_32_encrypt_begin .globl RC5_32_decrypt .type RC5_32_decrypt,@function .align 16 RC5_32_decrypt: .L_RC5_32_decrypt_begin: pushl %ebp pushl %esi pushl %edi movl 16(%esp),%edx movl 20(%esp),%ebp movl (%edx),%edi movl 4(%edx),%esi pushl %ebx movl (%ebp),%ebx cmpl $12,%ebx je .L001rc5_dec_12 cmpl $8,%ebx je .L002rc5_dec_8 movl 136(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 132(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 128(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 124(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 120(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 116(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 112(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 108(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi .L001rc5_dec_12: movl 104(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 100(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 96(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 92(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 88(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 84(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 80(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 76(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi .L002rc5_dec_8: movl 72(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 68(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 64(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 60(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 56(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 52(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 48(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 44(%ebp),%eax subl 
%eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 40(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 36(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 32(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 28(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 24(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 20(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi movl 16(%ebp),%eax subl %eax,%esi movl %edi,%ecx rorl %cl,%esi xorl %edi,%esi movl 12(%ebp),%eax subl %eax,%edi movl %esi,%ecx rorl %cl,%edi xorl %esi,%edi subl 8(%ebp),%esi subl 4(%ebp),%edi .L003rc5_exit: movl %edi,(%edx) movl %esi,4(%edx) popl %ebx popl %edi popl %esi popl %ebp ret .size RC5_32_decrypt,.-.L_RC5_32_decrypt_begin .globl RC5_32_cbc_encrypt .type RC5_32_cbc_encrypt,@function .align 16 RC5_32_cbc_encrypt: .L_RC5_32_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi pushl %edi pushl %esi pushl %edi pushl %esi movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi movl 56(%esp),%ecx movl 48(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx jz .L004decrypt andl $4294967288,%ebp movl 8(%esp),%eax movl 12(%esp),%ebx jz .L005encrypt_finish .L006encrypt_loop: movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax xorl %edx,%ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_RC5_32_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L006encrypt_loop .L005encrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L007finish call .L008PIC_point .L008PIC_point: popl %edx leal .L009cbc_enc_jmp_table-.L008PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp .L010ej7: movb 6(%esi),%dh shll $8,%edx .L011ej6: movb 5(%esi),%dh .L012ej5: movb 4(%esi),%dl .L013ej4: movl (%esi),%ecx jmp .L014ejend .L015ej3: movb 2(%esi),%ch shll $8,%ecx .L016ej2: movb 1(%esi),%ch .L017ej1: movb (%esi),%cl .L014ejend: xorl %ecx,%eax xorl %edx,%ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_RC5_32_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx movl %eax,(%edi) movl %ebx,4(%edi) jmp .L007finish .L004decrypt: andl $4294967288,%ebp movl 16(%esp),%eax movl 20(%esp),%ebx jz .L018decrypt_finish .L019decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_RC5_32_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx movl %ecx,(%edi) movl %edx,4(%edi) movl %eax,16(%esp) movl %ebx,20(%esp) addl $8,%esi addl $8,%edi subl $8,%ebp jnz .L019decrypt_loop .L018decrypt_finish: movl 52(%esp),%ebp andl $7,%ebp jz .L007finish movl (%esi),%eax movl 4(%esi),%ebx movl %eax,8(%esp) movl %ebx,12(%esp) call .L_RC5_32_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx movl 16(%esp),%ecx movl 20(%esp),%edx xorl %eax,%ecx xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx .L020dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx .L021dj6: movb %dh,5(%edi) .L022dj5: movb %dl,4(%edi) .L023dj4: movl %ecx,(%edi) jmp .L024djend .L025dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx .L026dj2: movb %ch,1(%esi) .L027dj1: movb %cl,(%esi) .L024djend: jmp .L007finish .L007finish: movl 60(%esp),%ecx addl $24,%esp movl %eax,(%ecx) movl %ebx,4(%ecx) popl %edi popl %esi popl %ebx popl %ebp ret .align 64 
.L009cbc_enc_jmp_table: .long 0 .long .L017ej1-.L008PIC_point .long .L016ej2-.L008PIC_point .long .L015ej3-.L008PIC_point .long .L013ej4-.L008PIC_point .long .L012ej5-.L008PIC_point .long .L011ej6-.L008PIC_point .long .L010ej7-.L008PIC_point .align 64 .size RC5_32_cbc_encrypt,.-.L_RC5_32_cbc_encrypt_begin #endif Index: head/secure/lib/libcrypto/i386/rmd-586.S =================================================================== --- head/secure/lib/libcrypto/i386/rmd-586.S (revision 299480) +++ head/secure/lib/libcrypto/i386/rmd-586.S (revision 299481) @@ -1,3934 +1,3935 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from rmd-586.pl. #ifdef PIC .file "rmd-586.S" .text .globl ripemd160_block_asm_data_order .type ripemd160_block_asm_data_order,@function .align 16 ripemd160_block_asm_data_order: .L_ripemd160_block_asm_data_order_begin: movl 4(%esp),%edx movl 8(%esp),%eax pushl %esi movl (%edx),%ecx pushl %edi movl 4(%edx),%esi pushl %ebp movl 8(%edx),%edi pushl %ebx subl $108,%esp .L000start: movl (%eax),%ebx movl 4(%eax),%ebp movl %ebx,(%esp) movl %ebp,4(%esp) movl 8(%eax),%ebx movl 12(%eax),%ebp movl %ebx,8(%esp) movl %ebp,12(%esp) movl 16(%eax),%ebx movl 20(%eax),%ebp movl %ebx,16(%esp) movl %ebp,20(%esp) movl 24(%eax),%ebx movl 28(%eax),%ebp movl %ebx,24(%esp) movl %ebp,28(%esp) movl 32(%eax),%ebx movl 36(%eax),%ebp movl %ebx,32(%esp) movl %ebp,36(%esp) movl 40(%eax),%ebx movl 44(%eax),%ebp movl %ebx,40(%esp) movl %ebp,44(%esp) movl 48(%eax),%ebx movl 52(%eax),%ebp movl %ebx,48(%esp) movl %ebp,52(%esp) movl 56(%eax),%ebx movl 60(%eax),%ebp movl %ebx,56(%esp) movl %ebp,60(%esp) movl %edi,%eax movl 12(%edx),%ebx movl 16(%edx),%ebp xorl %ebx,%eax movl (%esp),%edx xorl %esi,%eax addl %edx,%ecx roll $10,%edi addl %eax,%ecx movl %esi,%eax roll $11,%ecx addl %ebp,%ecx xorl %edi,%eax movl 4(%esp),%edx xorl %ecx,%eax addl %eax,%ebp movl %ecx,%eax roll $10,%esi addl %edx,%ebp xorl %esi,%eax roll $14,%ebp addl %ebx,%ebp movl 8(%esp),%edx xorl %ebp,%eax addl %edx,%ebx roll $10,%ecx addl %eax,%ebx movl %ebp,%eax roll $15,%ebx addl %edi,%ebx xorl %ecx,%eax movl 12(%esp),%edx xorl %ebx,%eax addl %eax,%edi movl %ebx,%eax roll $10,%ebp addl %edx,%edi xorl %ebp,%eax roll $12,%edi addl %esi,%edi movl 16(%esp),%edx xorl %edi,%eax addl %edx,%esi roll $10,%ebx addl %eax,%esi movl %edi,%eax roll $5,%esi addl %ecx,%esi xorl %ebx,%eax movl 20(%esp),%edx xorl %esi,%eax addl %eax,%ecx movl %esi,%eax roll $10,%edi addl %edx,%ecx xorl %edi,%eax roll $8,%ecx addl %ebp,%ecx movl 24(%esp),%edx xorl %ecx,%eax addl %edx,%ebp roll $10,%esi addl %eax,%ebp movl %ecx,%eax roll $7,%ebp addl %ebx,%ebp xorl %esi,%eax movl 28(%esp),%edx xorl %ebp,%eax addl %eax,%ebx movl %ebp,%eax roll $10,%ecx addl %edx,%ebx xorl %ecx,%eax roll $9,%ebx addl %edi,%ebx movl 32(%esp),%edx xorl %ebx,%eax addl %edx,%edi roll $10,%ebp addl %eax,%edi movl %ebx,%eax roll $11,%edi addl %esi,%edi xorl %ebp,%eax movl 36(%esp),%edx xorl %edi,%eax addl %eax,%esi movl %edi,%eax roll $10,%ebx addl %edx,%esi xorl %ebx,%eax roll $13,%esi addl %ecx,%esi movl 40(%esp),%edx xorl %esi,%eax addl %edx,%ecx roll $10,%edi addl %eax,%ecx movl %esi,%eax roll $14,%ecx addl %ebp,%ecx xorl %edi,%eax movl 44(%esp),%edx xorl %ecx,%eax addl %eax,%ebp movl %ecx,%eax roll $10,%esi addl %edx,%ebp xorl %esi,%eax roll $15,%ebp addl %ebx,%ebp movl 48(%esp),%edx xorl %ebp,%eax addl %edx,%ebx roll $10,%ecx addl %eax,%ebx movl %ebp,%eax roll $6,%ebx addl %edi,%ebx xorl %ecx,%eax movl 52(%esp),%edx xorl %ebx,%eax addl %eax,%edi movl %ebx,%eax roll $10,%ebp addl 
%edx,%edi xorl %ebp,%eax roll $7,%edi addl %esi,%edi movl 56(%esp),%edx xorl %edi,%eax addl %edx,%esi roll $10,%ebx addl %eax,%esi movl %edi,%eax roll $9,%esi addl %ecx,%esi xorl %ebx,%eax movl 60(%esp),%edx xorl %esi,%eax addl %eax,%ecx movl $-1,%eax roll $10,%edi addl %edx,%ecx movl 28(%esp),%edx roll $8,%ecx addl %ebp,%ecx addl %edx,%ebp movl %esi,%edx subl %ecx,%eax andl %ecx,%edx andl %edi,%eax orl %eax,%edx movl 16(%esp),%eax roll $10,%esi leal 1518500249(%ebp,%edx,1),%ebp movl $-1,%edx roll $7,%ebp addl %ebx,%ebp addl %eax,%ebx movl %ecx,%eax subl %ebp,%edx andl %ebp,%eax andl %esi,%edx orl %edx,%eax movl 52(%esp),%edx roll $10,%ecx leal 1518500249(%ebx,%eax,1),%ebx movl $-1,%eax roll $6,%ebx addl %edi,%ebx addl %edx,%edi movl %ebp,%edx subl %ebx,%eax andl %ebx,%edx andl %ecx,%eax orl %eax,%edx movl 4(%esp),%eax roll $10,%ebp leal 1518500249(%edi,%edx,1),%edi movl $-1,%edx roll $8,%edi addl %esi,%edi addl %eax,%esi movl %ebx,%eax subl %edi,%edx andl %edi,%eax andl %ebp,%edx orl %edx,%eax movl 40(%esp),%edx roll $10,%ebx leal 1518500249(%esi,%eax,1),%esi movl $-1,%eax roll $13,%esi addl %ecx,%esi addl %edx,%ecx movl %edi,%edx subl %esi,%eax andl %esi,%edx andl %ebx,%eax orl %eax,%edx movl 24(%esp),%eax roll $10,%edi leal 1518500249(%ecx,%edx,1),%ecx movl $-1,%edx roll $11,%ecx addl %ebp,%ecx addl %eax,%ebp movl %esi,%eax subl %ecx,%edx andl %ecx,%eax andl %edi,%edx orl %edx,%eax movl 60(%esp),%edx roll $10,%esi leal 1518500249(%ebp,%eax,1),%ebp movl $-1,%eax roll $9,%ebp addl %ebx,%ebp addl %edx,%ebx movl %ecx,%edx subl %ebp,%eax andl %ebp,%edx andl %esi,%eax orl %eax,%edx movl 12(%esp),%eax roll $10,%ecx leal 1518500249(%ebx,%edx,1),%ebx movl $-1,%edx roll $7,%ebx addl %edi,%ebx addl %eax,%edi movl %ebp,%eax subl %ebx,%edx andl %ebx,%eax andl %ecx,%edx orl %edx,%eax movl 48(%esp),%edx roll $10,%ebp leal 1518500249(%edi,%eax,1),%edi movl $-1,%eax roll $15,%edi addl %esi,%edi addl %edx,%esi movl %ebx,%edx subl %edi,%eax andl %edi,%edx andl %ebp,%eax orl %eax,%edx movl (%esp),%eax roll $10,%ebx leal 1518500249(%esi,%edx,1),%esi movl $-1,%edx roll $7,%esi addl %ecx,%esi addl %eax,%ecx movl %edi,%eax subl %esi,%edx andl %esi,%eax andl %ebx,%edx orl %edx,%eax movl 36(%esp),%edx roll $10,%edi leal 1518500249(%ecx,%eax,1),%ecx movl $-1,%eax roll $12,%ecx addl %ebp,%ecx addl %edx,%ebp movl %esi,%edx subl %ecx,%eax andl %ecx,%edx andl %edi,%eax orl %eax,%edx movl 20(%esp),%eax roll $10,%esi leal 1518500249(%ebp,%edx,1),%ebp movl $-1,%edx roll $15,%ebp addl %ebx,%ebp addl %eax,%ebx movl %ecx,%eax subl %ebp,%edx andl %ebp,%eax andl %esi,%edx orl %edx,%eax movl 8(%esp),%edx roll $10,%ecx leal 1518500249(%ebx,%eax,1),%ebx movl $-1,%eax roll $9,%ebx addl %edi,%ebx addl %edx,%edi movl %ebp,%edx subl %ebx,%eax andl %ebx,%edx andl %ecx,%eax orl %eax,%edx movl 56(%esp),%eax roll $10,%ebp leal 1518500249(%edi,%edx,1),%edi movl $-1,%edx roll $11,%edi addl %esi,%edi addl %eax,%esi movl %ebx,%eax subl %edi,%edx andl %edi,%eax andl %ebp,%edx orl %edx,%eax movl 44(%esp),%edx roll $10,%ebx leal 1518500249(%esi,%eax,1),%esi movl $-1,%eax roll $7,%esi addl %ecx,%esi addl %edx,%ecx movl %edi,%edx subl %esi,%eax andl %esi,%edx andl %ebx,%eax orl %eax,%edx movl 32(%esp),%eax roll $10,%edi leal 1518500249(%ecx,%edx,1),%ecx movl $-1,%edx roll $13,%ecx addl %ebp,%ecx addl %eax,%ebp movl %esi,%eax subl %ecx,%edx andl %ecx,%eax andl %edi,%edx orl %edx,%eax movl $-1,%edx roll $10,%esi leal 1518500249(%ebp,%eax,1),%ebp subl %ecx,%edx roll $12,%ebp addl %ebx,%ebp movl 12(%esp),%eax orl %ebp,%edx addl %eax,%ebx xorl 
%esi,%edx movl $-1,%eax roll $10,%ecx leal 1859775393(%ebx,%edx,1),%ebx subl %ebp,%eax roll $11,%ebx addl %edi,%ebx movl 40(%esp),%edx orl %ebx,%eax addl %edx,%edi xorl %ecx,%eax movl $-1,%edx roll $10,%ebp leal 1859775393(%edi,%eax,1),%edi subl %ebx,%edx roll $13,%edi addl %esi,%edi movl 56(%esp),%eax orl %edi,%edx addl %eax,%esi xorl %ebp,%edx movl $-1,%eax roll $10,%ebx leal 1859775393(%esi,%edx,1),%esi subl %edi,%eax roll $6,%esi addl %ecx,%esi movl 16(%esp),%edx orl %esi,%eax addl %edx,%ecx xorl %ebx,%eax movl $-1,%edx roll $10,%edi leal 1859775393(%ecx,%eax,1),%ecx subl %esi,%edx roll $7,%ecx addl %ebp,%ecx movl 36(%esp),%eax orl %ecx,%edx addl %eax,%ebp xorl %edi,%edx movl $-1,%eax roll $10,%esi leal 1859775393(%ebp,%edx,1),%ebp subl %ecx,%eax roll $14,%ebp addl %ebx,%ebp movl 60(%esp),%edx orl %ebp,%eax addl %edx,%ebx xorl %esi,%eax movl $-1,%edx roll $10,%ecx leal 1859775393(%ebx,%eax,1),%ebx subl %ebp,%edx roll $9,%ebx addl %edi,%ebx movl 32(%esp),%eax orl %ebx,%edx addl %eax,%edi xorl %ecx,%edx movl $-1,%eax roll $10,%ebp leal 1859775393(%edi,%edx,1),%edi subl %ebx,%eax roll $13,%edi addl %esi,%edi movl 4(%esp),%edx orl %edi,%eax addl %edx,%esi xorl %ebp,%eax movl $-1,%edx roll $10,%ebx leal 1859775393(%esi,%eax,1),%esi subl %edi,%edx roll $15,%esi addl %ecx,%esi movl 8(%esp),%eax orl %esi,%edx addl %eax,%ecx xorl %ebx,%edx movl $-1,%eax roll $10,%edi leal 1859775393(%ecx,%edx,1),%ecx subl %esi,%eax roll $14,%ecx addl %ebp,%ecx movl 28(%esp),%edx orl %ecx,%eax addl %edx,%ebp xorl %edi,%eax movl $-1,%edx roll $10,%esi leal 1859775393(%ebp,%eax,1),%ebp subl %ecx,%edx roll $8,%ebp addl %ebx,%ebp movl (%esp),%eax orl %ebp,%edx addl %eax,%ebx xorl %esi,%edx movl $-1,%eax roll $10,%ecx leal 1859775393(%ebx,%edx,1),%ebx subl %ebp,%eax roll $13,%ebx addl %edi,%ebx movl 24(%esp),%edx orl %ebx,%eax addl %edx,%edi xorl %ecx,%eax movl $-1,%edx roll $10,%ebp leal 1859775393(%edi,%eax,1),%edi subl %ebx,%edx roll $6,%edi addl %esi,%edi movl 52(%esp),%eax orl %edi,%edx addl %eax,%esi xorl %ebp,%edx movl $-1,%eax roll $10,%ebx leal 1859775393(%esi,%edx,1),%esi subl %edi,%eax roll $5,%esi addl %ecx,%esi movl 44(%esp),%edx orl %esi,%eax addl %edx,%ecx xorl %ebx,%eax movl $-1,%edx roll $10,%edi leal 1859775393(%ecx,%eax,1),%ecx subl %esi,%edx roll $12,%ecx addl %ebp,%ecx movl 20(%esp),%eax orl %ecx,%edx addl %eax,%ebp xorl %edi,%edx movl $-1,%eax roll $10,%esi leal 1859775393(%ebp,%edx,1),%ebp subl %ecx,%eax roll $7,%ebp addl %ebx,%ebp movl 48(%esp),%edx orl %ebp,%eax addl %edx,%ebx xorl %esi,%eax movl $-1,%edx roll $10,%ecx leal 1859775393(%ebx,%eax,1),%ebx movl %ecx,%eax roll $5,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 4(%esp),%eax roll $10,%ebp leal 2400959708(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $11,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 36(%esp),%eax roll $10,%ebx leal 2400959708(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $12,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl 44(%esp),%eax roll $10,%edi leal 2400959708(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $14,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 40(%esp),%eax roll $10,%esi leal 2400959708(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $15,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl (%esp),%eax roll $10,%ecx leal 
2400959708(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $14,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 32(%esp),%eax roll $10,%ebp leal 2400959708(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $15,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 48(%esp),%eax roll $10,%ebx leal 2400959708(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $9,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl 16(%esp),%eax roll $10,%edi leal 2400959708(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $8,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 52(%esp),%eax roll $10,%esi leal 2400959708(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $9,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl 12(%esp),%eax roll $10,%ecx leal 2400959708(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $14,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 28(%esp),%eax roll $10,%ebp leal 2400959708(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $5,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 60(%esp),%eax roll $10,%ebx leal 2400959708(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $6,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl 56(%esp),%eax roll $10,%edi leal 2400959708(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $8,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 20(%esp),%eax roll $10,%esi leal 2400959708(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $6,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl 24(%esp),%eax roll $10,%ecx leal 2400959708(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $5,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 8(%esp),%eax roll $10,%ebp leal 2400959708(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi subl %ebp,%edx roll $12,%edi addl %esi,%edi movl 16(%esp),%eax orl %ebx,%edx addl %eax,%esi xorl %edi,%edx movl $-1,%eax roll $10,%ebx leal 2840853838(%esi,%edx,1),%esi subl %ebx,%eax roll $9,%esi addl %ecx,%esi movl (%esp),%edx orl %edi,%eax addl %edx,%ecx xorl %esi,%eax movl $-1,%edx roll $10,%edi leal 2840853838(%ecx,%eax,1),%ecx subl %edi,%edx roll $15,%ecx addl %ebp,%ecx movl 20(%esp),%eax orl %esi,%edx addl %eax,%ebp xorl %ecx,%edx movl $-1,%eax roll $10,%esi leal 2840853838(%ebp,%edx,1),%ebp subl %esi,%eax roll $5,%ebp addl %ebx,%ebp movl 36(%esp),%edx orl %ecx,%eax addl %edx,%ebx xorl %ebp,%eax movl $-1,%edx roll $10,%ecx leal 2840853838(%ebx,%eax,1),%ebx subl %ecx,%edx roll $11,%ebx addl %edi,%ebx movl 28(%esp),%eax orl %ebp,%edx addl %eax,%edi xorl %ebx,%edx movl $-1,%eax roll $10,%ebp leal 2840853838(%edi,%edx,1),%edi subl %ebp,%eax roll $6,%edi addl %esi,%edi movl 48(%esp),%edx orl %ebx,%eax addl %edx,%esi xorl %edi,%eax movl $-1,%edx roll $10,%ebx leal 2840853838(%esi,%eax,1),%esi subl %ebx,%edx roll $8,%esi addl %ecx,%esi movl 8(%esp),%eax orl %edi,%edx addl %eax,%ecx xorl %esi,%edx movl $-1,%eax roll $10,%edi leal 2840853838(%ecx,%edx,1),%ecx subl %edi,%eax roll $13,%ecx addl %ebp,%ecx movl 40(%esp),%edx orl %esi,%eax addl %edx,%ebp xorl %ecx,%eax movl $-1,%edx roll $10,%esi leal 
2840853838(%ebp,%eax,1),%ebp subl %esi,%edx roll $12,%ebp addl %ebx,%ebp movl 56(%esp),%eax orl %ecx,%edx addl %eax,%ebx xorl %ebp,%edx movl $-1,%eax roll $10,%ecx leal 2840853838(%ebx,%edx,1),%ebx subl %ecx,%eax roll $5,%ebx addl %edi,%ebx movl 4(%esp),%edx orl %ebp,%eax addl %edx,%edi xorl %ebx,%eax movl $-1,%edx roll $10,%ebp leal 2840853838(%edi,%eax,1),%edi subl %ebp,%edx roll $12,%edi addl %esi,%edi movl 12(%esp),%eax orl %ebx,%edx addl %eax,%esi xorl %edi,%edx movl $-1,%eax roll $10,%ebx leal 2840853838(%esi,%edx,1),%esi subl %ebx,%eax roll $13,%esi addl %ecx,%esi movl 32(%esp),%edx orl %edi,%eax addl %edx,%ecx xorl %esi,%eax movl $-1,%edx roll $10,%edi leal 2840853838(%ecx,%eax,1),%ecx subl %edi,%edx roll $14,%ecx addl %ebp,%ecx movl 44(%esp),%eax orl %esi,%edx addl %eax,%ebp xorl %ecx,%edx movl $-1,%eax roll $10,%esi leal 2840853838(%ebp,%edx,1),%ebp subl %esi,%eax roll $11,%ebp addl %ebx,%ebp movl 24(%esp),%edx orl %ecx,%eax addl %edx,%ebx xorl %ebp,%eax movl $-1,%edx roll $10,%ecx leal 2840853838(%ebx,%eax,1),%ebx subl %ecx,%edx roll $8,%ebx addl %edi,%ebx movl 60(%esp),%eax orl %ebp,%edx addl %eax,%edi xorl %ebx,%edx movl $-1,%eax roll $10,%ebp leal 2840853838(%edi,%edx,1),%edi subl %ebp,%eax roll $5,%edi addl %esi,%edi movl 52(%esp),%edx orl %ebx,%eax addl %edx,%esi xorl %edi,%eax movl 128(%esp),%edx roll $10,%ebx leal 2840853838(%esi,%eax,1),%esi movl %ecx,64(%esp) roll $6,%esi addl %ecx,%esi movl (%edx),%ecx movl %esi,68(%esp) movl %edi,72(%esp) movl 4(%edx),%esi movl %ebx,76(%esp) movl 8(%edx),%edi movl %ebp,80(%esp) movl 12(%edx),%ebx movl 16(%edx),%ebp movl $-1,%edx subl %ebx,%edx movl 20(%esp),%eax orl %edi,%edx addl %eax,%ecx xorl %esi,%edx movl $-1,%eax roll $10,%edi leal 1352829926(%ecx,%edx,1),%ecx subl %edi,%eax roll $8,%ecx addl %ebp,%ecx movl 56(%esp),%edx orl %esi,%eax addl %edx,%ebp xorl %ecx,%eax movl $-1,%edx roll $10,%esi leal 1352829926(%ebp,%eax,1),%ebp subl %esi,%edx roll $9,%ebp addl %ebx,%ebp movl 28(%esp),%eax orl %ecx,%edx addl %eax,%ebx xorl %ebp,%edx movl $-1,%eax roll $10,%ecx leal 1352829926(%ebx,%edx,1),%ebx subl %ecx,%eax roll $9,%ebx addl %edi,%ebx movl (%esp),%edx orl %ebp,%eax addl %edx,%edi xorl %ebx,%eax movl $-1,%edx roll $10,%ebp leal 1352829926(%edi,%eax,1),%edi subl %ebp,%edx roll $11,%edi addl %esi,%edi movl 36(%esp),%eax orl %ebx,%edx addl %eax,%esi xorl %edi,%edx movl $-1,%eax roll $10,%ebx leal 1352829926(%esi,%edx,1),%esi subl %ebx,%eax roll $13,%esi addl %ecx,%esi movl 8(%esp),%edx orl %edi,%eax addl %edx,%ecx xorl %esi,%eax movl $-1,%edx roll $10,%edi leal 1352829926(%ecx,%eax,1),%ecx subl %edi,%edx roll $15,%ecx addl %ebp,%ecx movl 44(%esp),%eax orl %esi,%edx addl %eax,%ebp xorl %ecx,%edx movl $-1,%eax roll $10,%esi leal 1352829926(%ebp,%edx,1),%ebp subl %esi,%eax roll $15,%ebp addl %ebx,%ebp movl 16(%esp),%edx orl %ecx,%eax addl %edx,%ebx xorl %ebp,%eax movl $-1,%edx roll $10,%ecx leal 1352829926(%ebx,%eax,1),%ebx subl %ecx,%edx roll $5,%ebx addl %edi,%ebx movl 52(%esp),%eax orl %ebp,%edx addl %eax,%edi xorl %ebx,%edx movl $-1,%eax roll $10,%ebp leal 1352829926(%edi,%edx,1),%edi subl %ebp,%eax roll $7,%edi addl %esi,%edi movl 24(%esp),%edx orl %ebx,%eax addl %edx,%esi xorl %edi,%eax movl $-1,%edx roll $10,%ebx leal 1352829926(%esi,%eax,1),%esi subl %ebx,%edx roll $7,%esi addl %ecx,%esi movl 60(%esp),%eax orl %edi,%edx addl %eax,%ecx xorl %esi,%edx movl $-1,%eax roll $10,%edi leal 1352829926(%ecx,%edx,1),%ecx subl %edi,%eax roll $8,%ecx addl %ebp,%ecx movl 32(%esp),%edx orl %esi,%eax addl %edx,%ebp xorl %ecx,%eax movl $-1,%edx 
roll $10,%esi leal 1352829926(%ebp,%eax,1),%ebp subl %esi,%edx roll $11,%ebp addl %ebx,%ebp movl 4(%esp),%eax orl %ecx,%edx addl %eax,%ebx xorl %ebp,%edx movl $-1,%eax roll $10,%ecx leal 1352829926(%ebx,%edx,1),%ebx subl %ecx,%eax roll $14,%ebx addl %edi,%ebx movl 40(%esp),%edx orl %ebp,%eax addl %edx,%edi xorl %ebx,%eax movl $-1,%edx roll $10,%ebp leal 1352829926(%edi,%eax,1),%edi subl %ebp,%edx roll $14,%edi addl %esi,%edi movl 12(%esp),%eax orl %ebx,%edx addl %eax,%esi xorl %edi,%edx movl $-1,%eax roll $10,%ebx leal 1352829926(%esi,%edx,1),%esi subl %ebx,%eax roll $12,%esi addl %ecx,%esi movl 48(%esp),%edx orl %edi,%eax addl %edx,%ecx xorl %esi,%eax movl $-1,%edx roll $10,%edi leal 1352829926(%ecx,%eax,1),%ecx movl %edi,%eax roll $6,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 24(%esp),%eax roll $10,%esi leal 1548603684(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $9,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl 44(%esp),%eax roll $10,%ecx leal 1548603684(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $13,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 12(%esp),%eax roll $10,%ebp leal 1548603684(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $15,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 28(%esp),%eax roll $10,%ebx leal 1548603684(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $7,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl (%esp),%eax roll $10,%edi leal 1548603684(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $12,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 52(%esp),%eax roll $10,%esi leal 1548603684(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $8,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl 20(%esp),%eax roll $10,%ecx leal 1548603684(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $9,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 40(%esp),%eax roll $10,%ebp leal 1548603684(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $11,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 56(%esp),%eax roll $10,%ebx leal 1548603684(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $7,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl 60(%esp),%eax roll $10,%edi leal 1548603684(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $7,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 32(%esp),%eax roll $10,%esi leal 1548603684(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $12,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl 48(%esp),%eax roll $10,%ecx leal 1548603684(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $7,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 16(%esp),%eax roll $10,%ebp leal 1548603684(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $6,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 36(%esp),%eax roll $10,%ebx leal 1548603684(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $15,%esi addl %ecx,%esi subl 
%ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl 4(%esp),%eax roll $10,%edi leal 1548603684(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $13,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 8(%esp),%eax roll $10,%esi leal 1548603684(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp subl %ecx,%edx roll $11,%ebp addl %ebx,%ebp movl 60(%esp),%eax orl %ebp,%edx addl %eax,%ebx xorl %esi,%edx movl $-1,%eax roll $10,%ecx leal 1836072691(%ebx,%edx,1),%ebx subl %ebp,%eax roll $9,%ebx addl %edi,%ebx movl 20(%esp),%edx orl %ebx,%eax addl %edx,%edi xorl %ecx,%eax movl $-1,%edx roll $10,%ebp leal 1836072691(%edi,%eax,1),%edi subl %ebx,%edx roll $7,%edi addl %esi,%edi movl 4(%esp),%eax orl %edi,%edx addl %eax,%esi xorl %ebp,%edx movl $-1,%eax roll $10,%ebx leal 1836072691(%esi,%edx,1),%esi subl %edi,%eax roll $15,%esi addl %ecx,%esi movl 12(%esp),%edx orl %esi,%eax addl %edx,%ecx xorl %ebx,%eax movl $-1,%edx roll $10,%edi leal 1836072691(%ecx,%eax,1),%ecx subl %esi,%edx roll $11,%ecx addl %ebp,%ecx movl 28(%esp),%eax orl %ecx,%edx addl %eax,%ebp xorl %edi,%edx movl $-1,%eax roll $10,%esi leal 1836072691(%ebp,%edx,1),%ebp subl %ecx,%eax roll $8,%ebp addl %ebx,%ebp movl 56(%esp),%edx orl %ebp,%eax addl %edx,%ebx xorl %esi,%eax movl $-1,%edx roll $10,%ecx leal 1836072691(%ebx,%eax,1),%ebx subl %ebp,%edx roll $6,%ebx addl %edi,%ebx movl 24(%esp),%eax orl %ebx,%edx addl %eax,%edi xorl %ecx,%edx movl $-1,%eax roll $10,%ebp leal 1836072691(%edi,%edx,1),%edi subl %ebx,%eax roll $6,%edi addl %esi,%edi movl 36(%esp),%edx orl %edi,%eax addl %edx,%esi xorl %ebp,%eax movl $-1,%edx roll $10,%ebx leal 1836072691(%esi,%eax,1),%esi subl %edi,%edx roll $14,%esi addl %ecx,%esi movl 44(%esp),%eax orl %esi,%edx addl %eax,%ecx xorl %ebx,%edx movl $-1,%eax roll $10,%edi leal 1836072691(%ecx,%edx,1),%ecx subl %esi,%eax roll $12,%ecx addl %ebp,%ecx movl 32(%esp),%edx orl %ecx,%eax addl %edx,%ebp xorl %edi,%eax movl $-1,%edx roll $10,%esi leal 1836072691(%ebp,%eax,1),%ebp subl %ecx,%edx roll $13,%ebp addl %ebx,%ebp movl 48(%esp),%eax orl %ebp,%edx addl %eax,%ebx xorl %esi,%edx movl $-1,%eax roll $10,%ecx leal 1836072691(%ebx,%edx,1),%ebx subl %ebp,%eax roll $5,%ebx addl %edi,%ebx movl 8(%esp),%edx orl %ebx,%eax addl %edx,%edi xorl %ecx,%eax movl $-1,%edx roll $10,%ebp leal 1836072691(%edi,%eax,1),%edi subl %ebx,%edx roll $14,%edi addl %esi,%edi movl 40(%esp),%eax orl %edi,%edx addl %eax,%esi xorl %ebp,%edx movl $-1,%eax roll $10,%ebx leal 1836072691(%esi,%edx,1),%esi subl %edi,%eax roll $13,%esi addl %ecx,%esi movl (%esp),%edx orl %esi,%eax addl %edx,%ecx xorl %ebx,%eax movl $-1,%edx roll $10,%edi leal 1836072691(%ecx,%eax,1),%ecx subl %esi,%edx roll $13,%ecx addl %ebp,%ecx movl 16(%esp),%eax orl %ecx,%edx addl %eax,%ebp xorl %edi,%edx movl $-1,%eax roll $10,%esi leal 1836072691(%ebp,%edx,1),%ebp subl %ecx,%eax roll $7,%ebp addl %ebx,%ebp movl 52(%esp),%edx orl %ebp,%eax addl %edx,%ebx xorl %esi,%eax movl 32(%esp),%edx roll $10,%ecx leal 1836072691(%ebx,%eax,1),%ebx movl $-1,%eax roll $5,%ebx addl %edi,%ebx addl %edx,%edi movl %ebp,%edx subl %ebx,%eax andl %ebx,%edx andl %ecx,%eax orl %eax,%edx movl 24(%esp),%eax roll $10,%ebp leal 2053994217(%edi,%edx,1),%edi movl $-1,%edx roll $15,%edi addl %esi,%edi addl %eax,%esi movl %ebx,%eax subl %edi,%edx andl %edi,%eax andl %ebp,%edx orl %edx,%eax movl 16(%esp),%edx roll $10,%ebx leal 2053994217(%esi,%eax,1),%esi movl $-1,%eax roll $5,%esi addl %ecx,%esi addl %edx,%ecx movl %edi,%edx subl %esi,%eax andl %esi,%edx andl 
%ebx,%eax orl %eax,%edx movl 4(%esp),%eax roll $10,%edi leal 2053994217(%ecx,%edx,1),%ecx movl $-1,%edx roll $8,%ecx addl %ebp,%ecx addl %eax,%ebp movl %esi,%eax subl %ecx,%edx andl %ecx,%eax andl %edi,%edx orl %edx,%eax movl 12(%esp),%edx roll $10,%esi leal 2053994217(%ebp,%eax,1),%ebp movl $-1,%eax roll $11,%ebp addl %ebx,%ebp addl %edx,%ebx movl %ecx,%edx subl %ebp,%eax andl %ebp,%edx andl %esi,%eax orl %eax,%edx movl 44(%esp),%eax roll $10,%ecx leal 2053994217(%ebx,%edx,1),%ebx movl $-1,%edx roll $14,%ebx addl %edi,%ebx addl %eax,%edi movl %ebp,%eax subl %ebx,%edx andl %ebx,%eax andl %ecx,%edx orl %edx,%eax movl 60(%esp),%edx roll $10,%ebp leal 2053994217(%edi,%eax,1),%edi movl $-1,%eax roll $14,%edi addl %esi,%edi addl %edx,%esi movl %ebx,%edx subl %edi,%eax andl %edi,%edx andl %ebp,%eax orl %eax,%edx movl (%esp),%eax roll $10,%ebx leal 2053994217(%esi,%edx,1),%esi movl $-1,%edx roll $6,%esi addl %ecx,%esi addl %eax,%ecx movl %edi,%eax subl %esi,%edx andl %esi,%eax andl %ebx,%edx orl %edx,%eax movl 20(%esp),%edx roll $10,%edi leal 2053994217(%ecx,%eax,1),%ecx movl $-1,%eax roll $14,%ecx addl %ebp,%ecx addl %edx,%ebp movl %esi,%edx subl %ecx,%eax andl %ecx,%edx andl %edi,%eax orl %eax,%edx movl 48(%esp),%eax roll $10,%esi leal 2053994217(%ebp,%edx,1),%ebp movl $-1,%edx roll $6,%ebp addl %ebx,%ebp addl %eax,%ebx movl %ecx,%eax subl %ebp,%edx andl %ebp,%eax andl %esi,%edx orl %edx,%eax movl 8(%esp),%edx roll $10,%ecx leal 2053994217(%ebx,%eax,1),%ebx movl $-1,%eax roll $9,%ebx addl %edi,%ebx addl %edx,%edi movl %ebp,%edx subl %ebx,%eax andl %ebx,%edx andl %ecx,%eax orl %eax,%edx movl 52(%esp),%eax roll $10,%ebp leal 2053994217(%edi,%edx,1),%edi movl $-1,%edx roll $12,%edi addl %esi,%edi addl %eax,%esi movl %ebx,%eax subl %edi,%edx andl %edi,%eax andl %ebp,%edx orl %edx,%eax movl 36(%esp),%edx roll $10,%ebx leal 2053994217(%esi,%eax,1),%esi movl $-1,%eax roll $9,%esi addl %ecx,%esi addl %edx,%ecx movl %edi,%edx subl %esi,%eax andl %esi,%edx andl %ebx,%eax orl %eax,%edx movl 28(%esp),%eax roll $10,%edi leal 2053994217(%ecx,%edx,1),%ecx movl $-1,%edx roll $12,%ecx addl %ebp,%ecx addl %eax,%ebp movl %esi,%eax subl %ecx,%edx andl %ecx,%eax andl %edi,%edx orl %edx,%eax movl 40(%esp),%edx roll $10,%esi leal 2053994217(%ebp,%eax,1),%ebp movl $-1,%eax roll $5,%ebp addl %ebx,%ebp addl %edx,%ebx movl %ecx,%edx subl %ebp,%eax andl %ebp,%edx andl %esi,%eax orl %eax,%edx movl 56(%esp),%eax roll $10,%ecx leal 2053994217(%ebx,%edx,1),%ebx movl $-1,%edx roll $15,%ebx addl %edi,%ebx addl %eax,%edi movl %ebp,%eax subl %ebx,%edx andl %ebx,%eax andl %ecx,%edx orl %eax,%edx movl %ebx,%eax roll $10,%ebp leal 2053994217(%edi,%edx,1),%edi xorl %ebp,%eax roll $8,%edi addl %esi,%edi movl 48(%esp),%edx xorl %edi,%eax addl %edx,%esi roll $10,%ebx addl %eax,%esi movl %edi,%eax roll $8,%esi addl %ecx,%esi xorl %ebx,%eax movl 60(%esp),%edx xorl %esi,%eax addl %eax,%ecx movl %esi,%eax roll $10,%edi addl %edx,%ecx xorl %edi,%eax roll $5,%ecx addl %ebp,%ecx movl 40(%esp),%edx xorl %ecx,%eax addl %edx,%ebp roll $10,%esi addl %eax,%ebp movl %ecx,%eax roll $12,%ebp addl %ebx,%ebp xorl %esi,%eax movl 16(%esp),%edx xorl %ebp,%eax addl %eax,%ebx movl %ebp,%eax roll $10,%ecx addl %edx,%ebx xorl %ecx,%eax roll $9,%ebx addl %edi,%ebx movl 4(%esp),%edx xorl %ebx,%eax addl %edx,%edi roll $10,%ebp addl %eax,%edi movl %ebx,%eax roll $12,%edi addl %esi,%edi xorl %ebp,%eax movl 20(%esp),%edx xorl %edi,%eax addl %eax,%esi movl %edi,%eax roll $10,%ebx addl %edx,%esi xorl %ebx,%eax roll $5,%esi addl %ecx,%esi movl 32(%esp),%edx xorl 
%esi,%eax addl %edx,%ecx roll $10,%edi addl %eax,%ecx movl %esi,%eax roll $14,%ecx addl %ebp,%ecx xorl %edi,%eax movl 28(%esp),%edx xorl %ecx,%eax addl %eax,%ebp movl %ecx,%eax roll $10,%esi addl %edx,%ebp xorl %esi,%eax roll $6,%ebp addl %ebx,%ebp movl 24(%esp),%edx xorl %ebp,%eax addl %edx,%ebx roll $10,%ecx addl %eax,%ebx movl %ebp,%eax roll $8,%ebx addl %edi,%ebx xorl %ecx,%eax movl 8(%esp),%edx xorl %ebx,%eax addl %eax,%edi movl %ebx,%eax roll $10,%ebp addl %edx,%edi xorl %ebp,%eax roll $13,%edi addl %esi,%edi movl 52(%esp),%edx xorl %edi,%eax addl %edx,%esi roll $10,%ebx addl %eax,%esi movl %edi,%eax roll $6,%esi addl %ecx,%esi xorl %ebx,%eax movl 56(%esp),%edx xorl %esi,%eax addl %eax,%ecx movl %esi,%eax roll $10,%edi addl %edx,%ecx xorl %edi,%eax roll $5,%ecx addl %ebp,%ecx movl (%esp),%edx xorl %ecx,%eax addl %edx,%ebp roll $10,%esi addl %eax,%ebp movl %ecx,%eax roll $15,%ebp addl %ebx,%ebp xorl %esi,%eax movl 12(%esp),%edx xorl %ebp,%eax addl %eax,%ebx movl %ebp,%eax roll $10,%ecx addl %edx,%ebx xorl %ecx,%eax roll $13,%ebx addl %edi,%ebx movl 36(%esp),%edx xorl %ebx,%eax addl %edx,%edi roll $10,%ebp addl %eax,%edi movl %ebx,%eax roll $11,%edi addl %esi,%edi xorl %ebp,%eax movl 44(%esp),%edx xorl %edi,%eax addl %eax,%esi roll $10,%ebx addl %edx,%esi movl 128(%esp),%edx roll $11,%esi addl %ecx,%esi movl 4(%edx),%eax addl %eax,%ebx movl 72(%esp),%eax addl %eax,%ebx movl 8(%edx),%eax addl %eax,%ebp movl 76(%esp),%eax addl %eax,%ebp movl 12(%edx),%eax addl %eax,%ecx movl 80(%esp),%eax addl %eax,%ecx movl 16(%edx),%eax addl %eax,%esi movl 64(%esp),%eax addl %eax,%esi movl (%edx),%eax addl %eax,%edi movl 68(%esp),%eax addl %eax,%edi movl 136(%esp),%eax movl %ebx,(%edx) movl %ebp,4(%edx) movl %ecx,8(%edx) subl $1,%eax movl %esi,12(%edx) movl %edi,16(%edx) jle .L001get_out movl %eax,136(%esp) movl %ecx,%edi movl 132(%esp),%eax movl %ebx,%ecx addl $64,%eax movl %ebp,%esi movl %eax,132(%esp) jmp .L000start .L001get_out: addl $108,%esp popl %ebx popl %ebp popl %edi popl %esi ret .size ripemd160_block_asm_data_order,.-.L_ripemd160_block_asm_data_order_begin #else .file "rmd-586.S" .text .globl ripemd160_block_asm_data_order .type ripemd160_block_asm_data_order,@function .align 16 ripemd160_block_asm_data_order: .L_ripemd160_block_asm_data_order_begin: movl 4(%esp),%edx movl 8(%esp),%eax pushl %esi movl (%edx),%ecx pushl %edi movl 4(%edx),%esi pushl %ebp movl 8(%edx),%edi pushl %ebx subl $108,%esp .L000start: movl (%eax),%ebx movl 4(%eax),%ebp movl %ebx,(%esp) movl %ebp,4(%esp) movl 8(%eax),%ebx movl 12(%eax),%ebp movl %ebx,8(%esp) movl %ebp,12(%esp) movl 16(%eax),%ebx movl 20(%eax),%ebp movl %ebx,16(%esp) movl %ebp,20(%esp) movl 24(%eax),%ebx movl 28(%eax),%ebp movl %ebx,24(%esp) movl %ebp,28(%esp) movl 32(%eax),%ebx movl 36(%eax),%ebp movl %ebx,32(%esp) movl %ebp,36(%esp) movl 40(%eax),%ebx movl 44(%eax),%ebp movl %ebx,40(%esp) movl %ebp,44(%esp) movl 48(%eax),%ebx movl 52(%eax),%ebp movl %ebx,48(%esp) movl %ebp,52(%esp) movl 56(%eax),%ebx movl 60(%eax),%ebp movl %ebx,56(%esp) movl %ebp,60(%esp) movl %edi,%eax movl 12(%edx),%ebx movl 16(%edx),%ebp xorl %ebx,%eax movl (%esp),%edx xorl %esi,%eax addl %edx,%ecx roll $10,%edi addl %eax,%ecx movl %esi,%eax roll $11,%ecx addl %ebp,%ecx xorl %edi,%eax movl 4(%esp),%edx xorl %ecx,%eax addl %eax,%ebp movl %ecx,%eax roll $10,%esi addl %edx,%ebp xorl %esi,%eax roll $14,%ebp addl %ebx,%ebp movl 8(%esp),%edx xorl %ebp,%eax addl %edx,%ebx roll $10,%ecx addl %eax,%ebx movl %ebp,%eax roll $15,%ebx addl %edi,%ebx xorl %ecx,%eax movl 12(%esp),%edx xorl 
%ebx,%eax addl %eax,%edi movl %ebx,%eax roll $10,%ebp addl %edx,%edi xorl %ebp,%eax roll $12,%edi addl %esi,%edi movl 16(%esp),%edx xorl %edi,%eax addl %edx,%esi roll $10,%ebx addl %eax,%esi movl %edi,%eax roll $5,%esi addl %ecx,%esi xorl %ebx,%eax movl 20(%esp),%edx xorl %esi,%eax addl %eax,%ecx movl %esi,%eax roll $10,%edi addl %edx,%ecx xorl %edi,%eax roll $8,%ecx addl %ebp,%ecx movl 24(%esp),%edx xorl %ecx,%eax addl %edx,%ebp roll $10,%esi addl %eax,%ebp movl %ecx,%eax roll $7,%ebp addl %ebx,%ebp xorl %esi,%eax movl 28(%esp),%edx xorl %ebp,%eax addl %eax,%ebx movl %ebp,%eax roll $10,%ecx addl %edx,%ebx xorl %ecx,%eax roll $9,%ebx addl %edi,%ebx movl 32(%esp),%edx xorl %ebx,%eax addl %edx,%edi roll $10,%ebp addl %eax,%edi movl %ebx,%eax roll $11,%edi addl %esi,%edi xorl %ebp,%eax movl 36(%esp),%edx xorl %edi,%eax addl %eax,%esi movl %edi,%eax roll $10,%ebx addl %edx,%esi xorl %ebx,%eax roll $13,%esi addl %ecx,%esi movl 40(%esp),%edx xorl %esi,%eax addl %edx,%ecx roll $10,%edi addl %eax,%ecx movl %esi,%eax roll $14,%ecx addl %ebp,%ecx xorl %edi,%eax movl 44(%esp),%edx xorl %ecx,%eax addl %eax,%ebp movl %ecx,%eax roll $10,%esi addl %edx,%ebp xorl %esi,%eax roll $15,%ebp addl %ebx,%ebp movl 48(%esp),%edx xorl %ebp,%eax addl %edx,%ebx roll $10,%ecx addl %eax,%ebx movl %ebp,%eax roll $6,%ebx addl %edi,%ebx xorl %ecx,%eax movl 52(%esp),%edx xorl %ebx,%eax addl %eax,%edi movl %ebx,%eax roll $10,%ebp addl %edx,%edi xorl %ebp,%eax roll $7,%edi addl %esi,%edi movl 56(%esp),%edx xorl %edi,%eax addl %edx,%esi roll $10,%ebx addl %eax,%esi movl %edi,%eax roll $9,%esi addl %ecx,%esi xorl %ebx,%eax movl 60(%esp),%edx xorl %esi,%eax addl %eax,%ecx movl $-1,%eax roll $10,%edi addl %edx,%ecx movl 28(%esp),%edx roll $8,%ecx addl %ebp,%ecx addl %edx,%ebp movl %esi,%edx subl %ecx,%eax andl %ecx,%edx andl %edi,%eax orl %eax,%edx movl 16(%esp),%eax roll $10,%esi leal 1518500249(%ebp,%edx,1),%ebp movl $-1,%edx roll $7,%ebp addl %ebx,%ebp addl %eax,%ebx movl %ecx,%eax subl %ebp,%edx andl %ebp,%eax andl %esi,%edx orl %edx,%eax movl 52(%esp),%edx roll $10,%ecx leal 1518500249(%ebx,%eax,1),%ebx movl $-1,%eax roll $6,%ebx addl %edi,%ebx addl %edx,%edi movl %ebp,%edx subl %ebx,%eax andl %ebx,%edx andl %ecx,%eax orl %eax,%edx movl 4(%esp),%eax roll $10,%ebp leal 1518500249(%edi,%edx,1),%edi movl $-1,%edx roll $8,%edi addl %esi,%edi addl %eax,%esi movl %ebx,%eax subl %edi,%edx andl %edi,%eax andl %ebp,%edx orl %edx,%eax movl 40(%esp),%edx roll $10,%ebx leal 1518500249(%esi,%eax,1),%esi movl $-1,%eax roll $13,%esi addl %ecx,%esi addl %edx,%ecx movl %edi,%edx subl %esi,%eax andl %esi,%edx andl %ebx,%eax orl %eax,%edx movl 24(%esp),%eax roll $10,%edi leal 1518500249(%ecx,%edx,1),%ecx movl $-1,%edx roll $11,%ecx addl %ebp,%ecx addl %eax,%ebp movl %esi,%eax subl %ecx,%edx andl %ecx,%eax andl %edi,%edx orl %edx,%eax movl 60(%esp),%edx roll $10,%esi leal 1518500249(%ebp,%eax,1),%ebp movl $-1,%eax roll $9,%ebp addl %ebx,%ebp addl %edx,%ebx movl %ecx,%edx subl %ebp,%eax andl %ebp,%edx andl %esi,%eax orl %eax,%edx movl 12(%esp),%eax roll $10,%ecx leal 1518500249(%ebx,%edx,1),%ebx movl $-1,%edx roll $7,%ebx addl %edi,%ebx addl %eax,%edi movl %ebp,%eax subl %ebx,%edx andl %ebx,%eax andl %ecx,%edx orl %edx,%eax movl 48(%esp),%edx roll $10,%ebp leal 1518500249(%edi,%eax,1),%edi movl $-1,%eax roll $15,%edi addl %esi,%edi addl %edx,%esi movl %ebx,%edx subl %edi,%eax andl %edi,%edx andl %ebp,%eax orl %eax,%edx movl (%esp),%eax roll $10,%ebx leal 1518500249(%esi,%edx,1),%esi movl $-1,%edx roll $7,%esi addl %ecx,%esi addl %eax,%ecx movl 
%edi,%eax subl %esi,%edx andl %esi,%eax andl %ebx,%edx orl %edx,%eax movl 36(%esp),%edx roll $10,%edi leal 1518500249(%ecx,%eax,1),%ecx movl $-1,%eax roll $12,%ecx addl %ebp,%ecx addl %edx,%ebp movl %esi,%edx subl %ecx,%eax andl %ecx,%edx andl %edi,%eax orl %eax,%edx movl 20(%esp),%eax roll $10,%esi leal 1518500249(%ebp,%edx,1),%ebp movl $-1,%edx roll $15,%ebp addl %ebx,%ebp addl %eax,%ebx movl %ecx,%eax subl %ebp,%edx andl %ebp,%eax andl %esi,%edx orl %edx,%eax movl 8(%esp),%edx roll $10,%ecx leal 1518500249(%ebx,%eax,1),%ebx movl $-1,%eax roll $9,%ebx addl %edi,%ebx addl %edx,%edi movl %ebp,%edx subl %ebx,%eax andl %ebx,%edx andl %ecx,%eax orl %eax,%edx movl 56(%esp),%eax roll $10,%ebp leal 1518500249(%edi,%edx,1),%edi movl $-1,%edx roll $11,%edi addl %esi,%edi addl %eax,%esi movl %ebx,%eax subl %edi,%edx andl %edi,%eax andl %ebp,%edx orl %edx,%eax movl 44(%esp),%edx roll $10,%ebx leal 1518500249(%esi,%eax,1),%esi movl $-1,%eax roll $7,%esi addl %ecx,%esi addl %edx,%ecx movl %edi,%edx subl %esi,%eax andl %esi,%edx andl %ebx,%eax orl %eax,%edx movl 32(%esp),%eax roll $10,%edi leal 1518500249(%ecx,%edx,1),%ecx movl $-1,%edx roll $13,%ecx addl %ebp,%ecx addl %eax,%ebp movl %esi,%eax subl %ecx,%edx andl %ecx,%eax andl %edi,%edx orl %edx,%eax movl $-1,%edx roll $10,%esi leal 1518500249(%ebp,%eax,1),%ebp subl %ecx,%edx roll $12,%ebp addl %ebx,%ebp movl 12(%esp),%eax orl %ebp,%edx addl %eax,%ebx xorl %esi,%edx movl $-1,%eax roll $10,%ecx leal 1859775393(%ebx,%edx,1),%ebx subl %ebp,%eax roll $11,%ebx addl %edi,%ebx movl 40(%esp),%edx orl %ebx,%eax addl %edx,%edi xorl %ecx,%eax movl $-1,%edx roll $10,%ebp leal 1859775393(%edi,%eax,1),%edi subl %ebx,%edx roll $13,%edi addl %esi,%edi movl 56(%esp),%eax orl %edi,%edx addl %eax,%esi xorl %ebp,%edx movl $-1,%eax roll $10,%ebx leal 1859775393(%esi,%edx,1),%esi subl %edi,%eax roll $6,%esi addl %ecx,%esi movl 16(%esp),%edx orl %esi,%eax addl %edx,%ecx xorl %ebx,%eax movl $-1,%edx roll $10,%edi leal 1859775393(%ecx,%eax,1),%ecx subl %esi,%edx roll $7,%ecx addl %ebp,%ecx movl 36(%esp),%eax orl %ecx,%edx addl %eax,%ebp xorl %edi,%edx movl $-1,%eax roll $10,%esi leal 1859775393(%ebp,%edx,1),%ebp subl %ecx,%eax roll $14,%ebp addl %ebx,%ebp movl 60(%esp),%edx orl %ebp,%eax addl %edx,%ebx xorl %esi,%eax movl $-1,%edx roll $10,%ecx leal 1859775393(%ebx,%eax,1),%ebx subl %ebp,%edx roll $9,%ebx addl %edi,%ebx movl 32(%esp),%eax orl %ebx,%edx addl %eax,%edi xorl %ecx,%edx movl $-1,%eax roll $10,%ebp leal 1859775393(%edi,%edx,1),%edi subl %ebx,%eax roll $13,%edi addl %esi,%edi movl 4(%esp),%edx orl %edi,%eax addl %edx,%esi xorl %ebp,%eax movl $-1,%edx roll $10,%ebx leal 1859775393(%esi,%eax,1),%esi subl %edi,%edx roll $15,%esi addl %ecx,%esi movl 8(%esp),%eax orl %esi,%edx addl %eax,%ecx xorl %ebx,%edx movl $-1,%eax roll $10,%edi leal 1859775393(%ecx,%edx,1),%ecx subl %esi,%eax roll $14,%ecx addl %ebp,%ecx movl 28(%esp),%edx orl %ecx,%eax addl %edx,%ebp xorl %edi,%eax movl $-1,%edx roll $10,%esi leal 1859775393(%ebp,%eax,1),%ebp subl %ecx,%edx roll $8,%ebp addl %ebx,%ebp movl (%esp),%eax orl %ebp,%edx addl %eax,%ebx xorl %esi,%edx movl $-1,%eax roll $10,%ecx leal 1859775393(%ebx,%edx,1),%ebx subl %ebp,%eax roll $13,%ebx addl %edi,%ebx movl 24(%esp),%edx orl %ebx,%eax addl %edx,%edi xorl %ecx,%eax movl $-1,%edx roll $10,%ebp leal 1859775393(%edi,%eax,1),%edi subl %ebx,%edx roll $6,%edi addl %esi,%edi movl 52(%esp),%eax orl %edi,%edx addl %eax,%esi xorl %ebp,%edx movl $-1,%eax roll $10,%ebx leal 1859775393(%esi,%edx,1),%esi subl %edi,%eax roll $5,%esi addl %ecx,%esi 
movl 44(%esp),%edx orl %esi,%eax addl %edx,%ecx xorl %ebx,%eax movl $-1,%edx roll $10,%edi leal 1859775393(%ecx,%eax,1),%ecx subl %esi,%edx roll $12,%ecx addl %ebp,%ecx movl 20(%esp),%eax orl %ecx,%edx addl %eax,%ebp xorl %edi,%edx movl $-1,%eax roll $10,%esi leal 1859775393(%ebp,%edx,1),%ebp subl %ecx,%eax roll $7,%ebp addl %ebx,%ebp movl 48(%esp),%edx orl %ebp,%eax addl %edx,%ebx xorl %esi,%eax movl $-1,%edx roll $10,%ecx leal 1859775393(%ebx,%eax,1),%ebx movl %ecx,%eax roll $5,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 4(%esp),%eax roll $10,%ebp leal 2400959708(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $11,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 36(%esp),%eax roll $10,%ebx leal 2400959708(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $12,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl 44(%esp),%eax roll $10,%edi leal 2400959708(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $14,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 40(%esp),%eax roll $10,%esi leal 2400959708(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $15,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl (%esp),%eax roll $10,%ecx leal 2400959708(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $14,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 32(%esp),%eax roll $10,%ebp leal 2400959708(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $15,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 48(%esp),%eax roll $10,%ebx leal 2400959708(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $9,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl 16(%esp),%eax roll $10,%edi leal 2400959708(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $8,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 52(%esp),%eax roll $10,%esi leal 2400959708(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $9,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl 12(%esp),%eax roll $10,%ecx leal 2400959708(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $14,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 28(%esp),%eax roll $10,%ebp leal 2400959708(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $5,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 60(%esp),%eax roll $10,%ebx leal 2400959708(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $6,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl 56(%esp),%eax roll $10,%edi leal 2400959708(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $8,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 20(%esp),%eax roll $10,%esi leal 2400959708(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $6,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl 24(%esp),%eax roll $10,%ecx leal 2400959708(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $5,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 
8(%esp),%eax roll $10,%ebp leal 2400959708(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi subl %ebp,%edx roll $12,%edi addl %esi,%edi movl 16(%esp),%eax orl %ebx,%edx addl %eax,%esi xorl %edi,%edx movl $-1,%eax roll $10,%ebx leal 2840853838(%esi,%edx,1),%esi subl %ebx,%eax roll $9,%esi addl %ecx,%esi movl (%esp),%edx orl %edi,%eax addl %edx,%ecx xorl %esi,%eax movl $-1,%edx roll $10,%edi leal 2840853838(%ecx,%eax,1),%ecx subl %edi,%edx roll $15,%ecx addl %ebp,%ecx movl 20(%esp),%eax orl %esi,%edx addl %eax,%ebp xorl %ecx,%edx movl $-1,%eax roll $10,%esi leal 2840853838(%ebp,%edx,1),%ebp subl %esi,%eax roll $5,%ebp addl %ebx,%ebp movl 36(%esp),%edx orl %ecx,%eax addl %edx,%ebx xorl %ebp,%eax movl $-1,%edx roll $10,%ecx leal 2840853838(%ebx,%eax,1),%ebx subl %ecx,%edx roll $11,%ebx addl %edi,%ebx movl 28(%esp),%eax orl %ebp,%edx addl %eax,%edi xorl %ebx,%edx movl $-1,%eax roll $10,%ebp leal 2840853838(%edi,%edx,1),%edi subl %ebp,%eax roll $6,%edi addl %esi,%edi movl 48(%esp),%edx orl %ebx,%eax addl %edx,%esi xorl %edi,%eax movl $-1,%edx roll $10,%ebx leal 2840853838(%esi,%eax,1),%esi subl %ebx,%edx roll $8,%esi addl %ecx,%esi movl 8(%esp),%eax orl %edi,%edx addl %eax,%ecx xorl %esi,%edx movl $-1,%eax roll $10,%edi leal 2840853838(%ecx,%edx,1),%ecx subl %edi,%eax roll $13,%ecx addl %ebp,%ecx movl 40(%esp),%edx orl %esi,%eax addl %edx,%ebp xorl %ecx,%eax movl $-1,%edx roll $10,%esi leal 2840853838(%ebp,%eax,1),%ebp subl %esi,%edx roll $12,%ebp addl %ebx,%ebp movl 56(%esp),%eax orl %ecx,%edx addl %eax,%ebx xorl %ebp,%edx movl $-1,%eax roll $10,%ecx leal 2840853838(%ebx,%edx,1),%ebx subl %ecx,%eax roll $5,%ebx addl %edi,%ebx movl 4(%esp),%edx orl %ebp,%eax addl %edx,%edi xorl %ebx,%eax movl $-1,%edx roll $10,%ebp leal 2840853838(%edi,%eax,1),%edi subl %ebp,%edx roll $12,%edi addl %esi,%edi movl 12(%esp),%eax orl %ebx,%edx addl %eax,%esi xorl %edi,%edx movl $-1,%eax roll $10,%ebx leal 2840853838(%esi,%edx,1),%esi subl %ebx,%eax roll $13,%esi addl %ecx,%esi movl 32(%esp),%edx orl %edi,%eax addl %edx,%ecx xorl %esi,%eax movl $-1,%edx roll $10,%edi leal 2840853838(%ecx,%eax,1),%ecx subl %edi,%edx roll $14,%ecx addl %ebp,%ecx movl 44(%esp),%eax orl %esi,%edx addl %eax,%ebp xorl %ecx,%edx movl $-1,%eax roll $10,%esi leal 2840853838(%ebp,%edx,1),%ebp subl %esi,%eax roll $11,%ebp addl %ebx,%ebp movl 24(%esp),%edx orl %ecx,%eax addl %edx,%ebx xorl %ebp,%eax movl $-1,%edx roll $10,%ecx leal 2840853838(%ebx,%eax,1),%ebx subl %ecx,%edx roll $8,%ebx addl %edi,%ebx movl 60(%esp),%eax orl %ebp,%edx addl %eax,%edi xorl %ebx,%edx movl $-1,%eax roll $10,%ebp leal 2840853838(%edi,%edx,1),%edi subl %ebp,%eax roll $5,%edi addl %esi,%edi movl 52(%esp),%edx orl %ebx,%eax addl %edx,%esi xorl %edi,%eax movl 128(%esp),%edx roll $10,%ebx leal 2840853838(%esi,%eax,1),%esi movl %ecx,64(%esp) roll $6,%esi addl %ecx,%esi movl (%edx),%ecx movl %esi,68(%esp) movl %edi,72(%esp) movl 4(%edx),%esi movl %ebx,76(%esp) movl 8(%edx),%edi movl %ebp,80(%esp) movl 12(%edx),%ebx movl 16(%edx),%ebp movl $-1,%edx subl %ebx,%edx movl 20(%esp),%eax orl %edi,%edx addl %eax,%ecx xorl %esi,%edx movl $-1,%eax roll $10,%edi leal 1352829926(%ecx,%edx,1),%ecx subl %edi,%eax roll $8,%ecx addl %ebp,%ecx movl 56(%esp),%edx orl %esi,%eax addl %edx,%ebp xorl %ecx,%eax movl $-1,%edx roll $10,%esi leal 1352829926(%ebp,%eax,1),%ebp subl %esi,%edx roll $9,%ebp addl %ebx,%ebp movl 28(%esp),%eax orl %ecx,%edx addl %eax,%ebx xorl %ebp,%edx movl $-1,%eax roll $10,%ecx leal 1352829926(%ebx,%edx,1),%ebx subl %ecx,%eax roll $9,%ebx addl %edi,%ebx movl (%esp),%edx 
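#
# A rough C-style sketch of one RIPEMD-160 step as laid out above (an aid
# only; f, K and the shift s vary by round group, and the five working
# registers then rotate roles):
#
#   a = ROL(a + f(b,c,d) + X[r] + K, s) + e;   c = ROL(c, 10);
#
# The leal constants are the round constants K: 1518500249, 1859775393,
# 2400959708 and 2840853838 (0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xa953fd4e)
# on the left line, and 1352829926, 1548603684, 1836072691 and 2053994217
# (0x50a28be6, 0x5c4dd124, 0x6d703ef3, 0x7a6d76e9) on the parallel right
# line, which begins where the chaining state is reloaded through the
# context pointer kept at 128(%esp).
#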
orl %ebp,%eax addl %edx,%edi xorl %ebx,%eax movl $-1,%edx roll $10,%ebp leal 1352829926(%edi,%eax,1),%edi subl %ebp,%edx roll $11,%edi addl %esi,%edi movl 36(%esp),%eax orl %ebx,%edx addl %eax,%esi xorl %edi,%edx movl $-1,%eax roll $10,%ebx leal 1352829926(%esi,%edx,1),%esi subl %ebx,%eax roll $13,%esi addl %ecx,%esi movl 8(%esp),%edx orl %edi,%eax addl %edx,%ecx xorl %esi,%eax movl $-1,%edx roll $10,%edi leal 1352829926(%ecx,%eax,1),%ecx subl %edi,%edx roll $15,%ecx addl %ebp,%ecx movl 44(%esp),%eax orl %esi,%edx addl %eax,%ebp xorl %ecx,%edx movl $-1,%eax roll $10,%esi leal 1352829926(%ebp,%edx,1),%ebp subl %esi,%eax roll $15,%ebp addl %ebx,%ebp movl 16(%esp),%edx orl %ecx,%eax addl %edx,%ebx xorl %ebp,%eax movl $-1,%edx roll $10,%ecx leal 1352829926(%ebx,%eax,1),%ebx subl %ecx,%edx roll $5,%ebx addl %edi,%ebx movl 52(%esp),%eax orl %ebp,%edx addl %eax,%edi xorl %ebx,%edx movl $-1,%eax roll $10,%ebp leal 1352829926(%edi,%edx,1),%edi subl %ebp,%eax roll $7,%edi addl %esi,%edi movl 24(%esp),%edx orl %ebx,%eax addl %edx,%esi xorl %edi,%eax movl $-1,%edx roll $10,%ebx leal 1352829926(%esi,%eax,1),%esi subl %ebx,%edx roll $7,%esi addl %ecx,%esi movl 60(%esp),%eax orl %edi,%edx addl %eax,%ecx xorl %esi,%edx movl $-1,%eax roll $10,%edi leal 1352829926(%ecx,%edx,1),%ecx subl %edi,%eax roll $8,%ecx addl %ebp,%ecx movl 32(%esp),%edx orl %esi,%eax addl %edx,%ebp xorl %ecx,%eax movl $-1,%edx roll $10,%esi leal 1352829926(%ebp,%eax,1),%ebp subl %esi,%edx roll $11,%ebp addl %ebx,%ebp movl 4(%esp),%eax orl %ecx,%edx addl %eax,%ebx xorl %ebp,%edx movl $-1,%eax roll $10,%ecx leal 1352829926(%ebx,%edx,1),%ebx subl %ecx,%eax roll $14,%ebx addl %edi,%ebx movl 40(%esp),%edx orl %ebp,%eax addl %edx,%edi xorl %ebx,%eax movl $-1,%edx roll $10,%ebp leal 1352829926(%edi,%eax,1),%edi subl %ebp,%edx roll $14,%edi addl %esi,%edi movl 12(%esp),%eax orl %ebx,%edx addl %eax,%esi xorl %edi,%edx movl $-1,%eax roll $10,%ebx leal 1352829926(%esi,%edx,1),%esi subl %ebx,%eax roll $12,%esi addl %ecx,%esi movl 48(%esp),%edx orl %edi,%eax addl %edx,%ecx xorl %esi,%eax movl $-1,%edx roll $10,%edi leal 1352829926(%ecx,%eax,1),%ecx movl %edi,%eax roll $6,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 24(%esp),%eax roll $10,%esi leal 1548603684(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $9,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl 44(%esp),%eax roll $10,%ecx leal 1548603684(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $13,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 12(%esp),%eax roll $10,%ebp leal 1548603684(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $15,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 28(%esp),%eax roll $10,%ebx leal 1548603684(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $7,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl (%esp),%eax roll $10,%edi leal 1548603684(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $12,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 52(%esp),%eax roll $10,%esi leal 1548603684(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $8,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl 20(%esp),%eax roll $10,%ecx leal 1548603684(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $9,%ebx addl 
%edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 40(%esp),%eax roll $10,%ebp leal 1548603684(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $11,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 56(%esp),%eax roll $10,%ebx leal 1548603684(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $7,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl 60(%esp),%eax roll $10,%edi leal 1548603684(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $7,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 32(%esp),%eax roll $10,%esi leal 1548603684(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp movl %esi,%eax roll $12,%ebp addl %ebx,%ebp subl %esi,%edx andl %ebp,%eax andl %ecx,%edx orl %eax,%edx movl 48(%esp),%eax roll $10,%ecx leal 1548603684(%ebx,%edx,1),%ebx movl $-1,%edx addl %eax,%ebx movl %ecx,%eax roll $7,%ebx addl %edi,%ebx subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx orl %eax,%edx movl 16(%esp),%eax roll $10,%ebp leal 1548603684(%edi,%edx,1),%edi movl $-1,%edx addl %eax,%edi movl %ebp,%eax roll $6,%edi addl %esi,%edi subl %ebp,%edx andl %edi,%eax andl %ebx,%edx orl %eax,%edx movl 36(%esp),%eax roll $10,%ebx leal 1548603684(%esi,%edx,1),%esi movl $-1,%edx addl %eax,%esi movl %ebx,%eax roll $15,%esi addl %ecx,%esi subl %ebx,%edx andl %esi,%eax andl %edi,%edx orl %eax,%edx movl 4(%esp),%eax roll $10,%edi leal 1548603684(%ecx,%edx,1),%ecx movl $-1,%edx addl %eax,%ecx movl %edi,%eax roll $13,%ecx addl %ebp,%ecx subl %edi,%edx andl %ecx,%eax andl %esi,%edx orl %eax,%edx movl 8(%esp),%eax roll $10,%esi leal 1548603684(%ebp,%edx,1),%ebp movl $-1,%edx addl %eax,%ebp subl %ecx,%edx roll $11,%ebp addl %ebx,%ebp movl 60(%esp),%eax orl %ebp,%edx addl %eax,%ebx xorl %esi,%edx movl $-1,%eax roll $10,%ecx leal 1836072691(%ebx,%edx,1),%ebx subl %ebp,%eax roll $9,%ebx addl %edi,%ebx movl 20(%esp),%edx orl %ebx,%eax addl %edx,%edi xorl %ecx,%eax movl $-1,%edx roll $10,%ebp leal 1836072691(%edi,%eax,1),%edi subl %ebx,%edx roll $7,%edi addl %esi,%edi movl 4(%esp),%eax orl %edi,%edx addl %eax,%esi xorl %ebp,%edx movl $-1,%eax roll $10,%ebx leal 1836072691(%esi,%edx,1),%esi subl %edi,%eax roll $15,%esi addl %ecx,%esi movl 12(%esp),%edx orl %esi,%eax addl %edx,%ecx xorl %ebx,%eax movl $-1,%edx roll $10,%edi leal 1836072691(%ecx,%eax,1),%ecx subl %esi,%edx roll $11,%ecx addl %ebp,%ecx movl 28(%esp),%eax orl %ecx,%edx addl %eax,%ebp xorl %edi,%edx movl $-1,%eax roll $10,%esi leal 1836072691(%ebp,%edx,1),%ebp subl %ecx,%eax roll $8,%ebp addl %ebx,%ebp movl 56(%esp),%edx orl %ebp,%eax addl %edx,%ebx xorl %esi,%eax movl $-1,%edx roll $10,%ecx leal 1836072691(%ebx,%eax,1),%ebx subl %ebp,%edx roll $6,%ebx addl %edi,%ebx movl 24(%esp),%eax orl %ebx,%edx addl %eax,%edi xorl %ecx,%edx movl $-1,%eax roll $10,%ebp leal 1836072691(%edi,%edx,1),%edi subl %ebx,%eax roll $6,%edi addl %esi,%edi movl 36(%esp),%edx orl %edi,%eax addl %edx,%esi xorl %ebp,%eax movl $-1,%edx roll $10,%ebx leal 1836072691(%esi,%eax,1),%esi subl %edi,%edx roll $14,%esi addl %ecx,%esi movl 44(%esp),%eax orl %esi,%edx addl %eax,%ecx xorl %ebx,%edx movl $-1,%eax roll $10,%edi leal 1836072691(%ecx,%edx,1),%ecx subl %esi,%eax roll $12,%ecx addl %ebp,%ecx movl 32(%esp),%edx orl %ecx,%eax addl %edx,%ebp xorl %edi,%eax movl $-1,%edx roll $10,%esi leal 1836072691(%ebp,%eax,1),%ebp subl %ecx,%edx roll $13,%ebp addl %ebx,%ebp movl 48(%esp),%eax orl %ebp,%edx addl %eax,%ebx xorl %esi,%edx movl 
$-1,%eax roll $10,%ecx leal 1836072691(%ebx,%edx,1),%ebx subl %ebp,%eax roll $5,%ebx addl %edi,%ebx movl 8(%esp),%edx orl %ebx,%eax addl %edx,%edi xorl %ecx,%eax movl $-1,%edx roll $10,%ebp leal 1836072691(%edi,%eax,1),%edi subl %ebx,%edx roll $14,%edi addl %esi,%edi movl 40(%esp),%eax orl %edi,%edx addl %eax,%esi xorl %ebp,%edx movl $-1,%eax roll $10,%ebx leal 1836072691(%esi,%edx,1),%esi subl %edi,%eax roll $13,%esi addl %ecx,%esi movl (%esp),%edx orl %esi,%eax addl %edx,%ecx xorl %ebx,%eax movl $-1,%edx roll $10,%edi leal 1836072691(%ecx,%eax,1),%ecx subl %esi,%edx roll $13,%ecx addl %ebp,%ecx movl 16(%esp),%eax orl %ecx,%edx addl %eax,%ebp xorl %edi,%edx movl $-1,%eax roll $10,%esi leal 1836072691(%ebp,%edx,1),%ebp subl %ecx,%eax roll $7,%ebp addl %ebx,%ebp movl 52(%esp),%edx orl %ebp,%eax addl %edx,%ebx xorl %esi,%eax movl 32(%esp),%edx roll $10,%ecx leal 1836072691(%ebx,%eax,1),%ebx movl $-1,%eax roll $5,%ebx addl %edi,%ebx addl %edx,%edi movl %ebp,%edx subl %ebx,%eax andl %ebx,%edx andl %ecx,%eax orl %eax,%edx movl 24(%esp),%eax roll $10,%ebp leal 2053994217(%edi,%edx,1),%edi movl $-1,%edx roll $15,%edi addl %esi,%edi addl %eax,%esi movl %ebx,%eax subl %edi,%edx andl %edi,%eax andl %ebp,%edx orl %edx,%eax movl 16(%esp),%edx roll $10,%ebx leal 2053994217(%esi,%eax,1),%esi movl $-1,%eax roll $5,%esi addl %ecx,%esi addl %edx,%ecx movl %edi,%edx subl %esi,%eax andl %esi,%edx andl %ebx,%eax orl %eax,%edx movl 4(%esp),%eax roll $10,%edi leal 2053994217(%ecx,%edx,1),%ecx movl $-1,%edx roll $8,%ecx addl %ebp,%ecx addl %eax,%ebp movl %esi,%eax subl %ecx,%edx andl %ecx,%eax andl %edi,%edx orl %edx,%eax movl 12(%esp),%edx roll $10,%esi leal 2053994217(%ebp,%eax,1),%ebp movl $-1,%eax roll $11,%ebp addl %ebx,%ebp addl %edx,%ebx movl %ecx,%edx subl %ebp,%eax andl %ebp,%edx andl %esi,%eax orl %eax,%edx movl 44(%esp),%eax roll $10,%ecx leal 2053994217(%ebx,%edx,1),%ebx movl $-1,%edx roll $14,%ebx addl %edi,%ebx addl %eax,%edi movl %ebp,%eax subl %ebx,%edx andl %ebx,%eax andl %ecx,%edx orl %edx,%eax movl 60(%esp),%edx roll $10,%ebp leal 2053994217(%edi,%eax,1),%edi movl $-1,%eax roll $14,%edi addl %esi,%edi addl %edx,%esi movl %ebx,%edx subl %edi,%eax andl %edi,%edx andl %ebp,%eax orl %eax,%edx movl (%esp),%eax roll $10,%ebx leal 2053994217(%esi,%edx,1),%esi movl $-1,%edx roll $6,%esi addl %ecx,%esi addl %eax,%ecx movl %edi,%eax subl %esi,%edx andl %esi,%eax andl %ebx,%edx orl %edx,%eax movl 20(%esp),%edx roll $10,%edi leal 2053994217(%ecx,%eax,1),%ecx movl $-1,%eax roll $14,%ecx addl %ebp,%ecx addl %edx,%ebp movl %esi,%edx subl %ecx,%eax andl %ecx,%edx andl %edi,%eax orl %eax,%edx movl 48(%esp),%eax roll $10,%esi leal 2053994217(%ebp,%edx,1),%ebp movl $-1,%edx roll $6,%ebp addl %ebx,%ebp addl %eax,%ebx movl %ecx,%eax subl %ebp,%edx andl %ebp,%eax andl %esi,%edx orl %edx,%eax movl 8(%esp),%edx roll $10,%ecx leal 2053994217(%ebx,%eax,1),%ebx movl $-1,%eax roll $9,%ebx addl %edi,%ebx addl %edx,%edi movl %ebp,%edx subl %ebx,%eax andl %ebx,%edx andl %ecx,%eax orl %eax,%edx movl 52(%esp),%eax roll $10,%ebp leal 2053994217(%edi,%edx,1),%edi movl $-1,%edx roll $12,%edi addl %esi,%edi addl %eax,%esi movl %ebx,%eax subl %edi,%edx andl %edi,%eax andl %ebp,%edx orl %edx,%eax movl 36(%esp),%edx roll $10,%ebx leal 2053994217(%esi,%eax,1),%esi movl $-1,%eax roll $9,%esi addl %ecx,%esi addl %edx,%ecx movl %edi,%edx subl %esi,%eax andl %esi,%edx andl %ebx,%eax orl %eax,%edx movl 28(%esp),%eax roll $10,%edi leal 2053994217(%ecx,%edx,1),%ecx movl $-1,%edx roll $12,%ecx addl %ebp,%ecx addl %eax,%ebp movl %esi,%eax 
subl %ecx,%edx andl %ecx,%eax andl %edi,%edx orl %edx,%eax movl 40(%esp),%edx roll $10,%esi leal 2053994217(%ebp,%eax,1),%ebp movl $-1,%eax roll $5,%ebp addl %ebx,%ebp addl %edx,%ebx movl %ecx,%edx subl %ebp,%eax andl %ebp,%edx andl %esi,%eax orl %eax,%edx movl 56(%esp),%eax roll $10,%ecx leal 2053994217(%ebx,%edx,1),%ebx movl $-1,%edx roll $15,%ebx addl %edi,%ebx addl %eax,%edi movl %ebp,%eax subl %ebx,%edx andl %ebx,%eax andl %ecx,%edx orl %eax,%edx movl %ebx,%eax roll $10,%ebp leal 2053994217(%edi,%edx,1),%edi xorl %ebp,%eax roll $8,%edi addl %esi,%edi movl 48(%esp),%edx xorl %edi,%eax addl %edx,%esi roll $10,%ebx addl %eax,%esi movl %edi,%eax roll $8,%esi addl %ecx,%esi xorl %ebx,%eax movl 60(%esp),%edx xorl %esi,%eax addl %eax,%ecx movl %esi,%eax roll $10,%edi addl %edx,%ecx xorl %edi,%eax roll $5,%ecx addl %ebp,%ecx movl 40(%esp),%edx xorl %ecx,%eax addl %edx,%ebp roll $10,%esi addl %eax,%ebp movl %ecx,%eax roll $12,%ebp addl %ebx,%ebp xorl %esi,%eax movl 16(%esp),%edx xorl %ebp,%eax addl %eax,%ebx movl %ebp,%eax roll $10,%ecx addl %edx,%ebx xorl %ecx,%eax roll $9,%ebx addl %edi,%ebx movl 4(%esp),%edx xorl %ebx,%eax addl %edx,%edi roll $10,%ebp addl %eax,%edi movl %ebx,%eax roll $12,%edi addl %esi,%edi xorl %ebp,%eax movl 20(%esp),%edx xorl %edi,%eax addl %eax,%esi movl %edi,%eax roll $10,%ebx addl %edx,%esi xorl %ebx,%eax roll $5,%esi addl %ecx,%esi movl 32(%esp),%edx xorl %esi,%eax addl %edx,%ecx roll $10,%edi addl %eax,%ecx movl %esi,%eax roll $14,%ecx addl %ebp,%ecx xorl %edi,%eax movl 28(%esp),%edx xorl %ecx,%eax addl %eax,%ebp movl %ecx,%eax roll $10,%esi addl %edx,%ebp xorl %esi,%eax roll $6,%ebp addl %ebx,%ebp movl 24(%esp),%edx xorl %ebp,%eax addl %edx,%ebx roll $10,%ecx addl %eax,%ebx movl %ebp,%eax roll $8,%ebx addl %edi,%ebx xorl %ecx,%eax movl 8(%esp),%edx xorl %ebx,%eax addl %eax,%edi movl %ebx,%eax roll $10,%ebp addl %edx,%edi xorl %ebp,%eax roll $13,%edi addl %esi,%edi movl 52(%esp),%edx xorl %edi,%eax addl %edx,%esi roll $10,%ebx addl %eax,%esi movl %edi,%eax roll $6,%esi addl %ecx,%esi xorl %ebx,%eax movl 56(%esp),%edx xorl %esi,%eax addl %eax,%ecx movl %esi,%eax roll $10,%edi addl %edx,%ecx xorl %edi,%eax roll $5,%ecx addl %ebp,%ecx movl (%esp),%edx xorl %ecx,%eax addl %edx,%ebp roll $10,%esi addl %eax,%ebp movl %ecx,%eax roll $15,%ebp addl %ebx,%ebp xorl %esi,%eax movl 12(%esp),%edx xorl %ebp,%eax addl %eax,%ebx movl %ebp,%eax roll $10,%ecx addl %edx,%ebx xorl %ecx,%eax roll $13,%ebx addl %edi,%ebx movl 36(%esp),%edx xorl %ebx,%eax addl %edx,%edi roll $10,%ebp addl %eax,%edi movl %ebx,%eax roll $11,%edi addl %esi,%edi xorl %ebp,%eax movl 44(%esp),%edx xorl %edi,%eax addl %eax,%esi roll $10,%ebx addl %edx,%esi movl 128(%esp),%edx roll $11,%esi addl %ecx,%esi movl 4(%edx),%eax addl %eax,%ebx movl 72(%esp),%eax addl %eax,%ebx movl 8(%edx),%eax addl %eax,%ebp movl 76(%esp),%eax addl %eax,%ebp movl 12(%edx),%eax addl %eax,%ecx movl 80(%esp),%eax addl %eax,%ecx movl 16(%edx),%eax addl %eax,%esi movl 64(%esp),%eax addl %eax,%esi movl (%edx),%eax addl %eax,%edi movl 68(%esp),%eax addl %eax,%edi movl 136(%esp),%eax movl %ebx,(%edx) movl %ebp,4(%edx) movl %ecx,8(%edx) subl $1,%eax movl %esi,12(%edx) movl %edi,16(%edx) jle .L001get_out movl %eax,136(%esp) movl %ecx,%edi movl 132(%esp),%eax movl %ebx,%ecx addl $64,%eax movl %ebp,%esi movl %eax,132(%esp) jmp .L000start .L001get_out: addl $108,%esp popl %ebx popl %ebp popl %edi popl %esi ret .size ripemd160_block_asm_data_order,.-.L_ripemd160_block_asm_data_order_begin #endif Index: head/secure/lib/libcrypto/i386/sha1-586.S 
=================================================================== --- head/secure/lib/libcrypto/i386/sha1-586.S (revision 299480) +++ head/secure/lib/libcrypto/i386/sha1-586.S (revision 299481) @@ -1,5594 +1,7945 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from sha1-586.pl. #ifdef PIC .file "sha1-586.S" .text .globl sha1_block_data_order .type sha1_block_data_order,@function .align 16 sha1_block_data_order: .L_sha1_block_data_order_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi call .L000pic_point .L000pic_point: popl %ebp leal OPENSSL_ia32cap_P-.L000pic_point(%ebp),%esi leal .LK_XX_XX-.L000pic_point(%ebp),%ebp movl (%esi),%eax movl 4(%esi),%edx testl $512,%edx jz .L001x86 movl 8(%esi),%ecx testl $16777216,%eax jz .L001x86 testl $536870912,%ecx jnz .Lshaext_shortcut + andl $268435456,%edx + andl $1073741824,%eax + orl %edx,%eax + cmpl $1342177280,%eax + je .Lavx_shortcut jmp .Lssse3_shortcut .align 16 .L001x86: movl 20(%esp),%ebp movl 24(%esp),%esi movl 28(%esp),%eax subl $76,%esp shll $6,%eax addl %esi,%eax movl %eax,104(%esp) movl 16(%ebp),%edi jmp .L002loop .align 16 .L002loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,(%esp) movl %ebx,4(%esp) movl %ecx,8(%esp) movl %edx,12(%esp) movl 16(%esi),%eax movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,16(%esp) movl %ebx,20(%esp) movl %ecx,24(%esp) movl %edx,28(%esp) movl 32(%esi),%eax movl 36(%esi),%ebx movl 40(%esi),%ecx movl 44(%esi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,32(%esp) movl %ebx,36(%esp) movl %ecx,40(%esp) movl %edx,44(%esp) movl 48(%esi),%eax movl 52(%esi),%ebx movl 56(%esi),%ecx movl 60(%esi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,48(%esp) movl %ebx,52(%esp) movl %ecx,56(%esp) movl %edx,60(%esp) movl %esi,100(%esp) movl (%ebp),%eax movl 4(%ebp),%ebx movl 8(%ebp),%ecx movl 12(%ebp),%edx movl %ecx,%esi movl %eax,%ebp roll $5,%ebp xorl %edx,%esi addl %edi,%ebp movl (%esp),%edi andl %ebx,%esi rorl $2,%ebx xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp movl %ebx,%edi movl %ebp,%esi roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp movl 4(%esp),%edx andl %eax,%edi rorl $2,%eax xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp movl %eax,%edx movl %ebp,%edi roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp movl 8(%esp),%ecx andl %esi,%edx rorl $2,%esi xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp movl %esi,%ecx movl %ebp,%edx roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp movl 12(%esp),%ebx andl %edi,%ecx rorl $2,%edi xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp addl %ecx,%ebp movl %edi,%ebx movl %ebp,%ecx roll $5,%ebp xorl %esi,%ebx addl %eax,%ebp movl 16(%esp),%eax andl %edx,%ebx rorl $2,%edx xorl %esi,%ebx leal 1518500249(%ebp,%eax,1),%ebp addl %ebx,%ebp movl %edx,%eax movl %ebp,%ebx roll $5,%ebp xorl %edi,%eax addl %esi,%ebp movl 20(%esp),%esi andl %ecx,%eax rorl $2,%ecx xorl %edi,%eax leal 1518500249(%ebp,%esi,1),%ebp addl %eax,%ebp movl %ecx,%esi movl %ebp,%eax roll $5,%ebp xorl %edx,%esi addl %edi,%ebp movl 24(%esp),%edi andl %ebx,%esi rorl $2,%ebx xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp movl %ebx,%edi movl %ebp,%esi roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp movl 28(%esp),%edx andl %eax,%edi rorl $2,%eax xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp movl %eax,%edx movl %ebp,%edi roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp movl 
32(%esp),%ecx andl %esi,%edx rorl $2,%esi xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp movl %esi,%ecx movl %ebp,%edx roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp movl 36(%esp),%ebx andl %edi,%ecx rorl $2,%edi xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp addl %ecx,%ebp movl %edi,%ebx movl %ebp,%ecx roll $5,%ebp xorl %esi,%ebx addl %eax,%ebp movl 40(%esp),%eax andl %edx,%ebx rorl $2,%edx xorl %esi,%ebx leal 1518500249(%ebp,%eax,1),%ebp addl %ebx,%ebp movl %edx,%eax movl %ebp,%ebx roll $5,%ebp xorl %edi,%eax addl %esi,%ebp movl 44(%esp),%esi andl %ecx,%eax rorl $2,%ecx xorl %edi,%eax leal 1518500249(%ebp,%esi,1),%ebp addl %eax,%ebp movl %ecx,%esi movl %ebp,%eax roll $5,%ebp xorl %edx,%esi addl %edi,%ebp movl 48(%esp),%edi andl %ebx,%esi rorl $2,%ebx xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp movl %ebx,%edi movl %ebp,%esi roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp movl 52(%esp),%edx andl %eax,%edi rorl $2,%eax xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp movl %eax,%edx movl %ebp,%edi roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp movl 56(%esp),%ecx andl %esi,%edx rorl $2,%esi xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp movl %esi,%ecx movl %ebp,%edx roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp movl 60(%esp),%ebx andl %edi,%ecx rorl $2,%edi xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp movl (%esp),%ebx addl %ebp,%ecx movl %edi,%ebp xorl 8(%esp),%ebx xorl %esi,%ebp xorl 32(%esp),%ebx andl %edx,%ebp xorl 52(%esp),%ebx roll $1,%ebx xorl %esi,%ebp addl %ebp,%eax movl %ecx,%ebp rorl $2,%edx movl %ebx,(%esp) roll $5,%ebp leal 1518500249(%ebx,%eax,1),%ebx movl 4(%esp),%eax addl %ebp,%ebx movl %edx,%ebp xorl 12(%esp),%eax xorl %edi,%ebp xorl 36(%esp),%eax andl %ecx,%ebp xorl 56(%esp),%eax roll $1,%eax xorl %edi,%ebp addl %ebp,%esi movl %ebx,%ebp rorl $2,%ecx movl %eax,4(%esp) roll $5,%ebp leal 1518500249(%eax,%esi,1),%eax movl 8(%esp),%esi addl %ebp,%eax movl %ecx,%ebp xorl 16(%esp),%esi xorl %edx,%ebp xorl 40(%esp),%esi andl %ebx,%ebp xorl 60(%esp),%esi roll $1,%esi xorl %edx,%ebp addl %ebp,%edi movl %eax,%ebp rorl $2,%ebx movl %esi,8(%esp) roll $5,%ebp leal 1518500249(%esi,%edi,1),%esi movl 12(%esp),%edi addl %ebp,%esi movl %ebx,%ebp xorl 20(%esp),%edi xorl %ecx,%ebp xorl 44(%esp),%edi andl %eax,%ebp xorl (%esp),%edi roll $1,%edi xorl %ecx,%ebp addl %ebp,%edx movl %esi,%ebp rorl $2,%eax movl %edi,12(%esp) roll $5,%ebp leal 1518500249(%edi,%edx,1),%edi movl 16(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 24(%esp),%edx xorl %eax,%ebp xorl 48(%esp),%edx xorl %ebx,%ebp xorl 4(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,16(%esp) leal 1859775393(%edx,%ecx,1),%edx movl 20(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 28(%esp),%ecx xorl %esi,%ebp xorl 52(%esp),%ecx xorl %eax,%ebp xorl 8(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,20(%esp) leal 1859775393(%ecx,%ebx,1),%ecx movl 24(%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 32(%esp),%ebx xorl %edi,%ebp xorl 56(%esp),%ebx xorl %esi,%ebp xorl 12(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,24(%esp) leal 1859775393(%ebx,%eax,1),%ebx movl 28(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 36(%esp),%eax xorl %edx,%ebp xorl 60(%esp),%eax xorl %edi,%ebp xorl 16(%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp movl %eax,28(%esp) leal 1859775393(%eax,%esi,1),%eax movl 32(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl 
40(%esp),%esi xorl %ecx,%ebp xorl (%esp),%esi xorl %edx,%ebp xorl 20(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,32(%esp) leal 1859775393(%esi,%edi,1),%esi movl 36(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 44(%esp),%edi xorl %ebx,%ebp xorl 4(%esp),%edi xorl %ecx,%ebp xorl 24(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,36(%esp) leal 1859775393(%edi,%edx,1),%edi movl 40(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 48(%esp),%edx xorl %eax,%ebp xorl 8(%esp),%edx xorl %ebx,%ebp xorl 28(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,40(%esp) leal 1859775393(%edx,%ecx,1),%edx movl 44(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 52(%esp),%ecx xorl %esi,%ebp xorl 12(%esp),%ecx xorl %eax,%ebp xorl 32(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,44(%esp) leal 1859775393(%ecx,%ebx,1),%ecx movl 48(%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 56(%esp),%ebx xorl %edi,%ebp xorl 16(%esp),%ebx xorl %esi,%ebp xorl 36(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,48(%esp) leal 1859775393(%ebx,%eax,1),%ebx movl 52(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 60(%esp),%eax xorl %edx,%ebp xorl 20(%esp),%eax xorl %edi,%ebp xorl 40(%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp movl %eax,52(%esp) leal 1859775393(%eax,%esi,1),%eax movl 56(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl (%esp),%esi xorl %ecx,%ebp xorl 24(%esp),%esi xorl %edx,%ebp xorl 44(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,56(%esp) leal 1859775393(%esi,%edi,1),%esi movl 60(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 4(%esp),%edi xorl %ebx,%ebp xorl 28(%esp),%edi xorl %ecx,%ebp xorl 48(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,60(%esp) leal 1859775393(%edi,%edx,1),%edi movl (%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 8(%esp),%edx xorl %eax,%ebp xorl 32(%esp),%edx xorl %ebx,%ebp xorl 52(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,(%esp) leal 1859775393(%edx,%ecx,1),%edx movl 4(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 12(%esp),%ecx xorl %esi,%ebp xorl 36(%esp),%ecx xorl %eax,%ebp xorl 56(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,4(%esp) leal 1859775393(%ecx,%ebx,1),%ecx movl 8(%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 16(%esp),%ebx xorl %edi,%ebp xorl 40(%esp),%ebx xorl %esi,%ebp xorl 60(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,8(%esp) leal 1859775393(%ebx,%eax,1),%ebx movl 12(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 20(%esp),%eax xorl %edx,%ebp xorl 44(%esp),%eax xorl %edi,%ebp xorl (%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp movl %eax,12(%esp) leal 1859775393(%eax,%esi,1),%eax movl 16(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl 24(%esp),%esi xorl %ecx,%ebp xorl 48(%esp),%esi xorl %edx,%ebp xorl 4(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,16(%esp) leal 1859775393(%esi,%edi,1),%esi movl 20(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 28(%esp),%edi xorl %ebx,%ebp xorl 52(%esp),%edi xorl %ecx,%ebp xorl 8(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,20(%esp) leal 1859775393(%edi,%edx,1),%edi movl 
24(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 32(%esp),%edx xorl %eax,%ebp xorl 56(%esp),%edx xorl %ebx,%ebp xorl 12(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,24(%esp) leal 1859775393(%edx,%ecx,1),%edx movl 28(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 36(%esp),%ecx xorl %esi,%ebp xorl 60(%esp),%ecx xorl %eax,%ebp xorl 16(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,28(%esp) leal 1859775393(%ecx,%ebx,1),%ecx movl 32(%esp),%ebx addl %ebp,%ecx movl %edi,%ebp xorl 40(%esp),%ebx xorl %esi,%ebp xorl (%esp),%ebx andl %edx,%ebp xorl 20(%esp),%ebx roll $1,%ebx addl %eax,%ebp rorl $2,%edx movl %ecx,%eax roll $5,%eax movl %ebx,32(%esp) leal 2400959708(%ebx,%ebp,1),%ebx movl %edi,%ebp addl %eax,%ebx andl %esi,%ebp movl 36(%esp),%eax addl %ebp,%ebx movl %edx,%ebp xorl 44(%esp),%eax xorl %edi,%ebp xorl 4(%esp),%eax andl %ecx,%ebp xorl 24(%esp),%eax roll $1,%eax addl %esi,%ebp rorl $2,%ecx movl %ebx,%esi roll $5,%esi movl %eax,36(%esp) leal 2400959708(%eax,%ebp,1),%eax movl %edx,%ebp addl %esi,%eax andl %edi,%ebp movl 40(%esp),%esi addl %ebp,%eax movl %ecx,%ebp xorl 48(%esp),%esi xorl %edx,%ebp xorl 8(%esp),%esi andl %ebx,%ebp xorl 28(%esp),%esi roll $1,%esi addl %edi,%ebp rorl $2,%ebx movl %eax,%edi roll $5,%edi movl %esi,40(%esp) leal 2400959708(%esi,%ebp,1),%esi movl %ecx,%ebp addl %edi,%esi andl %edx,%ebp movl 44(%esp),%edi addl %ebp,%esi movl %ebx,%ebp xorl 52(%esp),%edi xorl %ecx,%ebp xorl 12(%esp),%edi andl %eax,%ebp xorl 32(%esp),%edi roll $1,%edi addl %edx,%ebp rorl $2,%eax movl %esi,%edx roll $5,%edx movl %edi,44(%esp) leal 2400959708(%edi,%ebp,1),%edi movl %ebx,%ebp addl %edx,%edi andl %ecx,%ebp movl 48(%esp),%edx addl %ebp,%edi movl %eax,%ebp xorl 56(%esp),%edx xorl %ebx,%ebp xorl 16(%esp),%edx andl %esi,%ebp xorl 36(%esp),%edx roll $1,%edx addl %ecx,%ebp rorl $2,%esi movl %edi,%ecx roll $5,%ecx movl %edx,48(%esp) leal 2400959708(%edx,%ebp,1),%edx movl %eax,%ebp addl %ecx,%edx andl %ebx,%ebp movl 52(%esp),%ecx addl %ebp,%edx movl %esi,%ebp xorl 60(%esp),%ecx xorl %eax,%ebp xorl 20(%esp),%ecx andl %edi,%ebp xorl 40(%esp),%ecx roll $1,%ecx addl %ebx,%ebp rorl $2,%edi movl %edx,%ebx roll $5,%ebx movl %ecx,52(%esp) leal 2400959708(%ecx,%ebp,1),%ecx movl %esi,%ebp addl %ebx,%ecx andl %eax,%ebp movl 56(%esp),%ebx addl %ebp,%ecx movl %edi,%ebp xorl (%esp),%ebx xorl %esi,%ebp xorl 24(%esp),%ebx andl %edx,%ebp xorl 44(%esp),%ebx roll $1,%ebx addl %eax,%ebp rorl $2,%edx movl %ecx,%eax roll $5,%eax movl %ebx,56(%esp) leal 2400959708(%ebx,%ebp,1),%ebx movl %edi,%ebp addl %eax,%ebx andl %esi,%ebp movl 60(%esp),%eax addl %ebp,%ebx movl %edx,%ebp xorl 4(%esp),%eax xorl %edi,%ebp xorl 28(%esp),%eax andl %ecx,%ebp xorl 48(%esp),%eax roll $1,%eax addl %esi,%ebp rorl $2,%ecx movl %ebx,%esi roll $5,%esi movl %eax,60(%esp) leal 2400959708(%eax,%ebp,1),%eax movl %edx,%ebp addl %esi,%eax andl %edi,%ebp movl (%esp),%esi addl %ebp,%eax movl %ecx,%ebp xorl 8(%esp),%esi xorl %edx,%ebp xorl 32(%esp),%esi andl %ebx,%ebp xorl 52(%esp),%esi roll $1,%esi addl %edi,%ebp rorl $2,%ebx movl %eax,%edi roll $5,%edi movl %esi,(%esp) leal 2400959708(%esi,%ebp,1),%esi movl %ecx,%ebp addl %edi,%esi andl %edx,%ebp movl 4(%esp),%edi addl %ebp,%esi movl %ebx,%ebp xorl 12(%esp),%edi xorl %ecx,%ebp xorl 36(%esp),%edi andl %eax,%ebp xorl 56(%esp),%edi roll $1,%edi addl %edx,%ebp rorl $2,%eax movl %esi,%edx roll $5,%edx movl %edi,4(%esp) leal 2400959708(%edi,%ebp,1),%edi movl %ebx,%ebp addl %edx,%edi andl %ecx,%ebp movl 8(%esp),%edx addl 
%ebp,%edi movl %eax,%ebp xorl 16(%esp),%edx xorl %ebx,%ebp xorl 40(%esp),%edx andl %esi,%ebp xorl 60(%esp),%edx roll $1,%edx addl %ecx,%ebp rorl $2,%esi movl %edi,%ecx roll $5,%ecx movl %edx,8(%esp) leal 2400959708(%edx,%ebp,1),%edx movl %eax,%ebp addl %ecx,%edx andl %ebx,%ebp movl 12(%esp),%ecx addl %ebp,%edx movl %esi,%ebp xorl 20(%esp),%ecx xorl %eax,%ebp xorl 44(%esp),%ecx andl %edi,%ebp xorl (%esp),%ecx roll $1,%ecx addl %ebx,%ebp rorl $2,%edi movl %edx,%ebx roll $5,%ebx movl %ecx,12(%esp) leal 2400959708(%ecx,%ebp,1),%ecx movl %esi,%ebp addl %ebx,%ecx andl %eax,%ebp movl 16(%esp),%ebx addl %ebp,%ecx movl %edi,%ebp xorl 24(%esp),%ebx xorl %esi,%ebp xorl 48(%esp),%ebx andl %edx,%ebp xorl 4(%esp),%ebx roll $1,%ebx addl %eax,%ebp rorl $2,%edx movl %ecx,%eax roll $5,%eax movl %ebx,16(%esp) leal 2400959708(%ebx,%ebp,1),%ebx movl %edi,%ebp addl %eax,%ebx andl %esi,%ebp movl 20(%esp),%eax addl %ebp,%ebx movl %edx,%ebp xorl 28(%esp),%eax xorl %edi,%ebp xorl 52(%esp),%eax andl %ecx,%ebp xorl 8(%esp),%eax roll $1,%eax addl %esi,%ebp rorl $2,%ecx movl %ebx,%esi roll $5,%esi movl %eax,20(%esp) leal 2400959708(%eax,%ebp,1),%eax movl %edx,%ebp addl %esi,%eax andl %edi,%ebp movl 24(%esp),%esi addl %ebp,%eax movl %ecx,%ebp xorl 32(%esp),%esi xorl %edx,%ebp xorl 56(%esp),%esi andl %ebx,%ebp xorl 12(%esp),%esi roll $1,%esi addl %edi,%ebp rorl $2,%ebx movl %eax,%edi roll $5,%edi movl %esi,24(%esp) leal 2400959708(%esi,%ebp,1),%esi movl %ecx,%ebp addl %edi,%esi andl %edx,%ebp movl 28(%esp),%edi addl %ebp,%esi movl %ebx,%ebp xorl 36(%esp),%edi xorl %ecx,%ebp xorl 60(%esp),%edi andl %eax,%ebp xorl 16(%esp),%edi roll $1,%edi addl %edx,%ebp rorl $2,%eax movl %esi,%edx roll $5,%edx movl %edi,28(%esp) leal 2400959708(%edi,%ebp,1),%edi movl %ebx,%ebp addl %edx,%edi andl %ecx,%ebp movl 32(%esp),%edx addl %ebp,%edi movl %eax,%ebp xorl 40(%esp),%edx xorl %ebx,%ebp xorl (%esp),%edx andl %esi,%ebp xorl 20(%esp),%edx roll $1,%edx addl %ecx,%ebp rorl $2,%esi movl %edi,%ecx roll $5,%ecx movl %edx,32(%esp) leal 2400959708(%edx,%ebp,1),%edx movl %eax,%ebp addl %ecx,%edx andl %ebx,%ebp movl 36(%esp),%ecx addl %ebp,%edx movl %esi,%ebp xorl 44(%esp),%ecx xorl %eax,%ebp xorl 4(%esp),%ecx andl %edi,%ebp xorl 24(%esp),%ecx roll $1,%ecx addl %ebx,%ebp rorl $2,%edi movl %edx,%ebx roll $5,%ebx movl %ecx,36(%esp) leal 2400959708(%ecx,%ebp,1),%ecx movl %esi,%ebp addl %ebx,%ecx andl %eax,%ebp movl 40(%esp),%ebx addl %ebp,%ecx movl %edi,%ebp xorl 48(%esp),%ebx xorl %esi,%ebp xorl 8(%esp),%ebx andl %edx,%ebp xorl 28(%esp),%ebx roll $1,%ebx addl %eax,%ebp rorl $2,%edx movl %ecx,%eax roll $5,%eax movl %ebx,40(%esp) leal 2400959708(%ebx,%ebp,1),%ebx movl %edi,%ebp addl %eax,%ebx andl %esi,%ebp movl 44(%esp),%eax addl %ebp,%ebx movl %edx,%ebp xorl 52(%esp),%eax xorl %edi,%ebp xorl 12(%esp),%eax andl %ecx,%ebp xorl 32(%esp),%eax roll $1,%eax addl %esi,%ebp rorl $2,%ecx movl %ebx,%esi roll $5,%esi movl %eax,44(%esp) leal 2400959708(%eax,%ebp,1),%eax movl %edx,%ebp addl %esi,%eax andl %edi,%ebp movl 48(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl 56(%esp),%esi xorl %ecx,%ebp xorl 16(%esp),%esi xorl %edx,%ebp xorl 36(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,48(%esp) leal 3395469782(%esi,%edi,1),%esi movl 52(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 60(%esp),%edi xorl %ebx,%ebp xorl 20(%esp),%edi xorl %ecx,%ebp xorl 40(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,52(%esp) leal 3395469782(%edi,%edx,1),%edi movl 56(%esp),%edx addl %ebp,%edi movl 
%esi,%ebp xorl (%esp),%edx xorl %eax,%ebp xorl 24(%esp),%edx xorl %ebx,%ebp xorl 44(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,56(%esp) leal 3395469782(%edx,%ecx,1),%edx movl 60(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 4(%esp),%ecx xorl %esi,%ebp xorl 28(%esp),%ecx xorl %eax,%ebp xorl 48(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,60(%esp) leal 3395469782(%ecx,%ebx,1),%ecx movl (%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 8(%esp),%ebx xorl %edi,%ebp xorl 32(%esp),%ebx xorl %esi,%ebp xorl 52(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,(%esp) leal 3395469782(%ebx,%eax,1),%ebx movl 4(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 12(%esp),%eax xorl %edx,%ebp xorl 36(%esp),%eax xorl %edi,%ebp xorl 56(%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp movl %eax,4(%esp) leal 3395469782(%eax,%esi,1),%eax movl 8(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl 16(%esp),%esi xorl %ecx,%ebp xorl 40(%esp),%esi xorl %edx,%ebp xorl 60(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,8(%esp) leal 3395469782(%esi,%edi,1),%esi movl 12(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 20(%esp),%edi xorl %ebx,%ebp xorl 44(%esp),%edi xorl %ecx,%ebp xorl (%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,12(%esp) leal 3395469782(%edi,%edx,1),%edi movl 16(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 24(%esp),%edx xorl %eax,%ebp xorl 48(%esp),%edx xorl %ebx,%ebp xorl 4(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,16(%esp) leal 3395469782(%edx,%ecx,1),%edx movl 20(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 28(%esp),%ecx xorl %esi,%ebp xorl 52(%esp),%ecx xorl %eax,%ebp xorl 8(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,20(%esp) leal 3395469782(%ecx,%ebx,1),%ecx movl 24(%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 32(%esp),%ebx xorl %edi,%ebp xorl 56(%esp),%ebx xorl %esi,%ebp xorl 12(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,24(%esp) leal 3395469782(%ebx,%eax,1),%ebx movl 28(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 36(%esp),%eax xorl %edx,%ebp xorl 60(%esp),%eax xorl %edi,%ebp xorl 16(%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp movl %eax,28(%esp) leal 3395469782(%eax,%esi,1),%eax movl 32(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl 40(%esp),%esi xorl %ecx,%ebp xorl (%esp),%esi xorl %edx,%ebp xorl 20(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,32(%esp) leal 3395469782(%esi,%edi,1),%esi movl 36(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 44(%esp),%edi xorl %ebx,%ebp xorl 4(%esp),%edi xorl %ecx,%ebp xorl 24(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,36(%esp) leal 3395469782(%edi,%edx,1),%edi movl 40(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 48(%esp),%edx xorl %eax,%ebp xorl 8(%esp),%edx xorl %ebx,%ebp xorl 28(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,40(%esp) leal 3395469782(%edx,%ecx,1),%edx movl 44(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 52(%esp),%ecx xorl %esi,%ebp xorl 12(%esp),%ecx xorl %eax,%ebp xorl 32(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,44(%esp) leal 3395469782(%ecx,%ebx,1),%ecx 
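#
# A rough C sketch of one scalar SHA-1 round as implemented above (a reading
# aid only; f and K vary by round group):
#
#   uint32_t t = ROL(a,5) + f(b,c,d) + e + K + W[i];
#   e = d; d = c; c = ROL(b,30); b = a; a = t;      /* rorl $2 == ROL(x,30) */
#
# The leal constants 1518500249, 1859775393, 2400959708 and 3395469782 are
# 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc and 0xca62c1d6, i.e. K for rounds 0-19,
# 20-39, 40-59 and 60-79, with f(b,c,d) = (b&c)|(~b&d), b^c^d,
# (b&c)|(b&d)|(c&d) and b^c^d respectively.
#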
movl 48(%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 56(%esp),%ebx xorl %edi,%ebp xorl 16(%esp),%ebx xorl %esi,%ebp xorl 36(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,48(%esp) leal 3395469782(%ebx,%eax,1),%ebx movl 52(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 60(%esp),%eax xorl %edx,%ebp xorl 20(%esp),%eax xorl %edi,%ebp xorl 40(%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp leal 3395469782(%eax,%esi,1),%eax movl 56(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl (%esp),%esi xorl %ecx,%ebp xorl 24(%esp),%esi xorl %edx,%ebp xorl 44(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp leal 3395469782(%esi,%edi,1),%esi movl 60(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 4(%esp),%edi xorl %ebx,%ebp xorl 28(%esp),%edi xorl %ecx,%ebp xorl 48(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp leal 3395469782(%edi,%edx,1),%edi addl %ebp,%edi movl 96(%esp),%ebp movl 100(%esp),%edx addl (%ebp),%edi addl 4(%ebp),%esi addl 8(%ebp),%eax addl 12(%ebp),%ebx addl 16(%ebp),%ecx movl %edi,(%ebp) addl $64,%edx movl %esi,4(%ebp) cmpl 104(%esp),%edx movl %eax,8(%ebp) movl %ecx,%edi movl %ebx,12(%ebp) movl %edx,%esi movl %ecx,16(%ebp) jb .L002loop addl $76,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size sha1_block_data_order,.-.L_sha1_block_data_order_begin .type _sha1_block_data_order_shaext,@function .align 16 _sha1_block_data_order_shaext: pushl %ebp pushl %ebx pushl %esi pushl %edi call .L003pic_point .L003pic_point: popl %ebp leal .LK_XX_XX-.L003pic_point(%ebp),%ebp .Lshaext_shortcut: movl 20(%esp),%edi movl %esp,%ebx movl 24(%esp),%esi movl 28(%esp),%ecx subl $32,%esp movdqu (%edi),%xmm0 movd 16(%edi),%xmm1 andl $-32,%esp movdqa 80(%ebp),%xmm3 movdqu (%esi),%xmm4 pshufd $27,%xmm0,%xmm0 movdqu 16(%esi),%xmm5 pshufd $27,%xmm1,%xmm1 movdqu 32(%esi),%xmm6 .byte 102,15,56,0,227 movdqu 48(%esi),%xmm7 .byte 102,15,56,0,235 .byte 102,15,56,0,243 .byte 102,15,56,0,251 jmp .L004loop_shaext .align 16 .L004loop_shaext: decl %ecx leal 64(%esi),%eax movdqa %xmm1,(%esp) paddd %xmm4,%xmm1 cmovnel %eax,%esi movdqa %xmm0,16(%esp) .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,0 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,0 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,0 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,0 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,0 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,1 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,1 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,1 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,1 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,1 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,2 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,2 .byte 
15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,2 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,2 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,2 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,3 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 movdqu (%esi),%xmm4 movdqa %xmm0,%xmm2 .byte 15,58,204,193,3 .byte 15,56,200,213 movdqu 16(%esi),%xmm5 .byte 102,15,56,0,227 movdqa %xmm0,%xmm1 .byte 15,58,204,194,3 .byte 15,56,200,206 movdqu 32(%esi),%xmm6 .byte 102,15,56,0,235 movdqa %xmm0,%xmm2 .byte 15,58,204,193,3 .byte 15,56,200,215 movdqu 48(%esi),%xmm7 .byte 102,15,56,0,243 movdqa %xmm0,%xmm1 .byte 15,58,204,194,3 movdqa (%esp),%xmm2 .byte 102,15,56,0,251 .byte 15,56,200,202 paddd 16(%esp),%xmm0 jnz .L004loop_shaext pshufd $27,%xmm0,%xmm0 pshufd $27,%xmm1,%xmm1 movdqu %xmm0,(%edi) movd %xmm1,16(%edi) movl %ebx,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext .type _sha1_block_data_order_ssse3,@function .align 16 _sha1_block_data_order_ssse3: pushl %ebp pushl %ebx pushl %esi pushl %edi call .L005pic_point .L005pic_point: popl %ebp leal .LK_XX_XX-.L005pic_point(%ebp),%ebp .Lssse3_shortcut: movdqa (%ebp),%xmm7 movdqa 16(%ebp),%xmm0 movdqa 32(%ebp),%xmm1 movdqa 48(%ebp),%xmm2 movdqa 64(%ebp),%xmm6 movl 20(%esp),%edi movl 24(%esp),%ebp movl 28(%esp),%edx movl %esp,%esi subl $208,%esp andl $-64,%esp movdqa %xmm0,112(%esp) movdqa %xmm1,128(%esp) movdqa %xmm2,144(%esp) shll $6,%edx movdqa %xmm7,160(%esp) addl %ebp,%edx movdqa %xmm6,176(%esp) addl $64,%ebp movl %edi,192(%esp) movl %ebp,196(%esp) movl %edx,200(%esp) movl %esi,204(%esp) movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx movl 16(%edi),%edi movl %ebx,%esi movdqu -64(%ebp),%xmm0 movdqu -48(%ebp),%xmm1 movdqu -32(%ebp),%xmm2 movdqu -16(%ebp),%xmm3 .byte 102,15,56,0,198 .byte 102,15,56,0,206 .byte 102,15,56,0,214 movdqa %xmm7,96(%esp) .byte 102,15,56,0,222 paddd %xmm7,%xmm0 paddd %xmm7,%xmm1 paddd %xmm7,%xmm2 movdqa %xmm0,(%esp) psubd %xmm7,%xmm0 movdqa %xmm1,16(%esp) psubd %xmm7,%xmm1 movdqa %xmm2,32(%esp) movl %ecx,%ebp psubd %xmm7,%xmm2 xorl %edx,%ebp pshufd $238,%xmm0,%xmm4 andl %ebp,%esi jmp .L006loop .align 16 .L006loop: rorl $2,%ebx xorl %edx,%esi movl %eax,%ebp punpcklqdq %xmm1,%xmm4 movdqa %xmm3,%xmm6 addl (%esp),%edi xorl %ecx,%ebx paddd %xmm3,%xmm7 movdqa %xmm0,64(%esp) roll $5,%eax addl %esi,%edi psrldq $4,%xmm6 andl %ebx,%ebp xorl %ecx,%ebx pxor %xmm0,%xmm4 addl %eax,%edi rorl $7,%eax pxor %xmm2,%xmm6 xorl %ecx,%ebp movl %edi,%esi addl 4(%esp),%edx pxor %xmm6,%xmm4 xorl %ebx,%eax roll $5,%edi movdqa %xmm7,48(%esp) addl %ebp,%edx andl %eax,%esi movdqa %xmm4,%xmm0 xorl %ebx,%eax addl %edi,%edx rorl $7,%edi movdqa %xmm4,%xmm6 xorl %ebx,%esi pslldq $12,%xmm0 paddd %xmm4,%xmm4 movl %edx,%ebp addl 8(%esp),%ecx psrld $31,%xmm6 xorl %eax,%edi roll $5,%edx movdqa %xmm0,%xmm7 addl %esi,%ecx andl %edi,%ebp xorl %eax,%edi psrld $30,%xmm0 addl %edx,%ecx rorl $7,%edx por %xmm6,%xmm4 xorl %eax,%ebp movl %ecx,%esi addl 12(%esp),%ebx pslld $2,%xmm7 xorl %edi,%edx roll $5,%ecx pxor %xmm0,%xmm4 movdqa 96(%esp),%xmm0 addl %ebp,%ebx andl %edx,%esi pxor %xmm7,%xmm4 pshufd $238,%xmm1,%xmm5 xorl %edi,%edx addl %ecx,%ebx rorl $7,%ecx xorl %edi,%esi movl %ebx,%ebp punpcklqdq %xmm2,%xmm5 
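#
# This ssse3 path (and the avx path added below) vectorizes the message
# schedule that the scalar .L002loop computes with xorl/xorl/xorl/roll $1;
# in plain C the recurrence is (sketch only):
#
#   for (i = 16; i < 80; i++)
#       W[i] = ROL(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1);
#
# The pxor/pslld/psrld/por sequences produce four W words per xmm register
# and pre-add the round constant with paddd before storing to the stack.
#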
movdqa %xmm4,%xmm7 addl 16(%esp),%eax xorl %edx,%ecx paddd %xmm4,%xmm0 movdqa %xmm1,80(%esp) roll $5,%ebx addl %esi,%eax psrldq $4,%xmm7 andl %ecx,%ebp xorl %edx,%ecx pxor %xmm1,%xmm5 addl %ebx,%eax rorl $7,%ebx pxor %xmm3,%xmm7 xorl %edx,%ebp movl %eax,%esi addl 20(%esp),%edi pxor %xmm7,%xmm5 xorl %ecx,%ebx roll $5,%eax movdqa %xmm0,(%esp) addl %ebp,%edi andl %ebx,%esi movdqa %xmm5,%xmm1 xorl %ecx,%ebx addl %eax,%edi rorl $7,%eax movdqa %xmm5,%xmm7 xorl %ecx,%esi pslldq $12,%xmm1 paddd %xmm5,%xmm5 movl %edi,%ebp addl 24(%esp),%edx psrld $31,%xmm7 xorl %ebx,%eax roll $5,%edi movdqa %xmm1,%xmm0 addl %esi,%edx andl %eax,%ebp xorl %ebx,%eax psrld $30,%xmm1 addl %edi,%edx rorl $7,%edi por %xmm7,%xmm5 xorl %ebx,%ebp movl %edx,%esi addl 28(%esp),%ecx pslld $2,%xmm0 xorl %eax,%edi roll $5,%edx pxor %xmm1,%xmm5 movdqa 112(%esp),%xmm1 addl %ebp,%ecx andl %edi,%esi pxor %xmm0,%xmm5 pshufd $238,%xmm2,%xmm6 xorl %eax,%edi addl %edx,%ecx rorl $7,%edx xorl %eax,%esi movl %ecx,%ebp punpcklqdq %xmm3,%xmm6 movdqa %xmm5,%xmm0 addl 32(%esp),%ebx xorl %edi,%edx paddd %xmm5,%xmm1 movdqa %xmm2,96(%esp) roll $5,%ecx addl %esi,%ebx psrldq $4,%xmm0 andl %edx,%ebp xorl %edi,%edx pxor %xmm2,%xmm6 addl %ecx,%ebx rorl $7,%ecx pxor %xmm4,%xmm0 xorl %edi,%ebp movl %ebx,%esi addl 36(%esp),%eax pxor %xmm0,%xmm6 xorl %edx,%ecx roll $5,%ebx movdqa %xmm1,16(%esp) addl %ebp,%eax andl %ecx,%esi movdqa %xmm6,%xmm2 xorl %edx,%ecx addl %ebx,%eax rorl $7,%ebx movdqa %xmm6,%xmm0 xorl %edx,%esi pslldq $12,%xmm2 paddd %xmm6,%xmm6 movl %eax,%ebp addl 40(%esp),%edi psrld $31,%xmm0 xorl %ecx,%ebx roll $5,%eax movdqa %xmm2,%xmm1 addl %esi,%edi andl %ebx,%ebp xorl %ecx,%ebx psrld $30,%xmm2 addl %eax,%edi rorl $7,%eax por %xmm0,%xmm6 xorl %ecx,%ebp movdqa 64(%esp),%xmm0 movl %edi,%esi addl 44(%esp),%edx pslld $2,%xmm1 xorl %ebx,%eax roll $5,%edi pxor %xmm2,%xmm6 movdqa 112(%esp),%xmm2 addl %ebp,%edx andl %eax,%esi pxor %xmm1,%xmm6 pshufd $238,%xmm3,%xmm7 xorl %ebx,%eax addl %edi,%edx rorl $7,%edi xorl %ebx,%esi movl %edx,%ebp punpcklqdq %xmm4,%xmm7 movdqa %xmm6,%xmm1 addl 48(%esp),%ecx xorl %eax,%edi paddd %xmm6,%xmm2 movdqa %xmm3,64(%esp) roll $5,%edx addl %esi,%ecx psrldq $4,%xmm1 andl %edi,%ebp xorl %eax,%edi pxor %xmm3,%xmm7 addl %edx,%ecx rorl $7,%edx pxor %xmm5,%xmm1 xorl %eax,%ebp movl %ecx,%esi addl 52(%esp),%ebx pxor %xmm1,%xmm7 xorl %edi,%edx roll $5,%ecx movdqa %xmm2,32(%esp) addl %ebp,%ebx andl %edx,%esi movdqa %xmm7,%xmm3 xorl %edi,%edx addl %ecx,%ebx rorl $7,%ecx movdqa %xmm7,%xmm1 xorl %edi,%esi pslldq $12,%xmm3 paddd %xmm7,%xmm7 movl %ebx,%ebp addl 56(%esp),%eax psrld $31,%xmm1 xorl %edx,%ecx roll $5,%ebx movdqa %xmm3,%xmm2 addl %esi,%eax andl %ecx,%ebp xorl %edx,%ecx psrld $30,%xmm3 addl %ebx,%eax rorl $7,%ebx por %xmm1,%xmm7 xorl %edx,%ebp movdqa 80(%esp),%xmm1 movl %eax,%esi addl 60(%esp),%edi pslld $2,%xmm2 xorl %ecx,%ebx roll $5,%eax pxor %xmm3,%xmm7 movdqa 112(%esp),%xmm3 addl %ebp,%edi andl %ebx,%esi pxor %xmm2,%xmm7 pshufd $238,%xmm6,%xmm2 xorl %ecx,%ebx addl %eax,%edi rorl $7,%eax pxor %xmm4,%xmm0 punpcklqdq %xmm7,%xmm2 xorl %ecx,%esi movl %edi,%ebp addl (%esp),%edx pxor %xmm1,%xmm0 movdqa %xmm4,80(%esp) xorl %ebx,%eax roll $5,%edi movdqa %xmm3,%xmm4 addl %esi,%edx paddd %xmm7,%xmm3 andl %eax,%ebp pxor %xmm2,%xmm0 xorl %ebx,%eax addl %edi,%edx rorl $7,%edi xorl %ebx,%ebp movdqa %xmm0,%xmm2 movdqa %xmm3,48(%esp) movl %edx,%esi addl 4(%esp),%ecx xorl %eax,%edi roll $5,%edx pslld $2,%xmm0 addl %ebp,%ecx andl %edi,%esi psrld $30,%xmm2 xorl %eax,%edi addl %edx,%ecx rorl $7,%edx xorl %eax,%esi movl %ecx,%ebp addl 
8(%esp),%ebx xorl %edi,%edx roll $5,%ecx por %xmm2,%xmm0 addl %esi,%ebx andl %edx,%ebp movdqa 96(%esp),%xmm2 xorl %edi,%edx addl %ecx,%ebx addl 12(%esp),%eax xorl %edi,%ebp movl %ebx,%esi pshufd $238,%xmm7,%xmm3 roll $5,%ebx addl %ebp,%eax xorl %edx,%esi rorl $7,%ecx addl %ebx,%eax addl 16(%esp),%edi pxor %xmm5,%xmm1 punpcklqdq %xmm0,%xmm3 xorl %ecx,%esi movl %eax,%ebp roll $5,%eax pxor %xmm2,%xmm1 movdqa %xmm5,96(%esp) addl %esi,%edi xorl %ecx,%ebp movdqa %xmm4,%xmm5 rorl $7,%ebx paddd %xmm0,%xmm4 addl %eax,%edi pxor %xmm3,%xmm1 addl 20(%esp),%edx xorl %ebx,%ebp movl %edi,%esi roll $5,%edi movdqa %xmm1,%xmm3 movdqa %xmm4,(%esp) addl %ebp,%edx xorl %ebx,%esi rorl $7,%eax addl %edi,%edx pslld $2,%xmm1 addl 24(%esp),%ecx xorl %eax,%esi psrld $30,%xmm3 movl %edx,%ebp roll $5,%edx addl %esi,%ecx xorl %eax,%ebp rorl $7,%edi addl %edx,%ecx por %xmm3,%xmm1 addl 28(%esp),%ebx xorl %edi,%ebp movdqa 64(%esp),%xmm3 movl %ecx,%esi roll $5,%ecx addl %ebp,%ebx xorl %edi,%esi rorl $7,%edx pshufd $238,%xmm0,%xmm4 addl %ecx,%ebx addl 32(%esp),%eax pxor %xmm6,%xmm2 punpcklqdq %xmm1,%xmm4 xorl %edx,%esi movl %ebx,%ebp roll $5,%ebx pxor %xmm3,%xmm2 movdqa %xmm6,64(%esp) addl %esi,%eax xorl %edx,%ebp movdqa 128(%esp),%xmm6 rorl $7,%ecx paddd %xmm1,%xmm5 addl %ebx,%eax pxor %xmm4,%xmm2 addl 36(%esp),%edi xorl %ecx,%ebp movl %eax,%esi roll $5,%eax movdqa %xmm2,%xmm4 movdqa %xmm5,16(%esp) addl %ebp,%edi xorl %ecx,%esi rorl $7,%ebx addl %eax,%edi pslld $2,%xmm2 addl 40(%esp),%edx xorl %ebx,%esi psrld $30,%xmm4 movl %edi,%ebp roll $5,%edi addl %esi,%edx xorl %ebx,%ebp rorl $7,%eax addl %edi,%edx por %xmm4,%xmm2 addl 44(%esp),%ecx xorl %eax,%ebp movdqa 80(%esp),%xmm4 movl %edx,%esi roll $5,%edx addl %ebp,%ecx xorl %eax,%esi rorl $7,%edi pshufd $238,%xmm1,%xmm5 addl %edx,%ecx addl 48(%esp),%ebx pxor %xmm7,%xmm3 punpcklqdq %xmm2,%xmm5 xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx pxor %xmm4,%xmm3 movdqa %xmm7,80(%esp) addl %esi,%ebx xorl %edi,%ebp movdqa %xmm6,%xmm7 rorl $7,%edx paddd %xmm2,%xmm6 addl %ecx,%ebx pxor %xmm5,%xmm3 addl 52(%esp),%eax xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx movdqa %xmm3,%xmm5 movdqa %xmm6,32(%esp) addl %ebp,%eax xorl %edx,%esi rorl $7,%ecx addl %ebx,%eax pslld $2,%xmm3 addl 56(%esp),%edi xorl %ecx,%esi psrld $30,%xmm5 movl %eax,%ebp roll $5,%eax addl %esi,%edi xorl %ecx,%ebp rorl $7,%ebx addl %eax,%edi por %xmm5,%xmm3 addl 60(%esp),%edx xorl %ebx,%ebp movdqa 96(%esp),%xmm5 movl %edi,%esi roll $5,%edi addl %ebp,%edx xorl %ebx,%esi rorl $7,%eax pshufd $238,%xmm2,%xmm6 addl %edi,%edx addl (%esp),%ecx pxor %xmm0,%xmm4 punpcklqdq %xmm3,%xmm6 xorl %eax,%esi movl %edx,%ebp roll $5,%edx pxor %xmm5,%xmm4 movdqa %xmm0,96(%esp) addl %esi,%ecx xorl %eax,%ebp movdqa %xmm7,%xmm0 rorl $7,%edi paddd %xmm3,%xmm7 addl %edx,%ecx pxor %xmm6,%xmm4 addl 4(%esp),%ebx xorl %edi,%ebp movl %ecx,%esi roll $5,%ecx movdqa %xmm4,%xmm6 movdqa %xmm7,48(%esp) addl %ebp,%ebx xorl %edi,%esi rorl $7,%edx addl %ecx,%ebx pslld $2,%xmm4 addl 8(%esp),%eax xorl %edx,%esi psrld $30,%xmm6 movl %ebx,%ebp roll $5,%ebx addl %esi,%eax xorl %edx,%ebp rorl $7,%ecx addl %ebx,%eax por %xmm6,%xmm4 addl 12(%esp),%edi xorl %ecx,%ebp movdqa 64(%esp),%xmm6 movl %eax,%esi roll $5,%eax addl %ebp,%edi xorl %ecx,%esi rorl $7,%ebx pshufd $238,%xmm3,%xmm7 addl %eax,%edi addl 16(%esp),%edx pxor %xmm1,%xmm5 punpcklqdq %xmm4,%xmm7 xorl %ebx,%esi movl %edi,%ebp roll $5,%edi pxor %xmm6,%xmm5 movdqa %xmm1,64(%esp) addl %esi,%edx xorl %ebx,%ebp movdqa %xmm0,%xmm1 rorl $7,%eax paddd %xmm4,%xmm0 addl %edi,%edx pxor %xmm7,%xmm5 addl 20(%esp),%ecx xorl 
%eax,%ebp movl %edx,%esi roll $5,%edx movdqa %xmm5,%xmm7 movdqa %xmm0,(%esp) addl %ebp,%ecx xorl %eax,%esi rorl $7,%edi addl %edx,%ecx pslld $2,%xmm5 addl 24(%esp),%ebx xorl %edi,%esi psrld $30,%xmm7 movl %ecx,%ebp roll $5,%ecx addl %esi,%ebx xorl %edi,%ebp rorl $7,%edx addl %ecx,%ebx por %xmm7,%xmm5 addl 28(%esp),%eax movdqa 80(%esp),%xmm7 rorl $7,%ecx movl %ebx,%esi xorl %edx,%ebp roll $5,%ebx pshufd $238,%xmm4,%xmm0 addl %ebp,%eax xorl %ecx,%esi xorl %edx,%ecx addl %ebx,%eax addl 32(%esp),%edi pxor %xmm2,%xmm6 punpcklqdq %xmm5,%xmm0 andl %ecx,%esi xorl %edx,%ecx rorl $7,%ebx pxor %xmm7,%xmm6 movdqa %xmm2,80(%esp) movl %eax,%ebp xorl %ecx,%esi roll $5,%eax movdqa %xmm1,%xmm2 addl %esi,%edi paddd %xmm5,%xmm1 xorl %ebx,%ebp pxor %xmm0,%xmm6 xorl %ecx,%ebx addl %eax,%edi addl 36(%esp),%edx andl %ebx,%ebp movdqa %xmm6,%xmm0 movdqa %xmm1,16(%esp) xorl %ecx,%ebx rorl $7,%eax movl %edi,%esi xorl %ebx,%ebp roll $5,%edi pslld $2,%xmm6 addl %ebp,%edx xorl %eax,%esi psrld $30,%xmm0 xorl %ebx,%eax addl %edi,%edx addl 40(%esp),%ecx andl %eax,%esi xorl %ebx,%eax rorl $7,%edi por %xmm0,%xmm6 movl %edx,%ebp xorl %eax,%esi movdqa 96(%esp),%xmm0 roll $5,%edx addl %esi,%ecx xorl %edi,%ebp xorl %eax,%edi addl %edx,%ecx pshufd $238,%xmm5,%xmm1 addl 44(%esp),%ebx andl %edi,%ebp xorl %eax,%edi rorl $7,%edx movl %ecx,%esi xorl %edi,%ebp roll $5,%ecx addl %ebp,%ebx xorl %edx,%esi xorl %edi,%edx addl %ecx,%ebx addl 48(%esp),%eax pxor %xmm3,%xmm7 punpcklqdq %xmm6,%xmm1 andl %edx,%esi xorl %edi,%edx rorl $7,%ecx pxor %xmm0,%xmm7 movdqa %xmm3,96(%esp) movl %ebx,%ebp xorl %edx,%esi roll $5,%ebx movdqa 144(%esp),%xmm3 addl %esi,%eax paddd %xmm6,%xmm2 xorl %ecx,%ebp pxor %xmm1,%xmm7 xorl %edx,%ecx addl %ebx,%eax addl 52(%esp),%edi andl %ecx,%ebp movdqa %xmm7,%xmm1 movdqa %xmm2,32(%esp) xorl %edx,%ecx rorl $7,%ebx movl %eax,%esi xorl %ecx,%ebp roll $5,%eax pslld $2,%xmm7 addl %ebp,%edi xorl %ebx,%esi psrld $30,%xmm1 xorl %ecx,%ebx addl %eax,%edi addl 56(%esp),%edx andl %ebx,%esi xorl %ecx,%ebx rorl $7,%eax por %xmm1,%xmm7 movl %edi,%ebp xorl %ebx,%esi movdqa 64(%esp),%xmm1 roll $5,%edi addl %esi,%edx xorl %eax,%ebp xorl %ebx,%eax addl %edi,%edx pshufd $238,%xmm6,%xmm2 addl 60(%esp),%ecx andl %eax,%ebp xorl %ebx,%eax rorl $7,%edi movl %edx,%esi xorl %eax,%ebp roll $5,%edx addl %ebp,%ecx xorl %edi,%esi xorl %eax,%edi addl %edx,%ecx addl (%esp),%ebx pxor %xmm4,%xmm0 punpcklqdq %xmm7,%xmm2 andl %edi,%esi xorl %eax,%edi rorl $7,%edx pxor %xmm1,%xmm0 movdqa %xmm4,64(%esp) movl %ecx,%ebp xorl %edi,%esi roll $5,%ecx movdqa %xmm3,%xmm4 addl %esi,%ebx paddd %xmm7,%xmm3 xorl %edx,%ebp pxor %xmm2,%xmm0 xorl %edi,%edx addl %ecx,%ebx addl 4(%esp),%eax andl %edx,%ebp movdqa %xmm0,%xmm2 movdqa %xmm3,48(%esp) xorl %edi,%edx rorl $7,%ecx movl %ebx,%esi xorl %edx,%ebp roll $5,%ebx pslld $2,%xmm0 addl %ebp,%eax xorl %ecx,%esi psrld $30,%xmm2 xorl %edx,%ecx addl %ebx,%eax addl 8(%esp),%edi andl %ecx,%esi xorl %edx,%ecx rorl $7,%ebx por %xmm2,%xmm0 movl %eax,%ebp xorl %ecx,%esi movdqa 80(%esp),%xmm2 roll $5,%eax addl %esi,%edi xorl %ebx,%ebp xorl %ecx,%ebx addl %eax,%edi pshufd $238,%xmm7,%xmm3 addl 12(%esp),%edx andl %ebx,%ebp xorl %ecx,%ebx rorl $7,%eax movl %edi,%esi xorl %ebx,%ebp roll $5,%edi addl %ebp,%edx xorl %eax,%esi xorl %ebx,%eax addl %edi,%edx addl 16(%esp),%ecx pxor %xmm5,%xmm1 punpcklqdq %xmm0,%xmm3 andl %eax,%esi xorl %ebx,%eax rorl $7,%edi pxor %xmm2,%xmm1 movdqa %xmm5,80(%esp) movl %edx,%ebp xorl %eax,%esi roll $5,%edx movdqa %xmm4,%xmm5 addl %esi,%ecx paddd %xmm0,%xmm4 xorl %edi,%ebp pxor %xmm3,%xmm1 xorl %eax,%edi addl 
%edx,%ecx addl 20(%esp),%ebx andl %edi,%ebp movdqa %xmm1,%xmm3 movdqa %xmm4,(%esp) xorl %eax,%edi rorl $7,%edx movl %ecx,%esi xorl %edi,%ebp roll $5,%ecx pslld $2,%xmm1 addl %ebp,%ebx xorl %edx,%esi psrld $30,%xmm3 xorl %edi,%edx addl %ecx,%ebx addl 24(%esp),%eax andl %edx,%esi xorl %edi,%edx rorl $7,%ecx por %xmm3,%xmm1 movl %ebx,%ebp xorl %edx,%esi movdqa 96(%esp),%xmm3 roll $5,%ebx addl %esi,%eax xorl %ecx,%ebp xorl %edx,%ecx addl %ebx,%eax pshufd $238,%xmm0,%xmm4 addl 28(%esp),%edi andl %ecx,%ebp xorl %edx,%ecx rorl $7,%ebx movl %eax,%esi xorl %ecx,%ebp roll $5,%eax addl %ebp,%edi xorl %ebx,%esi xorl %ecx,%ebx addl %eax,%edi addl 32(%esp),%edx pxor %xmm6,%xmm2 punpcklqdq %xmm1,%xmm4 andl %ebx,%esi xorl %ecx,%ebx rorl $7,%eax pxor %xmm3,%xmm2 movdqa %xmm6,96(%esp) movl %edi,%ebp xorl %ebx,%esi roll $5,%edi movdqa %xmm5,%xmm6 addl %esi,%edx paddd %xmm1,%xmm5 xorl %eax,%ebp pxor %xmm4,%xmm2 xorl %ebx,%eax addl %edi,%edx addl 36(%esp),%ecx andl %eax,%ebp movdqa %xmm2,%xmm4 movdqa %xmm5,16(%esp) xorl %ebx,%eax rorl $7,%edi movl %edx,%esi xorl %eax,%ebp roll $5,%edx pslld $2,%xmm2 addl %ebp,%ecx xorl %edi,%esi psrld $30,%xmm4 xorl %eax,%edi addl %edx,%ecx addl 40(%esp),%ebx andl %edi,%esi xorl %eax,%edi rorl $7,%edx por %xmm4,%xmm2 movl %ecx,%ebp xorl %edi,%esi movdqa 64(%esp),%xmm4 roll $5,%ecx addl %esi,%ebx xorl %edx,%ebp xorl %edi,%edx addl %ecx,%ebx pshufd $238,%xmm1,%xmm5 addl 44(%esp),%eax andl %edx,%ebp xorl %edi,%edx rorl $7,%ecx movl %ebx,%esi xorl %edx,%ebp roll $5,%ebx addl %ebp,%eax xorl %edx,%esi addl %ebx,%eax addl 48(%esp),%edi pxor %xmm7,%xmm3 punpcklqdq %xmm2,%xmm5 xorl %ecx,%esi movl %eax,%ebp roll $5,%eax pxor %xmm4,%xmm3 movdqa %xmm7,64(%esp) addl %esi,%edi xorl %ecx,%ebp movdqa %xmm6,%xmm7 rorl $7,%ebx paddd %xmm2,%xmm6 addl %eax,%edi pxor %xmm5,%xmm3 addl 52(%esp),%edx xorl %ebx,%ebp movl %edi,%esi roll $5,%edi movdqa %xmm3,%xmm5 movdqa %xmm6,32(%esp) addl %ebp,%edx xorl %ebx,%esi rorl $7,%eax addl %edi,%edx pslld $2,%xmm3 addl 56(%esp),%ecx xorl %eax,%esi psrld $30,%xmm5 movl %edx,%ebp roll $5,%edx addl %esi,%ecx xorl %eax,%ebp rorl $7,%edi addl %edx,%ecx por %xmm5,%xmm3 addl 60(%esp),%ebx xorl %edi,%ebp movl %ecx,%esi roll $5,%ecx addl %ebp,%ebx xorl %edi,%esi rorl $7,%edx addl %ecx,%ebx addl (%esp),%eax xorl %edx,%esi movl %ebx,%ebp roll $5,%ebx addl %esi,%eax xorl %edx,%ebp rorl $7,%ecx paddd %xmm3,%xmm7 addl %ebx,%eax addl 4(%esp),%edi xorl %ecx,%ebp movl %eax,%esi movdqa %xmm7,48(%esp) roll $5,%eax addl %ebp,%edi xorl %ecx,%esi rorl $7,%ebx addl %eax,%edi addl 8(%esp),%edx xorl %ebx,%esi movl %edi,%ebp roll $5,%edi addl %esi,%edx xorl %ebx,%ebp rorl $7,%eax addl %edi,%edx addl 12(%esp),%ecx xorl %eax,%ebp movl %edx,%esi roll $5,%edx addl %ebp,%ecx xorl %eax,%esi rorl $7,%edi addl %edx,%ecx movl 196(%esp),%ebp cmpl 200(%esp),%ebp je .L007done movdqa 160(%esp),%xmm7 movdqa 176(%esp),%xmm6 movdqu (%ebp),%xmm0 movdqu 16(%ebp),%xmm1 movdqu 32(%ebp),%xmm2 movdqu 48(%ebp),%xmm3 addl $64,%ebp .byte 102,15,56,0,198 movl %ebp,196(%esp) movdqa %xmm7,96(%esp) addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx addl %esi,%ebx xorl %edi,%ebp rorl $7,%edx .byte 102,15,56,0,206 addl %ecx,%ebx addl 20(%esp),%eax xorl %edx,%ebp movl %ebx,%esi paddd %xmm7,%xmm0 roll $5,%ebx addl %ebp,%eax xorl %edx,%esi rorl $7,%ecx movdqa %xmm0,(%esp) addl %ebx,%eax addl 24(%esp),%edi xorl %ecx,%esi movl %eax,%ebp psubd %xmm7,%xmm0 roll $5,%eax addl %esi,%edi xorl %ecx,%ebp rorl $7,%ebx addl %eax,%edi addl 28(%esp),%edx xorl %ebx,%ebp movl %edi,%esi roll $5,%edi addl %ebp,%edx xorl 
%ebx,%esi rorl $7,%eax addl %edi,%edx addl 32(%esp),%ecx xorl %eax,%esi movl %edx,%ebp roll $5,%edx addl %esi,%ecx xorl %eax,%ebp rorl $7,%edi .byte 102,15,56,0,214 addl %edx,%ecx addl 36(%esp),%ebx xorl %edi,%ebp movl %ecx,%esi paddd %xmm7,%xmm1 roll $5,%ecx addl %ebp,%ebx xorl %edi,%esi rorl $7,%edx movdqa %xmm1,16(%esp) addl %ecx,%ebx addl 40(%esp),%eax xorl %edx,%esi movl %ebx,%ebp psubd %xmm7,%xmm1 roll $5,%ebx addl %esi,%eax xorl %edx,%ebp rorl $7,%ecx addl %ebx,%eax addl 44(%esp),%edi xorl %ecx,%ebp movl %eax,%esi roll $5,%eax addl %ebp,%edi xorl %ecx,%esi rorl $7,%ebx addl %eax,%edi addl 48(%esp),%edx xorl %ebx,%esi movl %edi,%ebp roll $5,%edi addl %esi,%edx xorl %ebx,%ebp rorl $7,%eax .byte 102,15,56,0,222 addl %edi,%edx addl 52(%esp),%ecx xorl %eax,%ebp movl %edx,%esi paddd %xmm7,%xmm2 roll $5,%edx addl %ebp,%ecx xorl %eax,%esi rorl $7,%edi movdqa %xmm2,32(%esp) addl %edx,%ecx addl 56(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp psubd %xmm7,%xmm2 roll $5,%ecx addl %esi,%ebx xorl %edi,%ebp rorl $7,%edx addl %ecx,%ebx addl 60(%esp),%eax xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx addl %ebp,%eax rorl $7,%ecx addl %ebx,%eax movl 192(%esp),%ebp addl (%ebp),%eax addl 4(%ebp),%esi addl 8(%ebp),%ecx movl %eax,(%ebp) addl 12(%ebp),%edx movl %esi,4(%ebp) addl 16(%ebp),%edi movl %ecx,8(%ebp) movl %ecx,%ebx movl %edx,12(%ebp) xorl %edx,%ebx movl %edi,16(%ebp) movl %esi,%ebp pshufd $238,%xmm0,%xmm4 andl %ebx,%esi movl %ebp,%ebx jmp .L006loop .align 16 .L007done: addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx addl %esi,%ebx xorl %edi,%ebp rorl $7,%edx addl %ecx,%ebx addl 20(%esp),%eax xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx addl %ebp,%eax xorl %edx,%esi rorl $7,%ecx addl %ebx,%eax addl 24(%esp),%edi xorl %ecx,%esi movl %eax,%ebp roll $5,%eax addl %esi,%edi xorl %ecx,%ebp rorl $7,%ebx addl %eax,%edi addl 28(%esp),%edx xorl %ebx,%ebp movl %edi,%esi roll $5,%edi addl %ebp,%edx xorl %ebx,%esi rorl $7,%eax addl %edi,%edx addl 32(%esp),%ecx xorl %eax,%esi movl %edx,%ebp roll $5,%edx addl %esi,%ecx xorl %eax,%ebp rorl $7,%edi addl %edx,%ecx addl 36(%esp),%ebx xorl %edi,%ebp movl %ecx,%esi roll $5,%ecx addl %ebp,%ebx xorl %edi,%esi rorl $7,%edx addl %ecx,%ebx addl 40(%esp),%eax xorl %edx,%esi movl %ebx,%ebp roll $5,%ebx addl %esi,%eax xorl %edx,%ebp rorl $7,%ecx addl %ebx,%eax addl 44(%esp),%edi xorl %ecx,%ebp movl %eax,%esi roll $5,%eax addl %ebp,%edi xorl %ecx,%esi rorl $7,%ebx addl %eax,%edi addl 48(%esp),%edx xorl %ebx,%esi movl %edi,%ebp roll $5,%edi addl %esi,%edx xorl %ebx,%ebp rorl $7,%eax addl %edi,%edx addl 52(%esp),%ecx xorl %eax,%ebp movl %edx,%esi roll $5,%edx addl %ebp,%ecx xorl %eax,%esi rorl $7,%edi addl %edx,%ecx addl 56(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx addl %esi,%ebx xorl %edi,%ebp rorl $7,%edx addl %ecx,%ebx addl 60(%esp),%eax xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx addl %ebp,%eax rorl $7,%ecx addl %ebx,%eax movl 192(%esp),%ebp addl (%ebp),%eax movl 204(%esp),%esp addl 4(%ebp),%esi addl 8(%ebp),%ecx movl %eax,(%ebp) addl 12(%ebp),%edx movl %esi,4(%ebp) addl 16(%ebp),%edi movl %ecx,8(%ebp) movl %edx,12(%ebp) movl %edi,16(%ebp) popl %edi popl %esi popl %ebx popl %ebp ret .size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3 +.type _sha1_block_data_order_avx,@function +.align 16 +_sha1_block_data_order_avx: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L008pic_point +.L008pic_point: + popl %ebp + leal .LK_XX_XX-.L008pic_point(%ebp),%ebp +.Lavx_shortcut: + vzeroall + vmovdqa (%ebp),%xmm7 + vmovdqa 16(%ebp),%xmm0 + vmovdqa 
32(%ebp),%xmm1 + vmovdqa 48(%ebp),%xmm2 + vmovdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + vmovdqa %xmm0,112(%esp) + vmovdqa %xmm1,128(%esp) + vmovdqa %xmm2,144(%esp) + shll $6,%edx + vmovdqa %xmm7,160(%esp) + addl %ebp,%edx + vmovdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + vmovdqu -64(%ebp),%xmm0 + vmovdqu -48(%ebp),%xmm1 + vmovdqu -32(%ebp),%xmm2 + vmovdqu -16(%ebp),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vmovdqa %xmm7,96(%esp) + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm7,%xmm0,%xmm4 + vpaddd %xmm7,%xmm1,%xmm5 + vpaddd %xmm7,%xmm2,%xmm6 + vmovdqa %xmm4,(%esp) + movl %ecx,%ebp + vmovdqa %xmm5,16(%esp) + xorl %edx,%ebp + vmovdqa %xmm6,32(%esp) + andl %ebp,%esi + jmp .L009loop +.align 16 +.L009loop: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%ebp + addl (%esp),%edi + vpaddd %xmm3,%xmm7,%xmm7 + vmovdqa %xmm0,64(%esp) + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%edi + vpxor %xmm2,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vmovdqa %xmm7,48(%esp) + movl %edi,%esi + addl 4(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%edi,%edi + addl %ebp,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm6 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm0 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrld $30,%xmm0,%xmm7 + vpor %xmm6,%xmm4,%xmm4 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + vpslld $2,%xmm0,%xmm0 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vpxor %xmm7,%xmm4,%xmm4 + movl %ecx,%esi + addl 12(%esp),%ebx + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpxor %xmm0,%xmm4,%xmm4 + addl %ebp,%ebx + andl %edx,%esi + vmovdqa 96(%esp),%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%ebp + addl 16(%esp),%eax + vpaddd %xmm4,%xmm0,%xmm0 + vmovdqa %xmm1,80(%esp) + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vmovdqa %xmm0,(%esp) + movl %eax,%esi + addl 20(%esp),%edi + vpxor %xmm7,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %ebp,%edi + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm7 + xorl %ecx,%ebx + addl %eax,%edi + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm1 + vpaddd %xmm5,%xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm0 + vpor %xmm7,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpxor %xmm0,%xmm5,%xmm5 + movl %edx,%esi + addl 28(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpxor %xmm1,%xmm5,%xmm5 + addl %ebp,%ecx + andl %edi,%esi + vmovdqa 112(%esp),%xmm1 + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%ebp + addl 32(%esp),%ebx + vpaddd %xmm5,%xmm1,%xmm1 + vmovdqa %xmm2,96(%esp) + xorl 
%edi,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + vpxor %xmm2,%xmm6,%xmm6 + xorl %edi,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%ecx,%ecx + xorl %edi,%ebp + vmovdqa %xmm1,16(%esp) + movl %ebx,%esi + addl 36(%esp),%eax + vpxor %xmm0,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + addl %ebp,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm2 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm1 + vpor %xmm0,%xmm6,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + vmovdqa 64(%esp),%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vpxor %xmm1,%xmm6,%xmm6 + movl %edi,%esi + addl 44(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpxor %xmm2,%xmm6,%xmm6 + addl %ebp,%edx + andl %eax,%esi + vmovdqa 112(%esp),%xmm2 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%ebp + addl 48(%esp),%ecx + vpaddd %xmm6,%xmm2,%xmm2 + vmovdqa %xmm3,64(%esp) + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm1 + addl %esi,%ecx + andl %edi,%ebp + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%edi + addl %edx,%ecx + vpxor %xmm5,%xmm1,%xmm1 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vmovdqa %xmm2,32(%esp) + movl %ecx,%esi + addl 52(%esp),%ebx + vpxor %xmm1,%xmm7,%xmm7 + xorl %edi,%edx + shldl $5,%ecx,%ecx + addl %ebp,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm1 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpslldq $12,%xmm7,%xmm3 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm2 + vpor %xmm1,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + vmovdqa 80(%esp),%xmm1 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vpxor %xmm2,%xmm7,%xmm7 + movl %eax,%esi + addl 60(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpxor %xmm3,%xmm7,%xmm7 + addl %ebp,%edi + andl %ebx,%esi + vmovdqa 112(%esp),%xmm3 + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,80(%esp) + xorl %ebx,%eax + shldl $5,%edi,%edi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + addl %esi,%edx + andl %eax,%ebp + vpxor %xmm2,%xmm0,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + vpor %xmm2,%xmm0,%xmm0 + xorl %edi,%edx + shldl $5,%ecx,%ecx + vmovdqa 96(%esp),%xmm2 + addl %esi,%ebx + andl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm3,%xmm1,%xmm1 + 
addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm3,%xmm1,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + vmovdqa 64(%esp),%xmm3 + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + vmovdqa 128(%esp),%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm4,%xmm2,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vpor %xmm4,%xmm2,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + vmovdqa 80(%esp),%xmm4 + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + vmovdqa 96(%esp),%xmm5 + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpalignr $8,%xmm2,%xmm3,%xmm6 + vpxor %xmm0,%xmm4,%xmm4 + addl (%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + vmovdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + vmovdqa %xmm7,%xmm0 + vpaddd %xmm3,%xmm7,%xmm7 + shrdl $7,%edi,%edi + addl %edx,%ecx + vpxor %xmm6,%xmm4,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm6 + vmovdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm6,%xmm4,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + vmovdqa 64(%esp),%xmm6 + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpalignr $8,%xmm3,%xmm4,%xmm7 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + vpxor %xmm6,%xmm5,%xmm5 + vmovdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + vmovdqa %xmm0,%xmm1 + vpaddd %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + 
addl %edi,%edx + vpxor %xmm7,%xmm5,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm7 + vmovdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm7,%xmm5,%xmm5 + addl 28(%esp),%eax + vmovdqa 80(%esp),%xmm7 + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + vmovdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + vmovdqa %xmm1,%xmm2 + vpaddd %xmm5,%xmm1,%xmm1 + shldl $5,%eax,%eax + addl %esi,%edi + vpxor %xmm0,%xmm6,%xmm6 + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + vpsrld $30,%xmm6,%xmm0 + vmovdqa %xmm1,16(%esp) + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + vpor %xmm0,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%edi,%edi + vmovdqa 96(%esp),%xmm0 + movl %edx,%ebp + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm1 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + vmovdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + vmovdqa 144(%esp),%xmm3 + vpaddd %xmm6,%xmm2,%xmm2 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + vpsrld $30,%xmm7,%xmm1 + vmovdqa %xmm2,32(%esp) + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + vpor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vmovdqa 64(%esp),%xmm1 + movl %edi,%ebp + xorl %ebx,%esi + shldl $5,%edi,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + addl (%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm2,%xmm0,%xmm0 + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + vpor %xmm2,%xmm0,%xmm0 + 
xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vmovdqa 80(%esp),%xmm2 + movl %eax,%ebp + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%edi,%edi + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm3,%xmm1,%xmm1 + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + vpor %xmm3,%xmm1,%xmm1 + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vmovdqa 96(%esp),%xmm3 + movl %ebx,%ebp + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + vmovdqa %xmm5,%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shldl $5,%edi,%edi + addl %esi,%edx + vpxor %xmm4,%xmm2,%xmm2 + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + vpor %xmm4,%xmm2,%xmm2 + xorl %eax,%edi + shrdl $7,%edx,%edx + vmovdqa 64(%esp),%xmm4 + movl %ecx,%ebp + xorl %edi,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl (%esp),%eax + vpaddd %xmm3,%xmm7,%xmm7 + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl 
%esi,%eax + vmovdqa %xmm7,48(%esp) + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je .L010done + vmovdqa 160(%esp),%xmm7 + vmovdqa 176(%esp),%xmm6 + vmovdqu (%ebp),%xmm0 + vmovdqu 16(%ebp),%xmm1 + vmovdqu 32(%ebp),%xmm2 + vmovdqu 48(%ebp),%xmm3 + addl $64,%ebp + vpshufb %xmm6,%xmm0,%xmm0 + movl %ebp,196(%esp) + vmovdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpaddd %xmm7,%xmm0,%xmm4 + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,(%esp) + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%ebp + shldl $5,%edx,%edx + vpaddd %xmm7,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vmovdqa %xmm5,16(%esp) + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %edi,%ebp + shldl $5,%edi,%edi + vpaddd %xmm7,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vmovdqa %xmm6,32(%esp) + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,%ebx + movl %ecx,8(%ebp) + xorl %edx,%ebx + movl %edx,12(%ebp) + movl %edi,16(%ebp) + movl %esi,%ebp + andl %ebx,%esi + movl %ebp,%ebx + jmp .L009loop +.align 16 +.L010done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + 
addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroall + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_avx,.-_sha1_block_data_order_avx .align 64 .LK_XX_XX: .long 1518500249,1518500249,1518500249,1518500249 .long 1859775393,1859775393,1859775393,1859775393 .long 2400959708,2400959708,2400959708,2400959708 .long 3395469782,3395469782,3395469782,3395469782 .long 66051,67438087,134810123,202182159 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 .byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .comm OPENSSL_ia32cap_P,16,4 #else .file "sha1-586.S" .text .globl sha1_block_data_order .type sha1_block_data_order,@function .align 16 sha1_block_data_order: .L_sha1_block_data_order_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi call .L000pic_point .L000pic_point: popl %ebp leal OPENSSL_ia32cap_P,%esi leal .LK_XX_XX-.L000pic_point(%ebp),%ebp movl (%esi),%eax movl 4(%esi),%edx testl $512,%edx jz .L001x86 movl 8(%esi),%ecx testl $16777216,%eax jz .L001x86 testl $536870912,%ecx jnz .Lshaext_shortcut + andl $268435456,%edx + andl $1073741824,%eax + orl %edx,%eax + cmpl $1342177280,%eax + je .Lavx_shortcut jmp .Lssse3_shortcut .align 16 .L001x86: movl 20(%esp),%ebp movl 24(%esp),%esi movl 28(%esp),%eax subl $76,%esp shll $6,%eax addl %esi,%eax movl %eax,104(%esp) movl 16(%ebp),%edi jmp .L002loop .align 16 .L002loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,(%esp) movl %ebx,4(%esp) movl %ecx,8(%esp) movl %edx,12(%esp) movl 16(%esi),%eax movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,16(%esp) movl %ebx,20(%esp) movl %ecx,24(%esp) movl %edx,28(%esp) movl 32(%esi),%eax movl 
36(%esi),%ebx movl 40(%esi),%ecx movl 44(%esi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,32(%esp) movl %ebx,36(%esp) movl %ecx,40(%esp) movl %edx,44(%esp) movl 48(%esi),%eax movl 52(%esi),%ebx movl 56(%esi),%ecx movl 60(%esi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx movl %eax,48(%esp) movl %ebx,52(%esp) movl %ecx,56(%esp) movl %edx,60(%esp) movl %esi,100(%esp) movl (%ebp),%eax movl 4(%ebp),%ebx movl 8(%ebp),%ecx movl 12(%ebp),%edx movl %ecx,%esi movl %eax,%ebp roll $5,%ebp xorl %edx,%esi addl %edi,%ebp movl (%esp),%edi andl %ebx,%esi rorl $2,%ebx xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp movl %ebx,%edi movl %ebp,%esi roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp movl 4(%esp),%edx andl %eax,%edi rorl $2,%eax xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp movl %eax,%edx movl %ebp,%edi roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp movl 8(%esp),%ecx andl %esi,%edx rorl $2,%esi xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp movl %esi,%ecx movl %ebp,%edx roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp movl 12(%esp),%ebx andl %edi,%ecx rorl $2,%edi xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp addl %ecx,%ebp movl %edi,%ebx movl %ebp,%ecx roll $5,%ebp xorl %esi,%ebx addl %eax,%ebp movl 16(%esp),%eax andl %edx,%ebx rorl $2,%edx xorl %esi,%ebx leal 1518500249(%ebp,%eax,1),%ebp addl %ebx,%ebp movl %edx,%eax movl %ebp,%ebx roll $5,%ebp xorl %edi,%eax addl %esi,%ebp movl 20(%esp),%esi andl %ecx,%eax rorl $2,%ecx xorl %edi,%eax leal 1518500249(%ebp,%esi,1),%ebp addl %eax,%ebp movl %ecx,%esi movl %ebp,%eax roll $5,%ebp xorl %edx,%esi addl %edi,%ebp movl 24(%esp),%edi andl %ebx,%esi rorl $2,%ebx xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp movl %ebx,%edi movl %ebp,%esi roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp movl 28(%esp),%edx andl %eax,%edi rorl $2,%eax xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp movl %eax,%edx movl %ebp,%edi roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp movl 32(%esp),%ecx andl %esi,%edx rorl $2,%esi xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp movl %esi,%ecx movl %ebp,%edx roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp movl 36(%esp),%ebx andl %edi,%ecx rorl $2,%edi xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp addl %ecx,%ebp movl %edi,%ebx movl %ebp,%ecx roll $5,%ebp xorl %esi,%ebx addl %eax,%ebp movl 40(%esp),%eax andl %edx,%ebx rorl $2,%edx xorl %esi,%ebx leal 1518500249(%ebp,%eax,1),%ebp addl %ebx,%ebp movl %edx,%eax movl %ebp,%ebx roll $5,%ebp xorl %edi,%eax addl %esi,%ebp movl 44(%esp),%esi andl %ecx,%eax rorl $2,%ecx xorl %edi,%eax leal 1518500249(%ebp,%esi,1),%ebp addl %eax,%ebp movl %ecx,%esi movl %ebp,%eax roll $5,%ebp xorl %edx,%esi addl %edi,%ebp movl 48(%esp),%edi andl %ebx,%esi rorl $2,%ebx xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp movl %ebx,%edi movl %ebp,%esi roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp movl 52(%esp),%edx andl %eax,%edi rorl $2,%eax xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp movl %eax,%edx movl %ebp,%edi roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp movl 56(%esp),%ecx andl %esi,%edx rorl $2,%esi xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp movl %esi,%ecx movl %ebp,%edx roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp movl 60(%esp),%ebx andl %edi,%ecx rorl $2,%edi xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp movl (%esp),%ebx addl %ebp,%ecx movl %edi,%ebp xorl 8(%esp),%ebx xorl %esi,%ebp xorl 32(%esp),%ebx andl %edx,%ebp xorl 52(%esp),%ebx roll $1,%ebx xorl %esi,%ebp addl 
%ebp,%eax movl %ecx,%ebp rorl $2,%edx movl %ebx,(%esp) roll $5,%ebp leal 1518500249(%ebx,%eax,1),%ebx movl 4(%esp),%eax addl %ebp,%ebx movl %edx,%ebp xorl 12(%esp),%eax xorl %edi,%ebp xorl 36(%esp),%eax andl %ecx,%ebp xorl 56(%esp),%eax roll $1,%eax xorl %edi,%ebp addl %ebp,%esi movl %ebx,%ebp rorl $2,%ecx movl %eax,4(%esp) roll $5,%ebp leal 1518500249(%eax,%esi,1),%eax movl 8(%esp),%esi addl %ebp,%eax movl %ecx,%ebp xorl 16(%esp),%esi xorl %edx,%ebp xorl 40(%esp),%esi andl %ebx,%ebp xorl 60(%esp),%esi roll $1,%esi xorl %edx,%ebp addl %ebp,%edi movl %eax,%ebp rorl $2,%ebx movl %esi,8(%esp) roll $5,%ebp leal 1518500249(%esi,%edi,1),%esi movl 12(%esp),%edi addl %ebp,%esi movl %ebx,%ebp xorl 20(%esp),%edi xorl %ecx,%ebp xorl 44(%esp),%edi andl %eax,%ebp xorl (%esp),%edi roll $1,%edi xorl %ecx,%ebp addl %ebp,%edx movl %esi,%ebp rorl $2,%eax movl %edi,12(%esp) roll $5,%ebp leal 1518500249(%edi,%edx,1),%edi movl 16(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 24(%esp),%edx xorl %eax,%ebp xorl 48(%esp),%edx xorl %ebx,%ebp xorl 4(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,16(%esp) leal 1859775393(%edx,%ecx,1),%edx movl 20(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 28(%esp),%ecx xorl %esi,%ebp xorl 52(%esp),%ecx xorl %eax,%ebp xorl 8(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,20(%esp) leal 1859775393(%ecx,%ebx,1),%ecx movl 24(%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 32(%esp),%ebx xorl %edi,%ebp xorl 56(%esp),%ebx xorl %esi,%ebp xorl 12(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,24(%esp) leal 1859775393(%ebx,%eax,1),%ebx movl 28(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 36(%esp),%eax xorl %edx,%ebp xorl 60(%esp),%eax xorl %edi,%ebp xorl 16(%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp movl %eax,28(%esp) leal 1859775393(%eax,%esi,1),%eax movl 32(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl 40(%esp),%esi xorl %ecx,%ebp xorl (%esp),%esi xorl %edx,%ebp xorl 20(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,32(%esp) leal 1859775393(%esi,%edi,1),%esi movl 36(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 44(%esp),%edi xorl %ebx,%ebp xorl 4(%esp),%edi xorl %ecx,%ebp xorl 24(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,36(%esp) leal 1859775393(%edi,%edx,1),%edi movl 40(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 48(%esp),%edx xorl %eax,%ebp xorl 8(%esp),%edx xorl %ebx,%ebp xorl 28(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,40(%esp) leal 1859775393(%edx,%ecx,1),%edx movl 44(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 52(%esp),%ecx xorl %esi,%ebp xorl 12(%esp),%ecx xorl %eax,%ebp xorl 32(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,44(%esp) leal 1859775393(%ecx,%ebx,1),%ecx movl 48(%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 56(%esp),%ebx xorl %edi,%ebp xorl 16(%esp),%ebx xorl %esi,%ebp xorl 36(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,48(%esp) leal 1859775393(%ebx,%eax,1),%ebx movl 52(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 60(%esp),%eax xorl %edx,%ebp xorl 20(%esp),%eax xorl %edi,%ebp xorl 40(%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp movl %eax,52(%esp) leal 1859775393(%eax,%esi,1),%eax movl 56(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl (%esp),%esi 
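# (scalar .L001x86 path, continued) rounds 20-39: the leal 1859775393(...) forms add K = 0x6ed9eba1;
# each round also refreshes W[t] in place on the stack with the xorl/xorl/xorl/roll $1 sequence.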
xorl %ecx,%ebp xorl 24(%esp),%esi xorl %edx,%ebp xorl 44(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,56(%esp) leal 1859775393(%esi,%edi,1),%esi movl 60(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 4(%esp),%edi xorl %ebx,%ebp xorl 28(%esp),%edi xorl %ecx,%ebp xorl 48(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,60(%esp) leal 1859775393(%edi,%edx,1),%edi movl (%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 8(%esp),%edx xorl %eax,%ebp xorl 32(%esp),%edx xorl %ebx,%ebp xorl 52(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,(%esp) leal 1859775393(%edx,%ecx,1),%edx movl 4(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 12(%esp),%ecx xorl %esi,%ebp xorl 36(%esp),%ecx xorl %eax,%ebp xorl 56(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,4(%esp) leal 1859775393(%ecx,%ebx,1),%ecx movl 8(%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 16(%esp),%ebx xorl %edi,%ebp xorl 40(%esp),%ebx xorl %esi,%ebp xorl 60(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,8(%esp) leal 1859775393(%ebx,%eax,1),%ebx movl 12(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 20(%esp),%eax xorl %edx,%ebp xorl 44(%esp),%eax xorl %edi,%ebp xorl (%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp movl %eax,12(%esp) leal 1859775393(%eax,%esi,1),%eax movl 16(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl 24(%esp),%esi xorl %ecx,%ebp xorl 48(%esp),%esi xorl %edx,%ebp xorl 4(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,16(%esp) leal 1859775393(%esi,%edi,1),%esi movl 20(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 28(%esp),%edi xorl %ebx,%ebp xorl 52(%esp),%edi xorl %ecx,%ebp xorl 8(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,20(%esp) leal 1859775393(%edi,%edx,1),%edi movl 24(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 32(%esp),%edx xorl %eax,%ebp xorl 56(%esp),%edx xorl %ebx,%ebp xorl 12(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,24(%esp) leal 1859775393(%edx,%ecx,1),%edx movl 28(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 36(%esp),%ecx xorl %esi,%ebp xorl 60(%esp),%ecx xorl %eax,%ebp xorl 16(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,28(%esp) leal 1859775393(%ecx,%ebx,1),%ecx movl 32(%esp),%ebx addl %ebp,%ecx movl %edi,%ebp xorl 40(%esp),%ebx xorl %esi,%ebp xorl (%esp),%ebx andl %edx,%ebp xorl 20(%esp),%ebx roll $1,%ebx addl %eax,%ebp rorl $2,%edx movl %ecx,%eax roll $5,%eax movl %ebx,32(%esp) leal 2400959708(%ebx,%ebp,1),%ebx movl %edi,%ebp addl %eax,%ebx andl %esi,%ebp movl 36(%esp),%eax addl %ebp,%ebx movl %edx,%ebp xorl 44(%esp),%eax xorl %edi,%ebp xorl 4(%esp),%eax andl %ecx,%ebp xorl 24(%esp),%eax roll $1,%eax addl %esi,%ebp rorl $2,%ecx movl %ebx,%esi roll $5,%esi movl %eax,36(%esp) leal 2400959708(%eax,%ebp,1),%eax movl %edx,%ebp addl %esi,%eax andl %edi,%ebp movl 40(%esp),%esi addl %ebp,%eax movl %ecx,%ebp xorl 48(%esp),%esi xorl %edx,%ebp xorl 8(%esp),%esi andl %ebx,%ebp xorl 28(%esp),%esi roll $1,%esi addl %edi,%ebp rorl $2,%ebx movl %eax,%edi roll $5,%edi movl %esi,40(%esp) leal 2400959708(%esi,%ebp,1),%esi movl %ecx,%ebp addl %edi,%esi andl %edx,%ebp movl 44(%esp),%edi addl %ebp,%esi movl %ebx,%ebp xorl 52(%esp),%edi xorl %ecx,%ebp xorl 12(%esp),%edi andl %eax,%ebp xorl 32(%esp),%edi roll $1,%edi 
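# rounds 40-59 follow: K = 0x8f1bbcdc (2400959708) with the Maj(b,c,d) boolean function,
# apparently split here into two disjoint terms, (b & (c ^ d)) and (c & d), added into the round sum.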
addl %edx,%ebp rorl $2,%eax movl %esi,%edx roll $5,%edx movl %edi,44(%esp) leal 2400959708(%edi,%ebp,1),%edi movl %ebx,%ebp addl %edx,%edi andl %ecx,%ebp movl 48(%esp),%edx addl %ebp,%edi movl %eax,%ebp xorl 56(%esp),%edx xorl %ebx,%ebp xorl 16(%esp),%edx andl %esi,%ebp xorl 36(%esp),%edx roll $1,%edx addl %ecx,%ebp rorl $2,%esi movl %edi,%ecx roll $5,%ecx movl %edx,48(%esp) leal 2400959708(%edx,%ebp,1),%edx movl %eax,%ebp addl %ecx,%edx andl %ebx,%ebp movl 52(%esp),%ecx addl %ebp,%edx movl %esi,%ebp xorl 60(%esp),%ecx xorl %eax,%ebp xorl 20(%esp),%ecx andl %edi,%ebp xorl 40(%esp),%ecx roll $1,%ecx addl %ebx,%ebp rorl $2,%edi movl %edx,%ebx roll $5,%ebx movl %ecx,52(%esp) leal 2400959708(%ecx,%ebp,1),%ecx movl %esi,%ebp addl %ebx,%ecx andl %eax,%ebp movl 56(%esp),%ebx addl %ebp,%ecx movl %edi,%ebp xorl (%esp),%ebx xorl %esi,%ebp xorl 24(%esp),%ebx andl %edx,%ebp xorl 44(%esp),%ebx roll $1,%ebx addl %eax,%ebp rorl $2,%edx movl %ecx,%eax roll $5,%eax movl %ebx,56(%esp) leal 2400959708(%ebx,%ebp,1),%ebx movl %edi,%ebp addl %eax,%ebx andl %esi,%ebp movl 60(%esp),%eax addl %ebp,%ebx movl %edx,%ebp xorl 4(%esp),%eax xorl %edi,%ebp xorl 28(%esp),%eax andl %ecx,%ebp xorl 48(%esp),%eax roll $1,%eax addl %esi,%ebp rorl $2,%ecx movl %ebx,%esi roll $5,%esi movl %eax,60(%esp) leal 2400959708(%eax,%ebp,1),%eax movl %edx,%ebp addl %esi,%eax andl %edi,%ebp movl (%esp),%esi addl %ebp,%eax movl %ecx,%ebp xorl 8(%esp),%esi xorl %edx,%ebp xorl 32(%esp),%esi andl %ebx,%ebp xorl 52(%esp),%esi roll $1,%esi addl %edi,%ebp rorl $2,%ebx movl %eax,%edi roll $5,%edi movl %esi,(%esp) leal 2400959708(%esi,%ebp,1),%esi movl %ecx,%ebp addl %edi,%esi andl %edx,%ebp movl 4(%esp),%edi addl %ebp,%esi movl %ebx,%ebp xorl 12(%esp),%edi xorl %ecx,%ebp xorl 36(%esp),%edi andl %eax,%ebp xorl 56(%esp),%edi roll $1,%edi addl %edx,%ebp rorl $2,%eax movl %esi,%edx roll $5,%edx movl %edi,4(%esp) leal 2400959708(%edi,%ebp,1),%edi movl %ebx,%ebp addl %edx,%edi andl %ecx,%ebp movl 8(%esp),%edx addl %ebp,%edi movl %eax,%ebp xorl 16(%esp),%edx xorl %ebx,%ebp xorl 40(%esp),%edx andl %esi,%ebp xorl 60(%esp),%edx roll $1,%edx addl %ecx,%ebp rorl $2,%esi movl %edi,%ecx roll $5,%ecx movl %edx,8(%esp) leal 2400959708(%edx,%ebp,1),%edx movl %eax,%ebp addl %ecx,%edx andl %ebx,%ebp movl 12(%esp),%ecx addl %ebp,%edx movl %esi,%ebp xorl 20(%esp),%ecx xorl %eax,%ebp xorl 44(%esp),%ecx andl %edi,%ebp xorl (%esp),%ecx roll $1,%ecx addl %ebx,%ebp rorl $2,%edi movl %edx,%ebx roll $5,%ebx movl %ecx,12(%esp) leal 2400959708(%ecx,%ebp,1),%ecx movl %esi,%ebp addl %ebx,%ecx andl %eax,%ebp movl 16(%esp),%ebx addl %ebp,%ecx movl %edi,%ebp xorl 24(%esp),%ebx xorl %esi,%ebp xorl 48(%esp),%ebx andl %edx,%ebp xorl 4(%esp),%ebx roll $1,%ebx addl %eax,%ebp rorl $2,%edx movl %ecx,%eax roll $5,%eax movl %ebx,16(%esp) leal 2400959708(%ebx,%ebp,1),%ebx movl %edi,%ebp addl %eax,%ebx andl %esi,%ebp movl 20(%esp),%eax addl %ebp,%ebx movl %edx,%ebp xorl 28(%esp),%eax xorl %edi,%ebp xorl 52(%esp),%eax andl %ecx,%ebp xorl 8(%esp),%eax roll $1,%eax addl %esi,%ebp rorl $2,%ecx movl %ebx,%esi roll $5,%esi movl %eax,20(%esp) leal 2400959708(%eax,%ebp,1),%eax movl %edx,%ebp addl %esi,%eax andl %edi,%ebp movl 24(%esp),%esi addl %ebp,%eax movl %ecx,%ebp xorl 32(%esp),%esi xorl %edx,%ebp xorl 56(%esp),%esi andl %ebx,%ebp xorl 12(%esp),%esi roll $1,%esi addl %edi,%ebp rorl $2,%ebx movl %eax,%edi roll $5,%edi movl %esi,24(%esp) leal 2400959708(%esi,%ebp,1),%esi movl %ecx,%ebp addl %edi,%esi andl %edx,%ebp movl 28(%esp),%edi addl %ebp,%esi movl %ebx,%ebp xorl 36(%esp),%edi xorl %ecx,%ebp 
xorl 60(%esp),%edi andl %eax,%ebp xorl 16(%esp),%edi roll $1,%edi addl %edx,%ebp rorl $2,%eax movl %esi,%edx roll $5,%edx movl %edi,28(%esp) leal 2400959708(%edi,%ebp,1),%edi movl %ebx,%ebp addl %edx,%edi andl %ecx,%ebp movl 32(%esp),%edx addl %ebp,%edi movl %eax,%ebp xorl 40(%esp),%edx xorl %ebx,%ebp xorl (%esp),%edx andl %esi,%ebp xorl 20(%esp),%edx roll $1,%edx addl %ecx,%ebp rorl $2,%esi movl %edi,%ecx roll $5,%ecx movl %edx,32(%esp) leal 2400959708(%edx,%ebp,1),%edx movl %eax,%ebp addl %ecx,%edx andl %ebx,%ebp movl 36(%esp),%ecx addl %ebp,%edx movl %esi,%ebp xorl 44(%esp),%ecx xorl %eax,%ebp xorl 4(%esp),%ecx andl %edi,%ebp xorl 24(%esp),%ecx roll $1,%ecx addl %ebx,%ebp rorl $2,%edi movl %edx,%ebx roll $5,%ebx movl %ecx,36(%esp) leal 2400959708(%ecx,%ebp,1),%ecx movl %esi,%ebp addl %ebx,%ecx andl %eax,%ebp movl 40(%esp),%ebx addl %ebp,%ecx movl %edi,%ebp xorl 48(%esp),%ebx xorl %esi,%ebp xorl 8(%esp),%ebx andl %edx,%ebp xorl 28(%esp),%ebx roll $1,%ebx addl %eax,%ebp rorl $2,%edx movl %ecx,%eax roll $5,%eax movl %ebx,40(%esp) leal 2400959708(%ebx,%ebp,1),%ebx movl %edi,%ebp addl %eax,%ebx andl %esi,%ebp movl 44(%esp),%eax addl %ebp,%ebx movl %edx,%ebp xorl 52(%esp),%eax xorl %edi,%ebp xorl 12(%esp),%eax andl %ecx,%ebp xorl 32(%esp),%eax roll $1,%eax addl %esi,%ebp rorl $2,%ecx movl %ebx,%esi roll $5,%esi movl %eax,44(%esp) leal 2400959708(%eax,%ebp,1),%eax movl %edx,%ebp addl %esi,%eax andl %edi,%ebp movl 48(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl 56(%esp),%esi xorl %ecx,%ebp xorl 16(%esp),%esi xorl %edx,%ebp xorl 36(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,48(%esp) leal 3395469782(%esi,%edi,1),%esi movl 52(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 60(%esp),%edi xorl %ebx,%ebp xorl 20(%esp),%edi xorl %ecx,%ebp xorl 40(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,52(%esp) leal 3395469782(%edi,%edx,1),%edi movl 56(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl (%esp),%edx xorl %eax,%ebp xorl 24(%esp),%edx xorl %ebx,%ebp xorl 44(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,56(%esp) leal 3395469782(%edx,%ecx,1),%edx movl 60(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 4(%esp),%ecx xorl %esi,%ebp xorl 28(%esp),%ecx xorl %eax,%ebp xorl 48(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,60(%esp) leal 3395469782(%ecx,%ebx,1),%ecx movl (%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 8(%esp),%ebx xorl %edi,%ebp xorl 32(%esp),%ebx xorl %esi,%ebp xorl 52(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,(%esp) leal 3395469782(%ebx,%eax,1),%ebx movl 4(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 12(%esp),%eax xorl %edx,%ebp xorl 36(%esp),%eax xorl %edi,%ebp xorl 56(%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp movl %eax,4(%esp) leal 3395469782(%eax,%esi,1),%eax movl 8(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl 16(%esp),%esi xorl %ecx,%ebp xorl 40(%esp),%esi xorl %edx,%ebp xorl 60(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,8(%esp) leal 3395469782(%esi,%edi,1),%esi movl 12(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 20(%esp),%edi xorl %ebx,%ebp xorl 44(%esp),%edi xorl %ecx,%ebp xorl (%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,12(%esp) leal 3395469782(%edi,%edx,1),%edi movl 16(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 24(%esp),%edx xorl 
%eax,%ebp xorl 48(%esp),%edx xorl %ebx,%ebp xorl 4(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,16(%esp) leal 3395469782(%edx,%ecx,1),%edx movl 20(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 28(%esp),%ecx xorl %esi,%ebp xorl 52(%esp),%ecx xorl %eax,%ebp xorl 8(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,20(%esp) leal 3395469782(%ecx,%ebx,1),%ecx movl 24(%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 32(%esp),%ebx xorl %edi,%ebp xorl 56(%esp),%ebx xorl %esi,%ebp xorl 12(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,24(%esp) leal 3395469782(%ebx,%eax,1),%ebx movl 28(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 36(%esp),%eax xorl %edx,%ebp xorl 60(%esp),%eax xorl %edi,%ebp xorl 16(%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp movl %eax,28(%esp) leal 3395469782(%eax,%esi,1),%eax movl 32(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl 40(%esp),%esi xorl %ecx,%ebp xorl (%esp),%esi xorl %edx,%ebp xorl 20(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp movl %esi,32(%esp) leal 3395469782(%esi,%edi,1),%esi movl 36(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 44(%esp),%edi xorl %ebx,%ebp xorl 4(%esp),%edi xorl %ecx,%ebp xorl 24(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp movl %edi,36(%esp) leal 3395469782(%edi,%edx,1),%edi movl 40(%esp),%edx addl %ebp,%edi movl %esi,%ebp xorl 48(%esp),%edx xorl %eax,%ebp xorl 8(%esp),%edx xorl %ebx,%ebp xorl 28(%esp),%edx roll $1,%edx addl %ebp,%ecx rorl $2,%esi movl %edi,%ebp roll $5,%ebp movl %edx,40(%esp) leal 3395469782(%edx,%ecx,1),%edx movl 44(%esp),%ecx addl %ebp,%edx movl %edi,%ebp xorl 52(%esp),%ecx xorl %esi,%ebp xorl 12(%esp),%ecx xorl %eax,%ebp xorl 32(%esp),%ecx roll $1,%ecx addl %ebp,%ebx rorl $2,%edi movl %edx,%ebp roll $5,%ebp movl %ecx,44(%esp) leal 3395469782(%ecx,%ebx,1),%ecx movl 48(%esp),%ebx addl %ebp,%ecx movl %edx,%ebp xorl 56(%esp),%ebx xorl %edi,%ebp xorl 16(%esp),%ebx xorl %esi,%ebp xorl 36(%esp),%ebx roll $1,%ebx addl %ebp,%eax rorl $2,%edx movl %ecx,%ebp roll $5,%ebp movl %ebx,48(%esp) leal 3395469782(%ebx,%eax,1),%ebx movl 52(%esp),%eax addl %ebp,%ebx movl %ecx,%ebp xorl 60(%esp),%eax xorl %edx,%ebp xorl 20(%esp),%eax xorl %edi,%ebp xorl 40(%esp),%eax roll $1,%eax addl %ebp,%esi rorl $2,%ecx movl %ebx,%ebp roll $5,%ebp leal 3395469782(%eax,%esi,1),%eax movl 56(%esp),%esi addl %ebp,%eax movl %ebx,%ebp xorl (%esp),%esi xorl %ecx,%ebp xorl 24(%esp),%esi xorl %edx,%ebp xorl 44(%esp),%esi roll $1,%esi addl %ebp,%edi rorl $2,%ebx movl %eax,%ebp roll $5,%ebp leal 3395469782(%esi,%edi,1),%esi movl 60(%esp),%edi addl %ebp,%esi movl %eax,%ebp xorl 4(%esp),%edi xorl %ebx,%ebp xorl 28(%esp),%edi xorl %ecx,%ebp xorl 48(%esp),%edi roll $1,%edi addl %ebp,%edx rorl $2,%eax movl %esi,%ebp roll $5,%ebp leal 3395469782(%edi,%edx,1),%edi addl %ebp,%edi movl 96(%esp),%ebp movl 100(%esp),%edx addl (%ebp),%edi addl 4(%ebp),%esi addl 8(%ebp),%eax addl 12(%ebp),%ebx addl 16(%ebp),%ecx movl %edi,(%ebp) addl $64,%edx movl %esi,4(%ebp) cmpl 104(%esp),%edx movl %eax,8(%ebp) movl %ecx,%edi movl %ebx,12(%ebp) movl %edx,%esi movl %ecx,16(%ebp) jb .L002loop addl $76,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size sha1_block_data_order,.-.L_sha1_block_data_order_begin .type _sha1_block_data_order_shaext,@function .align 16 _sha1_block_data_order_shaext: pushl %ebp pushl %ebx pushl %esi pushl %edi call .L003pic_point 
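# PIC idiom: the call above pushes the address of .L003pic_point; the popl %ebp below recovers it
# and uses it as a base register to reach the .LK_XX_XX constant table.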
.L003pic_point: popl %ebp leal .LK_XX_XX-.L003pic_point(%ebp),%ebp .Lshaext_shortcut: movl 20(%esp),%edi movl %esp,%ebx movl 24(%esp),%esi movl 28(%esp),%ecx subl $32,%esp movdqu (%edi),%xmm0 movd 16(%edi),%xmm1 andl $-32,%esp movdqa 80(%ebp),%xmm3 movdqu (%esi),%xmm4 pshufd $27,%xmm0,%xmm0 movdqu 16(%esi),%xmm5 pshufd $27,%xmm1,%xmm1 movdqu 32(%esi),%xmm6 .byte 102,15,56,0,227 movdqu 48(%esi),%xmm7 .byte 102,15,56,0,235 .byte 102,15,56,0,243 .byte 102,15,56,0,251 jmp .L004loop_shaext .align 16 .L004loop_shaext: decl %ecx leal 64(%esi),%eax movdqa %xmm1,(%esp) paddd %xmm4,%xmm1 cmovnel %eax,%esi movdqa %xmm0,16(%esp) .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,0 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,0 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,0 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,0 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,0 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,1 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,1 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,1 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,1 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,1 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,2 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,2 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 .byte 15,56,201,229 movdqa %xmm0,%xmm2 .byte 15,58,204,193,2 .byte 15,56,200,213 pxor %xmm6,%xmm4 .byte 15,56,201,238 .byte 15,56,202,231 movdqa %xmm0,%xmm1 .byte 15,58,204,194,2 .byte 15,56,200,206 pxor %xmm7,%xmm5 .byte 15,56,202,236 .byte 15,56,201,247 movdqa %xmm0,%xmm2 .byte 15,58,204,193,2 .byte 15,56,200,215 pxor %xmm4,%xmm6 .byte 15,56,201,252 .byte 15,56,202,245 movdqa %xmm0,%xmm1 .byte 15,58,204,194,3 .byte 15,56,200,204 pxor %xmm5,%xmm7 .byte 15,56,202,254 movdqu (%esi),%xmm4 movdqa %xmm0,%xmm2 .byte 15,58,204,193,3 .byte 15,56,200,213 movdqu 16(%esi),%xmm5 .byte 102,15,56,0,227 movdqa %xmm0,%xmm1 .byte 15,58,204,194,3 .byte 15,56,200,206 movdqu 32(%esi),%xmm6 .byte 102,15,56,0,235 movdqa %xmm0,%xmm2 .byte 15,58,204,193,3 .byte 15,56,200,215 movdqu 48(%esi),%xmm7 .byte 102,15,56,0,243 movdqa %xmm0,%xmm1 .byte 15,58,204,194,3 movdqa (%esp),%xmm2 .byte 102,15,56,0,251 .byte 15,56,200,202 paddd 16(%esp),%xmm0 jnz .L004loop_shaext pshufd $27,%xmm0,%xmm0 pshufd $27,%xmm1,%xmm1 movdqu %xmm0,(%edi) movd %xmm1,16(%edi) movl %ebx,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext .type _sha1_block_data_order_ssse3,@function .align 16 _sha1_block_data_order_ssse3: pushl %ebp pushl %ebx pushl %esi pushl %edi call .L005pic_point .L005pic_point: popl %ebp leal .LK_XX_XX-.L005pic_point(%ebp),%ebp .Lssse3_shortcut: movdqa (%ebp),%xmm7 movdqa 16(%ebp),%xmm0 movdqa 32(%ebp),%xmm1 movdqa 48(%ebp),%xmm2 movdqa 64(%ebp),%xmm6 movl 
20(%esp),%edi movl 24(%esp),%ebp movl 28(%esp),%edx movl %esp,%esi subl $208,%esp andl $-64,%esp movdqa %xmm0,112(%esp) movdqa %xmm1,128(%esp) movdqa %xmm2,144(%esp) shll $6,%edx movdqa %xmm7,160(%esp) addl %ebp,%edx movdqa %xmm6,176(%esp) addl $64,%ebp movl %edi,192(%esp) movl %ebp,196(%esp) movl %edx,200(%esp) movl %esi,204(%esp) movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx movl 16(%edi),%edi movl %ebx,%esi movdqu -64(%ebp),%xmm0 movdqu -48(%ebp),%xmm1 movdqu -32(%ebp),%xmm2 movdqu -16(%ebp),%xmm3 .byte 102,15,56,0,198 .byte 102,15,56,0,206 .byte 102,15,56,0,214 movdqa %xmm7,96(%esp) .byte 102,15,56,0,222 paddd %xmm7,%xmm0 paddd %xmm7,%xmm1 paddd %xmm7,%xmm2 movdqa %xmm0,(%esp) psubd %xmm7,%xmm0 movdqa %xmm1,16(%esp) psubd %xmm7,%xmm1 movdqa %xmm2,32(%esp) movl %ecx,%ebp psubd %xmm7,%xmm2 xorl %edx,%ebp pshufd $238,%xmm0,%xmm4 andl %ebp,%esi jmp .L006loop .align 16 .L006loop: rorl $2,%ebx xorl %edx,%esi movl %eax,%ebp punpcklqdq %xmm1,%xmm4 movdqa %xmm3,%xmm6 addl (%esp),%edi xorl %ecx,%ebx paddd %xmm3,%xmm7 movdqa %xmm0,64(%esp) roll $5,%eax addl %esi,%edi psrldq $4,%xmm6 andl %ebx,%ebp xorl %ecx,%ebx pxor %xmm0,%xmm4 addl %eax,%edi rorl $7,%eax pxor %xmm2,%xmm6 xorl %ecx,%ebp movl %edi,%esi addl 4(%esp),%edx pxor %xmm6,%xmm4 xorl %ebx,%eax roll $5,%edi movdqa %xmm7,48(%esp) addl %ebp,%edx andl %eax,%esi movdqa %xmm4,%xmm0 xorl %ebx,%eax addl %edi,%edx rorl $7,%edi movdqa %xmm4,%xmm6 xorl %ebx,%esi pslldq $12,%xmm0 paddd %xmm4,%xmm4 movl %edx,%ebp addl 8(%esp),%ecx psrld $31,%xmm6 xorl %eax,%edi roll $5,%edx movdqa %xmm0,%xmm7 addl %esi,%ecx andl %edi,%ebp xorl %eax,%edi psrld $30,%xmm0 addl %edx,%ecx rorl $7,%edx por %xmm6,%xmm4 xorl %eax,%ebp movl %ecx,%esi addl 12(%esp),%ebx pslld $2,%xmm7 xorl %edi,%edx roll $5,%ecx pxor %xmm0,%xmm4 movdqa 96(%esp),%xmm0 addl %ebp,%ebx andl %edx,%esi pxor %xmm7,%xmm4 pshufd $238,%xmm1,%xmm5 xorl %edi,%edx addl %ecx,%ebx rorl $7,%ecx xorl %edi,%esi movl %ebx,%ebp punpcklqdq %xmm2,%xmm5 movdqa %xmm4,%xmm7 addl 16(%esp),%eax xorl %edx,%ecx paddd %xmm4,%xmm0 movdqa %xmm1,80(%esp) roll $5,%ebx addl %esi,%eax psrldq $4,%xmm7 andl %ecx,%ebp xorl %edx,%ecx pxor %xmm1,%xmm5 addl %ebx,%eax rorl $7,%ebx pxor %xmm3,%xmm7 xorl %edx,%ebp movl %eax,%esi addl 20(%esp),%edi pxor %xmm7,%xmm5 xorl %ecx,%ebx roll $5,%eax movdqa %xmm0,(%esp) addl %ebp,%edi andl %ebx,%esi movdqa %xmm5,%xmm1 xorl %ecx,%ebx addl %eax,%edi rorl $7,%eax movdqa %xmm5,%xmm7 xorl %ecx,%esi pslldq $12,%xmm1 paddd %xmm5,%xmm5 movl %edi,%ebp addl 24(%esp),%edx psrld $31,%xmm7 xorl %ebx,%eax roll $5,%edi movdqa %xmm1,%xmm0 addl %esi,%edx andl %eax,%ebp xorl %ebx,%eax psrld $30,%xmm1 addl %edi,%edx rorl $7,%edi por %xmm7,%xmm5 xorl %ebx,%ebp movl %edx,%esi addl 28(%esp),%ecx pslld $2,%xmm0 xorl %eax,%edi roll $5,%edx pxor %xmm1,%xmm5 movdqa 112(%esp),%xmm1 addl %ebp,%ecx andl %edi,%esi pxor %xmm0,%xmm5 pshufd $238,%xmm2,%xmm6 xorl %eax,%edi addl %edx,%ecx rorl $7,%edx xorl %eax,%esi movl %ecx,%ebp punpcklqdq %xmm3,%xmm6 movdqa %xmm5,%xmm0 addl 32(%esp),%ebx xorl %edi,%edx paddd %xmm5,%xmm1 movdqa %xmm2,96(%esp) roll $5,%ecx addl %esi,%ebx psrldq $4,%xmm0 andl %edx,%ebp xorl %edi,%edx pxor %xmm2,%xmm6 addl %ecx,%ebx rorl $7,%ecx pxor %xmm4,%xmm0 xorl %edi,%ebp movl %ebx,%esi addl 36(%esp),%eax pxor %xmm0,%xmm6 xorl %edx,%ecx roll $5,%ebx movdqa %xmm1,16(%esp) addl %ebp,%eax andl %ecx,%esi movdqa %xmm6,%xmm2 xorl %edx,%ecx addl %ebx,%eax rorl $7,%ebx movdqa %xmm6,%xmm0 xorl %edx,%esi pslldq $12,%xmm2 paddd %xmm6,%xmm6 movl %eax,%ebp addl 40(%esp),%edi psrld $31,%xmm0 xorl 
%ecx,%ebx roll $5,%eax movdqa %xmm2,%xmm1 addl %esi,%edi andl %ebx,%ebp xorl %ecx,%ebx psrld $30,%xmm2 addl %eax,%edi rorl $7,%eax por %xmm0,%xmm6 xorl %ecx,%ebp movdqa 64(%esp),%xmm0 movl %edi,%esi addl 44(%esp),%edx pslld $2,%xmm1 xorl %ebx,%eax roll $5,%edi pxor %xmm2,%xmm6 movdqa 112(%esp),%xmm2 addl %ebp,%edx andl %eax,%esi pxor %xmm1,%xmm6 pshufd $238,%xmm3,%xmm7 xorl %ebx,%eax addl %edi,%edx rorl $7,%edi xorl %ebx,%esi movl %edx,%ebp punpcklqdq %xmm4,%xmm7 movdqa %xmm6,%xmm1 addl 48(%esp),%ecx xorl %eax,%edi paddd %xmm6,%xmm2 movdqa %xmm3,64(%esp) roll $5,%edx addl %esi,%ecx psrldq $4,%xmm1 andl %edi,%ebp xorl %eax,%edi pxor %xmm3,%xmm7 addl %edx,%ecx rorl $7,%edx pxor %xmm5,%xmm1 xorl %eax,%ebp movl %ecx,%esi addl 52(%esp),%ebx pxor %xmm1,%xmm7 xorl %edi,%edx roll $5,%ecx movdqa %xmm2,32(%esp) addl %ebp,%ebx andl %edx,%esi movdqa %xmm7,%xmm3 xorl %edi,%edx addl %ecx,%ebx rorl $7,%ecx movdqa %xmm7,%xmm1 xorl %edi,%esi pslldq $12,%xmm3 paddd %xmm7,%xmm7 movl %ebx,%ebp addl 56(%esp),%eax psrld $31,%xmm1 xorl %edx,%ecx roll $5,%ebx movdqa %xmm3,%xmm2 addl %esi,%eax andl %ecx,%ebp xorl %edx,%ecx psrld $30,%xmm3 addl %ebx,%eax rorl $7,%ebx por %xmm1,%xmm7 xorl %edx,%ebp movdqa 80(%esp),%xmm1 movl %eax,%esi addl 60(%esp),%edi pslld $2,%xmm2 xorl %ecx,%ebx roll $5,%eax pxor %xmm3,%xmm7 movdqa 112(%esp),%xmm3 addl %ebp,%edi andl %ebx,%esi pxor %xmm2,%xmm7 pshufd $238,%xmm6,%xmm2 xorl %ecx,%ebx addl %eax,%edi rorl $7,%eax pxor %xmm4,%xmm0 punpcklqdq %xmm7,%xmm2 xorl %ecx,%esi movl %edi,%ebp addl (%esp),%edx pxor %xmm1,%xmm0 movdqa %xmm4,80(%esp) xorl %ebx,%eax roll $5,%edi movdqa %xmm3,%xmm4 addl %esi,%edx paddd %xmm7,%xmm3 andl %eax,%ebp pxor %xmm2,%xmm0 xorl %ebx,%eax addl %edi,%edx rorl $7,%edi xorl %ebx,%ebp movdqa %xmm0,%xmm2 movdqa %xmm3,48(%esp) movl %edx,%esi addl 4(%esp),%ecx xorl %eax,%edi roll $5,%edx pslld $2,%xmm0 addl %ebp,%ecx andl %edi,%esi psrld $30,%xmm2 xorl %eax,%edi addl %edx,%ecx rorl $7,%edx xorl %eax,%esi movl %ecx,%ebp addl 8(%esp),%ebx xorl %edi,%edx roll $5,%ecx por %xmm2,%xmm0 addl %esi,%ebx andl %edx,%ebp movdqa 96(%esp),%xmm2 xorl %edi,%edx addl %ecx,%ebx addl 12(%esp),%eax xorl %edi,%ebp movl %ebx,%esi pshufd $238,%xmm7,%xmm3 roll $5,%ebx addl %ebp,%eax xorl %edx,%esi rorl $7,%ecx addl %ebx,%eax addl 16(%esp),%edi pxor %xmm5,%xmm1 punpcklqdq %xmm0,%xmm3 xorl %ecx,%esi movl %eax,%ebp roll $5,%eax pxor %xmm2,%xmm1 movdqa %xmm5,96(%esp) addl %esi,%edi xorl %ecx,%ebp movdqa %xmm4,%xmm5 rorl $7,%ebx paddd %xmm0,%xmm4 addl %eax,%edi pxor %xmm3,%xmm1 addl 20(%esp),%edx xorl %ebx,%ebp movl %edi,%esi roll $5,%edi movdqa %xmm1,%xmm3 movdqa %xmm4,(%esp) addl %ebp,%edx xorl %ebx,%esi rorl $7,%eax addl %edi,%edx pslld $2,%xmm1 addl 24(%esp),%ecx xorl %eax,%esi psrld $30,%xmm3 movl %edx,%ebp roll $5,%edx addl %esi,%ecx xorl %eax,%ebp rorl $7,%edi addl %edx,%ecx por %xmm3,%xmm1 addl 28(%esp),%ebx xorl %edi,%ebp movdqa 64(%esp),%xmm3 movl %ecx,%esi roll $5,%ecx addl %ebp,%ebx xorl %edi,%esi rorl $7,%edx pshufd $238,%xmm0,%xmm4 addl %ecx,%ebx addl 32(%esp),%eax pxor %xmm6,%xmm2 punpcklqdq %xmm1,%xmm4 xorl %edx,%esi movl %ebx,%ebp roll $5,%ebx pxor %xmm3,%xmm2 movdqa %xmm6,64(%esp) addl %esi,%eax xorl %edx,%ebp movdqa 128(%esp),%xmm6 rorl $7,%ecx paddd %xmm1,%xmm5 addl %ebx,%eax pxor %xmm4,%xmm2 addl 36(%esp),%edi xorl %ecx,%ebp movl %eax,%esi roll $5,%eax movdqa %xmm2,%xmm4 movdqa %xmm5,16(%esp) addl %ebp,%edi xorl %ecx,%esi rorl $7,%ebx addl %eax,%edi pslld $2,%xmm2 addl 40(%esp),%edx xorl %ebx,%esi psrld $30,%xmm4 movl %edi,%ebp roll $5,%edi addl %esi,%edx xorl %ebx,%ebp rorl 
$7,%eax addl %edi,%edx por %xmm4,%xmm2 addl 44(%esp),%ecx xorl %eax,%ebp movdqa 80(%esp),%xmm4 movl %edx,%esi roll $5,%edx addl %ebp,%ecx xorl %eax,%esi rorl $7,%edi pshufd $238,%xmm1,%xmm5 addl %edx,%ecx addl 48(%esp),%ebx pxor %xmm7,%xmm3 punpcklqdq %xmm2,%xmm5 xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx pxor %xmm4,%xmm3 movdqa %xmm7,80(%esp) addl %esi,%ebx xorl %edi,%ebp movdqa %xmm6,%xmm7 rorl $7,%edx paddd %xmm2,%xmm6 addl %ecx,%ebx pxor %xmm5,%xmm3 addl 52(%esp),%eax xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx movdqa %xmm3,%xmm5 movdqa %xmm6,32(%esp) addl %ebp,%eax xorl %edx,%esi rorl $7,%ecx addl %ebx,%eax pslld $2,%xmm3 addl 56(%esp),%edi xorl %ecx,%esi psrld $30,%xmm5 movl %eax,%ebp roll $5,%eax addl %esi,%edi xorl %ecx,%ebp rorl $7,%ebx addl %eax,%edi por %xmm5,%xmm3 addl 60(%esp),%edx xorl %ebx,%ebp movdqa 96(%esp),%xmm5 movl %edi,%esi roll $5,%edi addl %ebp,%edx xorl %ebx,%esi rorl $7,%eax pshufd $238,%xmm2,%xmm6 addl %edi,%edx addl (%esp),%ecx pxor %xmm0,%xmm4 punpcklqdq %xmm3,%xmm6 xorl %eax,%esi movl %edx,%ebp roll $5,%edx pxor %xmm5,%xmm4 movdqa %xmm0,96(%esp) addl %esi,%ecx xorl %eax,%ebp movdqa %xmm7,%xmm0 rorl $7,%edi paddd %xmm3,%xmm7 addl %edx,%ecx pxor %xmm6,%xmm4 addl 4(%esp),%ebx xorl %edi,%ebp movl %ecx,%esi roll $5,%ecx movdqa %xmm4,%xmm6 movdqa %xmm7,48(%esp) addl %ebp,%ebx xorl %edi,%esi rorl $7,%edx addl %ecx,%ebx pslld $2,%xmm4 addl 8(%esp),%eax xorl %edx,%esi psrld $30,%xmm6 movl %ebx,%ebp roll $5,%ebx addl %esi,%eax xorl %edx,%ebp rorl $7,%ecx addl %ebx,%eax por %xmm6,%xmm4 addl 12(%esp),%edi xorl %ecx,%ebp movdqa 64(%esp),%xmm6 movl %eax,%esi roll $5,%eax addl %ebp,%edi xorl %ecx,%esi rorl $7,%ebx pshufd $238,%xmm3,%xmm7 addl %eax,%edi addl 16(%esp),%edx pxor %xmm1,%xmm5 punpcklqdq %xmm4,%xmm7 xorl %ebx,%esi movl %edi,%ebp roll $5,%edi pxor %xmm6,%xmm5 movdqa %xmm1,64(%esp) addl %esi,%edx xorl %ebx,%ebp movdqa %xmm0,%xmm1 rorl $7,%eax paddd %xmm4,%xmm0 addl %edi,%edx pxor %xmm7,%xmm5 addl 20(%esp),%ecx xorl %eax,%ebp movl %edx,%esi roll $5,%edx movdqa %xmm5,%xmm7 movdqa %xmm0,(%esp) addl %ebp,%ecx xorl %eax,%esi rorl $7,%edi addl %edx,%ecx pslld $2,%xmm5 addl 24(%esp),%ebx xorl %edi,%esi psrld $30,%xmm7 movl %ecx,%ebp roll $5,%ecx addl %esi,%ebx xorl %edi,%ebp rorl $7,%edx addl %ecx,%ebx por %xmm7,%xmm5 addl 28(%esp),%eax movdqa 80(%esp),%xmm7 rorl $7,%ecx movl %ebx,%esi xorl %edx,%ebp roll $5,%ebx pshufd $238,%xmm4,%xmm0 addl %ebp,%eax xorl %ecx,%esi xorl %edx,%ecx addl %ebx,%eax addl 32(%esp),%edi pxor %xmm2,%xmm6 punpcklqdq %xmm5,%xmm0 andl %ecx,%esi xorl %edx,%ecx rorl $7,%ebx pxor %xmm7,%xmm6 movdqa %xmm2,80(%esp) movl %eax,%ebp xorl %ecx,%esi roll $5,%eax movdqa %xmm1,%xmm2 addl %esi,%edi paddd %xmm5,%xmm1 xorl %ebx,%ebp pxor %xmm0,%xmm6 xorl %ecx,%ebx addl %eax,%edi addl 36(%esp),%edx andl %ebx,%ebp movdqa %xmm6,%xmm0 movdqa %xmm1,16(%esp) xorl %ecx,%ebx rorl $7,%eax movl %edi,%esi xorl %ebx,%ebp roll $5,%edi pslld $2,%xmm6 addl %ebp,%edx xorl %eax,%esi psrld $30,%xmm0 xorl %ebx,%eax addl %edi,%edx addl 40(%esp),%ecx andl %eax,%esi xorl %ebx,%eax rorl $7,%edi por %xmm0,%xmm6 movl %edx,%ebp xorl %eax,%esi movdqa 96(%esp),%xmm0 roll $5,%edx addl %esi,%ecx xorl %edi,%ebp xorl %eax,%edi addl %edx,%ecx pshufd $238,%xmm5,%xmm1 addl 44(%esp),%ebx andl %edi,%ebp xorl %eax,%edi rorl $7,%edx movl %ecx,%esi xorl %edi,%ebp roll $5,%ecx addl %ebp,%ebx xorl %edx,%esi xorl %edi,%edx addl %ecx,%ebx addl 48(%esp),%eax pxor %xmm3,%xmm7 punpcklqdq %xmm6,%xmm1 andl %edx,%esi xorl %edi,%edx rorl $7,%ecx pxor %xmm0,%xmm7 movdqa %xmm3,96(%esp) movl %ebx,%ebp xorl %edx,%esi 
roll $5,%ebx movdqa 144(%esp),%xmm3 addl %esi,%eax paddd %xmm6,%xmm2 xorl %ecx,%ebp pxor %xmm1,%xmm7 xorl %edx,%ecx addl %ebx,%eax addl 52(%esp),%edi andl %ecx,%ebp movdqa %xmm7,%xmm1 movdqa %xmm2,32(%esp) xorl %edx,%ecx rorl $7,%ebx movl %eax,%esi xorl %ecx,%ebp roll $5,%eax pslld $2,%xmm7 addl %ebp,%edi xorl %ebx,%esi psrld $30,%xmm1 xorl %ecx,%ebx addl %eax,%edi addl 56(%esp),%edx andl %ebx,%esi xorl %ecx,%ebx rorl $7,%eax por %xmm1,%xmm7 movl %edi,%ebp xorl %ebx,%esi movdqa 64(%esp),%xmm1 roll $5,%edi addl %esi,%edx xorl %eax,%ebp xorl %ebx,%eax addl %edi,%edx pshufd $238,%xmm6,%xmm2 addl 60(%esp),%ecx andl %eax,%ebp xorl %ebx,%eax rorl $7,%edi movl %edx,%esi xorl %eax,%ebp roll $5,%edx addl %ebp,%ecx xorl %edi,%esi xorl %eax,%edi addl %edx,%ecx addl (%esp),%ebx pxor %xmm4,%xmm0 punpcklqdq %xmm7,%xmm2 andl %edi,%esi xorl %eax,%edi rorl $7,%edx pxor %xmm1,%xmm0 movdqa %xmm4,64(%esp) movl %ecx,%ebp xorl %edi,%esi roll $5,%ecx movdqa %xmm3,%xmm4 addl %esi,%ebx paddd %xmm7,%xmm3 xorl %edx,%ebp pxor %xmm2,%xmm0 xorl %edi,%edx addl %ecx,%ebx addl 4(%esp),%eax andl %edx,%ebp movdqa %xmm0,%xmm2 movdqa %xmm3,48(%esp) xorl %edi,%edx rorl $7,%ecx movl %ebx,%esi xorl %edx,%ebp roll $5,%ebx pslld $2,%xmm0 addl %ebp,%eax xorl %ecx,%esi psrld $30,%xmm2 xorl %edx,%ecx addl %ebx,%eax addl 8(%esp),%edi andl %ecx,%esi xorl %edx,%ecx rorl $7,%ebx por %xmm2,%xmm0 movl %eax,%ebp xorl %ecx,%esi movdqa 80(%esp),%xmm2 roll $5,%eax addl %esi,%edi xorl %ebx,%ebp xorl %ecx,%ebx addl %eax,%edi pshufd $238,%xmm7,%xmm3 addl 12(%esp),%edx andl %ebx,%ebp xorl %ecx,%ebx rorl $7,%eax movl %edi,%esi xorl %ebx,%ebp roll $5,%edi addl %ebp,%edx xorl %eax,%esi xorl %ebx,%eax addl %edi,%edx addl 16(%esp),%ecx pxor %xmm5,%xmm1 punpcklqdq %xmm0,%xmm3 andl %eax,%esi xorl %ebx,%eax rorl $7,%edi pxor %xmm2,%xmm1 movdqa %xmm5,80(%esp) movl %edx,%ebp xorl %eax,%esi roll $5,%edx movdqa %xmm4,%xmm5 addl %esi,%ecx paddd %xmm0,%xmm4 xorl %edi,%ebp pxor %xmm3,%xmm1 xorl %eax,%edi addl %edx,%ecx addl 20(%esp),%ebx andl %edi,%ebp movdqa %xmm1,%xmm3 movdqa %xmm4,(%esp) xorl %eax,%edi rorl $7,%edx movl %ecx,%esi xorl %edi,%ebp roll $5,%ecx pslld $2,%xmm1 addl %ebp,%ebx xorl %edx,%esi psrld $30,%xmm3 xorl %edi,%edx addl %ecx,%ebx addl 24(%esp),%eax andl %edx,%esi xorl %edi,%edx rorl $7,%ecx por %xmm3,%xmm1 movl %ebx,%ebp xorl %edx,%esi movdqa 96(%esp),%xmm3 roll $5,%ebx addl %esi,%eax xorl %ecx,%ebp xorl %edx,%ecx addl %ebx,%eax pshufd $238,%xmm0,%xmm4 addl 28(%esp),%edi andl %ecx,%ebp xorl %edx,%ecx rorl $7,%ebx movl %eax,%esi xorl %ecx,%ebp roll $5,%eax addl %ebp,%edi xorl %ebx,%esi xorl %ecx,%ebx addl %eax,%edi addl 32(%esp),%edx pxor %xmm6,%xmm2 punpcklqdq %xmm1,%xmm4 andl %ebx,%esi xorl %ecx,%ebx rorl $7,%eax pxor %xmm3,%xmm2 movdqa %xmm6,96(%esp) movl %edi,%ebp xorl %ebx,%esi roll $5,%edi movdqa %xmm5,%xmm6 addl %esi,%edx paddd %xmm1,%xmm5 xorl %eax,%ebp pxor %xmm4,%xmm2 xorl %ebx,%eax addl %edi,%edx addl 36(%esp),%ecx andl %eax,%ebp movdqa %xmm2,%xmm4 movdqa %xmm5,16(%esp) xorl %ebx,%eax rorl $7,%edi movl %edx,%esi xorl %eax,%ebp roll $5,%edx pslld $2,%xmm2 addl %ebp,%ecx xorl %edi,%esi psrld $30,%xmm4 xorl %eax,%edi addl %edx,%ecx addl 40(%esp),%ebx andl %edi,%esi xorl %eax,%edi rorl $7,%edx por %xmm4,%xmm2 movl %ecx,%ebp xorl %edi,%esi movdqa 64(%esp),%xmm4 roll $5,%ecx addl %esi,%ebx xorl %edx,%ebp xorl %edi,%edx addl %ecx,%ebx pshufd $238,%xmm1,%xmm5 addl 44(%esp),%eax andl %edx,%ebp xorl %edi,%edx rorl $7,%ecx movl %ebx,%esi xorl %edx,%ebp roll $5,%ebx addl %ebp,%eax xorl %edx,%esi addl %ebx,%eax addl 48(%esp),%edi pxor 
%xmm7,%xmm3 punpcklqdq %xmm2,%xmm5 xorl %ecx,%esi movl %eax,%ebp roll $5,%eax pxor %xmm4,%xmm3 movdqa %xmm7,64(%esp) addl %esi,%edi xorl %ecx,%ebp movdqa %xmm6,%xmm7 rorl $7,%ebx paddd %xmm2,%xmm6 addl %eax,%edi pxor %xmm5,%xmm3 addl 52(%esp),%edx xorl %ebx,%ebp movl %edi,%esi roll $5,%edi movdqa %xmm3,%xmm5 movdqa %xmm6,32(%esp) addl %ebp,%edx xorl %ebx,%esi rorl $7,%eax addl %edi,%edx pslld $2,%xmm3 addl 56(%esp),%ecx xorl %eax,%esi psrld $30,%xmm5 movl %edx,%ebp roll $5,%edx addl %esi,%ecx xorl %eax,%ebp rorl $7,%edi addl %edx,%ecx por %xmm5,%xmm3 addl 60(%esp),%ebx xorl %edi,%ebp movl %ecx,%esi roll $5,%ecx addl %ebp,%ebx xorl %edi,%esi rorl $7,%edx addl %ecx,%ebx addl (%esp),%eax xorl %edx,%esi movl %ebx,%ebp roll $5,%ebx addl %esi,%eax xorl %edx,%ebp rorl $7,%ecx paddd %xmm3,%xmm7 addl %ebx,%eax addl 4(%esp),%edi xorl %ecx,%ebp movl %eax,%esi movdqa %xmm7,48(%esp) roll $5,%eax addl %ebp,%edi xorl %ecx,%esi rorl $7,%ebx addl %eax,%edi addl 8(%esp),%edx xorl %ebx,%esi movl %edi,%ebp roll $5,%edi addl %esi,%edx xorl %ebx,%ebp rorl $7,%eax addl %edi,%edx addl 12(%esp),%ecx xorl %eax,%ebp movl %edx,%esi roll $5,%edx addl %ebp,%ecx xorl %eax,%esi rorl $7,%edi addl %edx,%ecx movl 196(%esp),%ebp cmpl 200(%esp),%ebp je .L007done movdqa 160(%esp),%xmm7 movdqa 176(%esp),%xmm6 movdqu (%ebp),%xmm0 movdqu 16(%ebp),%xmm1 movdqu 32(%ebp),%xmm2 movdqu 48(%ebp),%xmm3 addl $64,%ebp .byte 102,15,56,0,198 movl %ebp,196(%esp) movdqa %xmm7,96(%esp) addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx addl %esi,%ebx xorl %edi,%ebp rorl $7,%edx .byte 102,15,56,0,206 addl %ecx,%ebx addl 20(%esp),%eax xorl %edx,%ebp movl %ebx,%esi paddd %xmm7,%xmm0 roll $5,%ebx addl %ebp,%eax xorl %edx,%esi rorl $7,%ecx movdqa %xmm0,(%esp) addl %ebx,%eax addl 24(%esp),%edi xorl %ecx,%esi movl %eax,%ebp psubd %xmm7,%xmm0 roll $5,%eax addl %esi,%edi xorl %ecx,%ebp rorl $7,%ebx addl %eax,%edi addl 28(%esp),%edx xorl %ebx,%ebp movl %edi,%esi roll $5,%edi addl %ebp,%edx xorl %ebx,%esi rorl $7,%eax addl %edi,%edx addl 32(%esp),%ecx xorl %eax,%esi movl %edx,%ebp roll $5,%edx addl %esi,%ecx xorl %eax,%ebp rorl $7,%edi .byte 102,15,56,0,214 addl %edx,%ecx addl 36(%esp),%ebx xorl %edi,%ebp movl %ecx,%esi paddd %xmm7,%xmm1 roll $5,%ecx addl %ebp,%ebx xorl %edi,%esi rorl $7,%edx movdqa %xmm1,16(%esp) addl %ecx,%ebx addl 40(%esp),%eax xorl %edx,%esi movl %ebx,%ebp psubd %xmm7,%xmm1 roll $5,%ebx addl %esi,%eax xorl %edx,%ebp rorl $7,%ecx addl %ebx,%eax addl 44(%esp),%edi xorl %ecx,%ebp movl %eax,%esi roll $5,%eax addl %ebp,%edi xorl %ecx,%esi rorl $7,%ebx addl %eax,%edi addl 48(%esp),%edx xorl %ebx,%esi movl %edi,%ebp roll $5,%edi addl %esi,%edx xorl %ebx,%ebp rorl $7,%eax .byte 102,15,56,0,222 addl %edi,%edx addl 52(%esp),%ecx xorl %eax,%ebp movl %edx,%esi paddd %xmm7,%xmm2 roll $5,%edx addl %ebp,%ecx xorl %eax,%esi rorl $7,%edi movdqa %xmm2,32(%esp) addl %edx,%ecx addl 56(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp psubd %xmm7,%xmm2 roll $5,%ecx addl %esi,%ebx xorl %edi,%ebp rorl $7,%edx addl %ecx,%ebx addl 60(%esp),%eax xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx addl %ebp,%eax rorl $7,%ecx addl %ebx,%eax movl 192(%esp),%ebp addl (%ebp),%eax addl 4(%ebp),%esi addl 8(%ebp),%ecx movl %eax,(%ebp) addl 12(%ebp),%edx movl %esi,4(%ebp) addl 16(%ebp),%edi movl %ecx,8(%ebp) movl %ecx,%ebx movl %edx,12(%ebp) xorl %edx,%ebx movl %edi,16(%ebp) movl %esi,%ebp pshufd $238,%xmm0,%xmm4 andl %ebx,%esi movl %ebp,%ebx jmp .L006loop .align 16 .L007done: addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx addl %esi,%ebx xorl %edi,%ebp rorl 
$7,%edx addl %ecx,%ebx addl 20(%esp),%eax xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx addl %ebp,%eax xorl %edx,%esi rorl $7,%ecx addl %ebx,%eax addl 24(%esp),%edi xorl %ecx,%esi movl %eax,%ebp roll $5,%eax addl %esi,%edi xorl %ecx,%ebp rorl $7,%ebx addl %eax,%edi addl 28(%esp),%edx xorl %ebx,%ebp movl %edi,%esi roll $5,%edi addl %ebp,%edx xorl %ebx,%esi rorl $7,%eax addl %edi,%edx addl 32(%esp),%ecx xorl %eax,%esi movl %edx,%ebp roll $5,%edx addl %esi,%ecx xorl %eax,%ebp rorl $7,%edi addl %edx,%ecx addl 36(%esp),%ebx xorl %edi,%ebp movl %ecx,%esi roll $5,%ecx addl %ebp,%ebx xorl %edi,%esi rorl $7,%edx addl %ecx,%ebx addl 40(%esp),%eax xorl %edx,%esi movl %ebx,%ebp roll $5,%ebx addl %esi,%eax xorl %edx,%ebp rorl $7,%ecx addl %ebx,%eax addl 44(%esp),%edi xorl %ecx,%ebp movl %eax,%esi roll $5,%eax addl %ebp,%edi xorl %ecx,%esi rorl $7,%ebx addl %eax,%edi addl 48(%esp),%edx xorl %ebx,%esi movl %edi,%ebp roll $5,%edi addl %esi,%edx xorl %ebx,%ebp rorl $7,%eax addl %edi,%edx addl 52(%esp),%ecx xorl %eax,%ebp movl %edx,%esi roll $5,%edx addl %ebp,%ecx xorl %eax,%esi rorl $7,%edi addl %edx,%ecx addl 56(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx addl %esi,%ebx xorl %edi,%ebp rorl $7,%edx addl %ecx,%ebx addl 60(%esp),%eax xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx addl %ebp,%eax rorl $7,%ecx addl %ebx,%eax movl 192(%esp),%ebp addl (%ebp),%eax movl 204(%esp),%esp addl 4(%ebp),%esi addl 8(%ebp),%ecx movl %eax,(%ebp) addl 12(%ebp),%edx movl %esi,4(%ebp) addl 16(%ebp),%edi movl %ecx,8(%ebp) movl %edx,12(%ebp) movl %edi,16(%ebp) popl %edi popl %esi popl %ebx popl %ebp ret .size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3 +.type _sha1_block_data_order_avx,@function +.align 16 +_sha1_block_data_order_avx: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L008pic_point +.L008pic_point: + popl %ebp + leal .LK_XX_XX-.L008pic_point(%ebp),%ebp +.Lavx_shortcut: + vzeroall + vmovdqa (%ebp),%xmm7 + vmovdqa 16(%ebp),%xmm0 + vmovdqa 32(%ebp),%xmm1 + vmovdqa 48(%ebp),%xmm2 + vmovdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + vmovdqa %xmm0,112(%esp) + vmovdqa %xmm1,128(%esp) + vmovdqa %xmm2,144(%esp) + shll $6,%edx + vmovdqa %xmm7,160(%esp) + addl %ebp,%edx + vmovdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + vmovdqu -64(%ebp),%xmm0 + vmovdqu -48(%ebp),%xmm1 + vmovdqu -32(%ebp),%xmm2 + vmovdqu -16(%ebp),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vmovdqa %xmm7,96(%esp) + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm7,%xmm0,%xmm4 + vpaddd %xmm7,%xmm1,%xmm5 + vpaddd %xmm7,%xmm2,%xmm6 + vmovdqa %xmm4,(%esp) + movl %ecx,%ebp + vmovdqa %xmm5,16(%esp) + xorl %edx,%ebp + vmovdqa %xmm6,32(%esp) + andl %ebp,%esi + jmp .L009loop +.align 16 +.L009loop: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%ebp + addl (%esp),%edi + vpaddd %xmm3,%xmm7,%xmm7 + vmovdqa %xmm0,64(%esp) + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%edi + vpxor %xmm2,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vmovdqa %xmm7,48(%esp) + movl %edi,%esi + addl 4(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%edi,%edi + addl %ebp,%edx + andl 
%eax,%esi + vpsrld $31,%xmm4,%xmm6 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm0 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrld $30,%xmm0,%xmm7 + vpor %xmm6,%xmm4,%xmm4 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + vpslld $2,%xmm0,%xmm0 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vpxor %xmm7,%xmm4,%xmm4 + movl %ecx,%esi + addl 12(%esp),%ebx + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpxor %xmm0,%xmm4,%xmm4 + addl %ebp,%ebx + andl %edx,%esi + vmovdqa 96(%esp),%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%ebp + addl 16(%esp),%eax + vpaddd %xmm4,%xmm0,%xmm0 + vmovdqa %xmm1,80(%esp) + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vmovdqa %xmm0,(%esp) + movl %eax,%esi + addl 20(%esp),%edi + vpxor %xmm7,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %ebp,%edi + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm7 + xorl %ecx,%ebx + addl %eax,%edi + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm1 + vpaddd %xmm5,%xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm0 + vpor %xmm7,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpxor %xmm0,%xmm5,%xmm5 + movl %edx,%esi + addl 28(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpxor %xmm1,%xmm5,%xmm5 + addl %ebp,%ecx + andl %edi,%esi + vmovdqa 112(%esp),%xmm1 + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%ebp + addl 32(%esp),%ebx + vpaddd %xmm5,%xmm1,%xmm1 + vmovdqa %xmm2,96(%esp) + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + vpxor %xmm2,%xmm6,%xmm6 + xorl %edi,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%ecx,%ecx + xorl %edi,%ebp + vmovdqa %xmm1,16(%esp) + movl %ebx,%esi + addl 36(%esp),%eax + vpxor %xmm0,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + addl %ebp,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm2 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm1 + vpor %xmm0,%xmm6,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + vmovdqa 64(%esp),%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vpxor %xmm1,%xmm6,%xmm6 + movl %edi,%esi + addl 44(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpxor %xmm2,%xmm6,%xmm6 + addl %ebp,%edx + andl %eax,%esi + vmovdqa 112(%esp),%xmm2 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%ebp + addl 48(%esp),%ecx + vpaddd %xmm6,%xmm2,%xmm2 + vmovdqa %xmm3,64(%esp) + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm1 + addl %esi,%ecx + andl %edi,%ebp + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%edi + addl %edx,%ecx + vpxor %xmm5,%xmm1,%xmm1 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vmovdqa %xmm2,32(%esp) + movl %ecx,%esi + addl 52(%esp),%ebx + vpxor %xmm1,%xmm7,%xmm7 + xorl %edi,%edx + shldl $5,%ecx,%ecx + addl %ebp,%ebx + andl %edx,%esi + vpsrld 
$31,%xmm7,%xmm1 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpslldq $12,%xmm7,%xmm3 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm2 + vpor %xmm1,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + vmovdqa 80(%esp),%xmm1 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vpxor %xmm2,%xmm7,%xmm7 + movl %eax,%esi + addl 60(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpxor %xmm3,%xmm7,%xmm7 + addl %ebp,%edi + andl %ebx,%esi + vmovdqa 112(%esp),%xmm3 + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,80(%esp) + xorl %ebx,%eax + shldl $5,%edi,%edi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + addl %esi,%edx + andl %eax,%ebp + vpxor %xmm2,%xmm0,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + vpor %xmm2,%xmm0,%xmm0 + xorl %edi,%edx + shldl $5,%ecx,%ecx + vmovdqa 96(%esp),%xmm2 + addl %esi,%ebx + andl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm3,%xmm1,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm3,%xmm1,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + vmovdqa 64(%esp),%xmm3 + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + vmovdqa 128(%esp),%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm4,%xmm2,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vpor %xmm4,%xmm2,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + vmovdqa 80(%esp),%xmm4 + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%ebx + xorl %edi,%esi + movl 
%ecx,%ebp + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + vmovdqa 96(%esp),%xmm5 + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpalignr $8,%xmm2,%xmm3,%xmm6 + vpxor %xmm0,%xmm4,%xmm4 + addl (%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + vmovdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + vmovdqa %xmm7,%xmm0 + vpaddd %xmm3,%xmm7,%xmm7 + shrdl $7,%edi,%edi + addl %edx,%ecx + vpxor %xmm6,%xmm4,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm6 + vmovdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm6,%xmm4,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + vmovdqa 64(%esp),%xmm6 + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpalignr $8,%xmm3,%xmm4,%xmm7 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + vpxor %xmm6,%xmm5,%xmm5 + vmovdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + vmovdqa %xmm0,%xmm1 + vpaddd %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + addl %edi,%edx + vpxor %xmm7,%xmm5,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm7 + vmovdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm7,%xmm5,%xmm5 + addl 28(%esp),%eax + vmovdqa 80(%esp),%xmm7 + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + vmovdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + vmovdqa %xmm1,%xmm2 + vpaddd %xmm5,%xmm1,%xmm1 + shldl $5,%eax,%eax + addl %esi,%edi + vpxor %xmm0,%xmm6,%xmm6 + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + vpsrld $30,%xmm6,%xmm0 + vmovdqa %xmm1,16(%esp) + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + vpor %xmm0,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%edi,%edi + vmovdqa 96(%esp),%xmm0 + movl %edx,%ebp + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 44(%esp),%ebx + andl %edi,%ebp + 
xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm1 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + vmovdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + vmovdqa 144(%esp),%xmm3 + vpaddd %xmm6,%xmm2,%xmm2 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + vpsrld $30,%xmm7,%xmm1 + vmovdqa %xmm2,32(%esp) + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + vpor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vmovdqa 64(%esp),%xmm1 + movl %edi,%ebp + xorl %ebx,%esi + shldl $5,%edi,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + addl (%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm2,%xmm0,%xmm0 + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + vpor %xmm2,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vmovdqa 80(%esp),%xmm2 + movl %eax,%ebp + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%edi,%edi + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm3,%xmm1,%xmm1 + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + vpor %xmm3,%xmm1,%xmm1 + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vmovdqa 96(%esp),%xmm3 + movl %ebx,%ebp + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%edx + andl 
%ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + vmovdqa %xmm5,%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shldl $5,%edi,%edi + addl %esi,%edx + vpxor %xmm4,%xmm2,%xmm2 + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + vpor %xmm4,%xmm2,%xmm2 + xorl %eax,%edi + shrdl $7,%edx,%edx + vmovdqa 64(%esp),%xmm4 + movl %ecx,%ebp + xorl %edi,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl (%esp),%eax + vpaddd %xmm3,%xmm7,%xmm7 + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm7,48(%esp) + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je .L010done + vmovdqa 160(%esp),%xmm7 + vmovdqa 176(%esp),%xmm6 + vmovdqu (%ebp),%xmm0 + vmovdqu 16(%ebp),%xmm1 + vmovdqu 32(%ebp),%xmm2 + vmovdqu 48(%ebp),%xmm3 + addl $64,%ebp + vpshufb %xmm6,%xmm0,%xmm0 + movl %ebp,196(%esp) + vmovdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpaddd %xmm7,%xmm0,%xmm4 + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,(%esp) + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + 
movl %edx,%ebp + shldl $5,%edx,%edx + vpaddd %xmm7,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vmovdqa %xmm5,16(%esp) + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %edi,%ebp + shldl $5,%edi,%edi + vpaddd %xmm7,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vmovdqa %xmm6,32(%esp) + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,%ebx + movl %ecx,8(%ebp) + xorl %edx,%ebx + movl %edx,12(%ebp) + movl %edi,16(%ebp) + movl %esi,%ebp + andl %ebx,%esi + movl %ebp,%ebx + jmp .L009loop +.align 16 +.L010done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroall + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 
16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_avx,.-_sha1_block_data_order_avx .align 64 .LK_XX_XX: .long 1518500249,1518500249,1518500249,1518500249 .long 1859775393,1859775393,1859775393,1859775393 .long 2400959708,2400959708,2400959708,2400959708 .long 3395469782,3395469782,3395469782,3395469782 .long 66051,67438087,134810123,202182159 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 .byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .comm OPENSSL_ia32cap_P,16,4 #endif Index: head/secure/lib/libcrypto/i386/sha256-586.S =================================================================== --- head/secure/lib/libcrypto/i386/sha256-586.S (revision 299480) +++ head/secure/lib/libcrypto/i386/sha256-586.S (revision 299481) @@ -1,9158 +1,13571 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from sha256-586.pl. #ifdef PIC .file "sha256-586.S" .text .globl sha256_block_data_order .type sha256_block_data_order,@function .align 16 sha256_block_data_order: .L_sha256_block_data_order_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl %esp,%ebx call .L000pic_point .L000pic_point: popl %ebp leal .L001K256-.L000pic_point(%ebp),%ebp subl $16,%esp andl $-64,%esp shll $6,%eax addl %edi,%eax movl %esi,(%esp) movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) leal OPENSSL_ia32cap_P-.L001K256(%ebp),%edx movl (%edx),%ecx movl 4(%edx),%ebx testl $1048576,%ecx jnz .L002loop movl 8(%edx),%edx testl $16777216,%ecx jz .L003no_xmm andl $1073741824,%ecx andl $268435968,%ebx testl $536870912,%edx jnz .L004shaext orl %ebx,%ecx andl $1342177280,%ecx cmpl $1342177280,%ecx + je .L005AVX testl $512,%ebx - jnz .L005SSSE3 + jnz .L006SSSE3 .L003no_xmm: subl %edi,%eax cmpl $256,%eax - jae .L006unrolled + jae .L007unrolled jmp .L002loop .align 16 .L002loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx bswap %eax movl 12(%edi),%edx bswap %ebx pushl %eax bswap %ecx pushl %ebx bswap %edx pushl %ecx pushl %edx movl 16(%edi),%eax movl 20(%edi),%ebx movl 24(%edi),%ecx bswap %eax movl 28(%edi),%edx bswap %ebx pushl %eax bswap %ecx pushl %ebx bswap %edx pushl %ecx pushl %edx movl 32(%edi),%eax movl 36(%edi),%ebx movl 40(%edi),%ecx bswap %eax movl 44(%edi),%edx bswap %ebx pushl %eax bswap %ecx pushl %ebx bswap %edx pushl %ecx pushl %edx movl 48(%edi),%eax movl 52(%edi),%ebx movl 56(%edi),%ecx bswap %eax movl 60(%edi),%edx bswap %ebx pushl %eax bswap %ecx pushl %ebx bswap %edx pushl %ecx pushl %edx addl $64,%edi leal -36(%esp),%esp movl %edi,104(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edi movl %ebx,8(%esp) xorl %ecx,%ebx movl %ecx,12(%esp) movl %edi,16(%esp) movl %ebx,(%esp) movl 16(%esi),%edx movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edi movl %ebx,24(%esp) movl %ecx,28(%esp) movl %edi,32(%esp) .align 16 -.L00700_15: +.L00800_15: movl %edx,%ecx movl 24(%esp),%esi rorl $14,%ecx movl 28(%esp),%edi xorl %edx,%ecx xorl %edi,%esi movl 96(%esp),%ebx rorl $5,%ecx andl %edx,%esi movl %edx,20(%esp) xorl %ecx,%edx addl 32(%esp),%ebx xorl %edi,%esi rorl $6,%edx movl %eax,%ecx addl %esi,%ebx rorl $9,%ecx addl %edx,%ebx movl 8(%esp),%edi xorl %eax,%ecx movl %eax,4(%esp) leal -4(%esp),%esp rorl $11,%ecx movl 
(%ebp),%esi xorl %eax,%ecx movl 20(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %esi,%ebx movl %eax,(%esp) addl %ebx,%edx andl 4(%esp),%eax addl %ecx,%ebx xorl %edi,%eax addl $4,%ebp addl %ebx,%eax cmpl $3248222580,%esi - jne .L00700_15 + jne .L00800_15 movl 156(%esp),%ecx - jmp .L00816_63 + jmp .L00916_63 .align 16 -.L00816_63: +.L00916_63: movl %ecx,%ebx movl 104(%esp),%esi rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 160(%esp),%ebx shrl $10,%edi addl 124(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 24(%esp),%esi rorl $14,%ecx addl %edi,%ebx movl 28(%esp),%edi xorl %edx,%ecx xorl %edi,%esi movl %ebx,96(%esp) rorl $5,%ecx andl %edx,%esi movl %edx,20(%esp) xorl %ecx,%edx addl 32(%esp),%ebx xorl %edi,%esi rorl $6,%edx movl %eax,%ecx addl %esi,%ebx rorl $9,%ecx addl %edx,%ebx movl 8(%esp),%edi xorl %eax,%ecx movl %eax,4(%esp) leal -4(%esp),%esp rorl $11,%ecx movl (%ebp),%esi xorl %eax,%ecx movl 20(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %esi,%ebx movl %eax,(%esp) addl %ebx,%edx andl 4(%esp),%eax addl %ecx,%ebx xorl %edi,%eax movl 156(%esp),%ecx addl $4,%ebp addl %ebx,%eax cmpl $3329325298,%esi - jne .L00816_63 + jne .L00916_63 movl 356(%esp),%esi movl 8(%esp),%ebx movl 16(%esp),%ecx addl (%esi),%eax addl 4(%esi),%ebx addl 8(%esi),%edi addl 12(%esi),%ecx movl %eax,(%esi) movl %ebx,4(%esi) movl %edi,8(%esi) movl %ecx,12(%esi) movl 24(%esp),%eax movl 28(%esp),%ebx movl 32(%esp),%ecx movl 360(%esp),%edi addl 16(%esi),%edx addl 20(%esi),%eax addl 24(%esi),%ebx addl 28(%esi),%ecx movl %edx,16(%esi) movl %eax,20(%esi) movl %ebx,24(%esi) movl %ecx,28(%esi) leal 356(%esp),%esp subl $256,%ebp cmpl 8(%esp),%edi jb .L002loop movl 12(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L001K256: .long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 .long 66051,67438087,134810123,202182159 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .align 16 -.L006unrolled: +.L007unrolled: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebp movl 8(%esi),%ecx movl 12(%esi),%ebx movl %ebp,4(%esp) xorl %ecx,%ebp movl %ecx,8(%esp) movl %ebx,12(%esp) movl 16(%esi),%edx movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%esi movl %ebx,20(%esp) movl %ecx,24(%esp) movl %esi,28(%esp) - jmp .L009grand_loop + jmp .L010grand_loop .align 16 -.L009grand_loop: +.L010grand_loop: movl (%edi),%ebx movl 4(%edi),%ecx bswap %ebx movl 8(%edi),%esi bswap %ecx movl %ebx,32(%esp) bswap %esi movl %ecx,36(%esp) movl %esi,40(%esp) movl 12(%edi),%ebx movl 16(%edi),%ecx bswap %ebx movl 20(%edi),%esi bswap %ecx movl %ebx,44(%esp) bswap %esi movl %ecx,48(%esp) movl %esi,52(%esp) movl 24(%edi),%ebx movl 
28(%edi),%ecx bswap %ebx movl 32(%edi),%esi bswap %ecx movl %ebx,56(%esp) bswap %esi movl %ecx,60(%esp) movl %esi,64(%esp) movl 36(%edi),%ebx movl 40(%edi),%ecx bswap %ebx movl 44(%edi),%esi bswap %ecx movl %ebx,68(%esp) bswap %esi movl %ecx,72(%esp) movl %esi,76(%esp) movl 48(%edi),%ebx movl 52(%edi),%ecx bswap %ebx movl 56(%edi),%esi bswap %ecx movl %ebx,80(%esp) bswap %esi movl %ecx,84(%esp) movl %esi,88(%esp) movl 60(%edi),%ebx addl $64,%edi bswap %ebx movl %edi,100(%esp) movl %ebx,92(%esp) movl %edx,%ecx movl 20(%esp),%esi rorl $14,%edx movl 24(%esp),%edi xorl %ecx,%edx movl 32(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1116352408(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 16(%esp),%ecx rorl $14,%edx movl 20(%esp),%edi xorl %esi,%edx movl 36(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1899447441(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl %edx,%ecx movl 12(%esp),%esi rorl $14,%edx movl 16(%esp),%edi xorl %ecx,%edx movl 40(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3049323471(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 8(%esp),%ecx rorl $14,%edx movl 12(%esp),%edi xorl %esi,%edx movl 44(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3921009573(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl %edx,%ecx movl 4(%esp),%esi rorl $14,%edx movl 8(%esp),%edi xorl %ecx,%edx movl 48(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 961987163(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl (%esp),%ecx rorl $14,%edx movl 4(%esp),%edi xorl %esi,%edx movl 52(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1508970993(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl %edx,%ecx movl 28(%esp),%esi rorl 
$14,%edx movl (%esp),%edi xorl %ecx,%edx movl 56(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2453635748(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 24(%esp),%ecx rorl $14,%edx movl 28(%esp),%edi xorl %esi,%edx movl 60(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2870763221(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl %edx,%ecx movl 20(%esp),%esi rorl $14,%edx movl 24(%esp),%edi xorl %ecx,%edx movl 64(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3624381080(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 16(%esp),%ecx rorl $14,%edx movl 20(%esp),%edi xorl %esi,%edx movl 68(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 310598401(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl %edx,%ecx movl 12(%esp),%esi rorl $14,%edx movl 16(%esp),%edi xorl %ecx,%edx movl 72(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 607225278(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 8(%esp),%ecx rorl $14,%edx movl 12(%esp),%edi xorl %esi,%edx movl 76(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1426881987(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl %edx,%ecx movl 4(%esp),%esi rorl $14,%edx movl 8(%esp),%edi xorl %ecx,%edx movl 80(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1925078388(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl (%esp),%ecx rorl $14,%edx movl 4(%esp),%edi xorl 
%esi,%edx movl 84(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2162078206(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl %edx,%ecx movl 28(%esp),%esi rorl $14,%edx movl (%esp),%edi xorl %ecx,%edx movl 88(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2614888103(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 24(%esp),%ecx rorl $14,%edx movl 28(%esp),%edi xorl %esi,%edx movl 92(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3248222580(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 36(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 88(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 32(%esp),%ebx shrl $10,%edi addl 68(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,32(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3835390401(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 40(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 92(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 36(%esp),%ebx shrl $10,%edi addl 72(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,36(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 4022224774(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 44(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 32(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 40(%esp),%ebx shrl $10,%edi addl 76(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,40(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl 
%eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 264347078(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 48(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 36(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 44(%esp),%ebx shrl $10,%edi addl 80(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,44(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 604807628(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 52(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 40(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 48(%esp),%ebx shrl $10,%edi addl 84(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,48(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 770255983(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 56(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 44(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 52(%esp),%ebx shrl $10,%edi addl 88(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,52(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1249150122(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 60(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 48(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 56(%esp),%ebx shrl $10,%edi addl 92(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx movl %ebx,56(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1555081692(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 64(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 52(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 60(%esp),%ebx shrl $10,%edi addl 32(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 
28(%esp),%edi xorl %esi,%edx movl %ebx,60(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1996064986(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 68(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 56(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 64(%esp),%ebx shrl $10,%edi addl 36(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,64(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2554220882(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 72(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 60(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 68(%esp),%ebx shrl $10,%edi addl 40(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,68(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2821834349(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 76(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 64(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 72(%esp),%ebx shrl $10,%edi addl 44(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,72(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2952996808(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 80(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 68(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 76(%esp),%ebx shrl $10,%edi addl 48(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,76(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3210313671(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 84(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 72(%esp),%esi movl 
%ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 80(%esp),%ebx shrl $10,%edi addl 52(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,80(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3336571891(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 88(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 76(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 84(%esp),%ebx shrl $10,%edi addl 56(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,84(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3584528711(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 92(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 80(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 88(%esp),%ebx shrl $10,%edi addl 60(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx movl %ebx,88(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 113926993(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 32(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 84(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 92(%esp),%ebx shrl $10,%edi addl 64(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx movl %ebx,92(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 338241895(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 36(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 88(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 32(%esp),%ebx shrl $10,%edi addl 68(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,32(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl 
%eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 666307205(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 40(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 92(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 36(%esp),%ebx shrl $10,%edi addl 72(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,36(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 773529912(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 44(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 32(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 40(%esp),%ebx shrl $10,%edi addl 76(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,40(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1294757372(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 48(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 36(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 44(%esp),%ebx shrl $10,%edi addl 80(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,44(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1396182291(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 52(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 40(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 48(%esp),%ebx shrl $10,%edi addl 84(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,48(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1695183700(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 56(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 44(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 52(%esp),%ebx shrl $10,%edi addl 88(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 
(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,52(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1986661051(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 60(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 48(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 56(%esp),%ebx shrl $10,%edi addl 92(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx movl %ebx,56(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2177026350(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 64(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 52(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 60(%esp),%ebx shrl $10,%edi addl 32(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx movl %ebx,60(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2456956037(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 68(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 56(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 64(%esp),%ebx shrl $10,%edi addl 36(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,64(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2730485921(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 72(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 60(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 68(%esp),%ebx shrl $10,%edi addl 40(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,68(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2820302411(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 76(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 
8(%esp),%edx addl %esi,%eax movl 64(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 72(%esp),%ebx shrl $10,%edi addl 44(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,72(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3259730800(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 80(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 68(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 76(%esp),%ebx shrl $10,%edi addl 48(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,76(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3345764771(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 84(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 72(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 80(%esp),%ebx shrl $10,%edi addl 52(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,80(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3516065817(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 88(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 76(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 84(%esp),%ebx shrl $10,%edi addl 56(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,84(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3600352804(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 92(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 80(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 88(%esp),%ebx shrl $10,%edi addl 60(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx movl %ebx,88(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx 
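# Note: the scalar code in this loop is the standard SHA-256 compression step,
#   T1 = h + Sigma1(e) + Ch(e,f,g) + K[t] + W[t],  T2 = Sigma0(a) + Maj(a,b,c),
# with Sigma1(e) built up by the rorl $14/$5/$6 chain, Sigma0(a) by rorl $9/$11/$2,
# and each "leal 0x...(%ebx,%edx,1)" folding in the round constant K[t].
# For rounds 16..63 the schedule word W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]
# is recomputed in place in the 32(%esp)..92(%esp) slots (the shrl $3/$10 and rorl $7/$17 chains).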
movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 4094571909(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 32(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 84(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 92(%esp),%ebx shrl $10,%edi addl 64(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx movl %ebx,92(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 275423344(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 36(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 88(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 32(%esp),%ebx shrl $10,%edi addl 68(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,32(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 430227734(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 40(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 92(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 36(%esp),%ebx shrl $10,%edi addl 72(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,36(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 506948616(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 44(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 32(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 40(%esp),%ebx shrl $10,%edi addl 76(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,40(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 659060556(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 48(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 36(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 44(%esp),%ebx shrl $10,%edi addl 
80(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,44(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 883997877(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 52(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 40(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 48(%esp),%ebx shrl $10,%edi addl 84(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,48(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 958139571(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 56(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 44(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 52(%esp),%ebx shrl $10,%edi addl 88(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,52(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1322822218(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 60(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 48(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 56(%esp),%ebx shrl $10,%edi addl 92(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx movl %ebx,56(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1537002063(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 64(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 52(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 60(%esp),%ebx shrl $10,%edi addl 32(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx movl %ebx,60(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1747873779(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 
68(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 56(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 64(%esp),%ebx shrl $10,%edi addl 36(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,64(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1955562222(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 72(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 60(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 68(%esp),%ebx shrl $10,%edi addl 40(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,68(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2024104815(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 76(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 64(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 72(%esp),%ebx shrl $10,%edi addl 44(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,72(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2227730452(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 80(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 68(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 76(%esp),%ebx shrl $10,%edi addl 48(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,76(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2361852424(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 84(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 72(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 80(%esp),%ebx shrl $10,%edi addl 52(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,80(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx 
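# A few rounds below, after round 63, the eight working values are added back
# into the hash context pointed to by 96(%esp) and the loop repeats via
# .L010grand_loop while the input pointer in 100(%esp) is still below the limit
# at 104(%esp).  The .L004shaext variant that follows drives the same compression
# with the SHA-NI instructions, emitted as raw .byte sequences (0f 38 cb =
# sha256rnds2, 0f 38 cc/cd = sha256msg1/msg2, 66 0f 38 00 = pshufb), presumably
# so that older assemblers can still build the generated file.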
addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2428436474(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 88(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 76(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 84(%esp),%ebx shrl $10,%edi addl 56(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,84(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2756734187(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 92(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 80(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 88(%esp),%ebx shrl $10,%edi addl 60(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3204031479(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 32(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 84(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 92(%esp),%ebx shrl $10,%edi addl 64(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3329325298(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 96(%esp),%esi xorl %edi,%ebp movl 12(%esp),%ecx addl (%esi),%eax addl 4(%esi),%ebp addl 8(%esi),%edi addl 12(%esi),%ecx movl %eax,(%esi) movl %ebp,4(%esi) movl %edi,8(%esi) movl %ecx,12(%esi) movl %ebp,4(%esp) xorl %edi,%ebp movl %edi,8(%esp) movl %ecx,12(%esp) movl 20(%esp),%edi movl 24(%esp),%ebx movl 28(%esp),%ecx addl 16(%esi),%edx addl 20(%esi),%edi addl 24(%esi),%ebx addl 28(%esi),%ecx movl %edx,16(%esi) movl %edi,20(%esi) movl %ebx,24(%esi) movl %ecx,28(%esi) movl %edi,20(%esp) movl 100(%esp),%edi movl %ebx,24(%esp) movl %ecx,28(%esp) cmpl 104(%esp),%edi - jb .L009grand_loop + jb .L010grand_loop movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .align 32 .L004shaext: subl $32,%esp movdqu (%esi),%xmm1 leal 128(%ebp),%ebp movdqu 16(%esi),%xmm2 movdqa 128(%ebp),%xmm7 pshufd $27,%xmm1,%xmm0 pshufd $177,%xmm1,%xmm1 pshufd $27,%xmm2,%xmm2 .byte 102,15,58,15,202,8 punpcklqdq %xmm0,%xmm2 - jmp .L010loop_shaext + jmp .L011loop_shaext .align 
16 -.L010loop_shaext: +.L011loop_shaext: movdqu (%edi),%xmm3 movdqu 16(%edi),%xmm4 movdqu 32(%edi),%xmm5 .byte 102,15,56,0,223 movdqu 48(%edi),%xmm6 movdqa %xmm2,16(%esp) movdqa -128(%ebp),%xmm0 paddd %xmm3,%xmm0 .byte 102,15,56,0,231 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 nop movdqa %xmm1,(%esp) .byte 15,56,203,202 movdqa -112(%ebp),%xmm0 paddd %xmm4,%xmm0 .byte 102,15,56,0,239 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 leal 64(%edi),%edi .byte 15,56,204,220 .byte 15,56,203,202 movdqa -96(%ebp),%xmm0 paddd %xmm5,%xmm0 .byte 102,15,56,0,247 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm6,%xmm7 .byte 102,15,58,15,253,4 nop paddd %xmm7,%xmm3 .byte 15,56,204,229 .byte 15,56,203,202 movdqa -80(%ebp),%xmm0 paddd %xmm6,%xmm0 .byte 15,56,205,222 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm3,%xmm7 .byte 102,15,58,15,254,4 nop paddd %xmm7,%xmm4 .byte 15,56,204,238 .byte 15,56,203,202 movdqa -64(%ebp),%xmm0 paddd %xmm3,%xmm0 .byte 15,56,205,227 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm4,%xmm7 .byte 102,15,58,15,251,4 nop paddd %xmm7,%xmm5 .byte 15,56,204,243 .byte 15,56,203,202 movdqa -48(%ebp),%xmm0 paddd %xmm4,%xmm0 .byte 15,56,205,236 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm5,%xmm7 .byte 102,15,58,15,252,4 nop paddd %xmm7,%xmm6 .byte 15,56,204,220 .byte 15,56,203,202 movdqa -32(%ebp),%xmm0 paddd %xmm5,%xmm0 .byte 15,56,205,245 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm6,%xmm7 .byte 102,15,58,15,253,4 nop paddd %xmm7,%xmm3 .byte 15,56,204,229 .byte 15,56,203,202 movdqa -16(%ebp),%xmm0 paddd %xmm6,%xmm0 .byte 15,56,205,222 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm3,%xmm7 .byte 102,15,58,15,254,4 nop paddd %xmm7,%xmm4 .byte 15,56,204,238 .byte 15,56,203,202 movdqa (%ebp),%xmm0 paddd %xmm3,%xmm0 .byte 15,56,205,227 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm4,%xmm7 .byte 102,15,58,15,251,4 nop paddd %xmm7,%xmm5 .byte 15,56,204,243 .byte 15,56,203,202 movdqa 16(%ebp),%xmm0 paddd %xmm4,%xmm0 .byte 15,56,205,236 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm5,%xmm7 .byte 102,15,58,15,252,4 nop paddd %xmm7,%xmm6 .byte 15,56,204,220 .byte 15,56,203,202 movdqa 32(%ebp),%xmm0 paddd %xmm5,%xmm0 .byte 15,56,205,245 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm6,%xmm7 .byte 102,15,58,15,253,4 nop paddd %xmm7,%xmm3 .byte 15,56,204,229 .byte 15,56,203,202 movdqa 48(%ebp),%xmm0 paddd %xmm6,%xmm0 .byte 15,56,205,222 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm3,%xmm7 .byte 102,15,58,15,254,4 nop paddd %xmm7,%xmm4 .byte 15,56,204,238 .byte 15,56,203,202 movdqa 64(%ebp),%xmm0 paddd %xmm3,%xmm0 .byte 15,56,205,227 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm4,%xmm7 .byte 102,15,58,15,251,4 nop paddd %xmm7,%xmm5 .byte 15,56,204,243 .byte 15,56,203,202 movdqa 80(%ebp),%xmm0 paddd %xmm4,%xmm0 .byte 15,56,205,236 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm5,%xmm7 .byte 102,15,58,15,252,4 .byte 15,56,203,202 paddd %xmm7,%xmm6 movdqa 96(%ebp),%xmm0 paddd %xmm5,%xmm0 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 .byte 15,56,205,245 movdqa 128(%ebp),%xmm7 .byte 15,56,203,202 movdqa 112(%ebp),%xmm0 paddd %xmm6,%xmm0 nop .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 cmpl %edi,%eax nop .byte 15,56,203,202 paddd 16(%esp),%xmm2 paddd (%esp),%xmm1 - jnz .L010loop_shaext + jnz .L011loop_shaext pshufd $177,%xmm2,%xmm2 pshufd $27,%xmm1,%xmm7 pshufd $177,%xmm1,%xmm1 punpckhqdq %xmm2,%xmm1 .byte 102,15,58,15,215,8 movl 44(%esp),%esp movdqu %xmm1,(%esi) movdqu %xmm2,16(%esi) popl %edi popl %esi popl %ebx popl 
%ebp ret .align 32 -.L005SSSE3: +.L006SSSE3: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edi movl %ebx,4(%esp) xorl %ecx,%ebx movl %ecx,8(%esp) movl %edi,12(%esp) movl 16(%esi),%edx movl 20(%esi),%edi movl 24(%esi),%ecx movl 28(%esi),%esi movl %edi,20(%esp) movl 100(%esp),%edi movl %ecx,24(%esp) movl %esi,28(%esp) movdqa 256(%ebp),%xmm7 - jmp .L011grand_ssse3 + jmp .L012grand_ssse3 .align 16 -.L011grand_ssse3: +.L012grand_ssse3: movdqu (%edi),%xmm0 movdqu 16(%edi),%xmm1 movdqu 32(%edi),%xmm2 movdqu 48(%edi),%xmm3 addl $64,%edi .byte 102,15,56,0,199 movl %edi,100(%esp) .byte 102,15,56,0,207 movdqa (%ebp),%xmm4 .byte 102,15,56,0,215 movdqa 16(%ebp),%xmm5 paddd %xmm0,%xmm4 .byte 102,15,56,0,223 movdqa 32(%ebp),%xmm6 paddd %xmm1,%xmm5 movdqa 48(%ebp),%xmm7 movdqa %xmm4,32(%esp) paddd %xmm2,%xmm6 movdqa %xmm5,48(%esp) paddd %xmm3,%xmm7 movdqa %xmm6,64(%esp) movdqa %xmm7,80(%esp) - jmp .L012ssse3_00_47 + jmp .L013ssse3_00_47 .align 16 -.L012ssse3_00_47: +.L013ssse3_00_47: addl $64,%ebp movl %edx,%ecx movdqa %xmm1,%xmm4 rorl $14,%edx movl 20(%esp),%esi movdqa %xmm3,%xmm7 xorl %ecx,%edx movl 24(%esp),%edi .byte 102,15,58,15,224,4 xorl %edi,%esi rorl $5,%edx andl %ecx,%esi .byte 102,15,58,15,250,4 movl %ecx,16(%esp) xorl %ecx,%edx xorl %esi,%edi movdqa %xmm4,%xmm5 rorl $6,%edx movl %eax,%ecx movdqa %xmm4,%xmm6 addl %edi,%edx movl 4(%esp),%edi psrld $3,%xmm4 movl %eax,%esi rorl $9,%ecx paddd %xmm7,%xmm0 movl %eax,(%esp) xorl %eax,%ecx psrld $7,%xmm6 xorl %edi,%eax addl 28(%esp),%edx rorl $11,%ecx andl %eax,%ebx pshufd $250,%xmm3,%xmm7 xorl %esi,%ecx addl 32(%esp),%edx pslld $14,%xmm5 xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm4 addl %edx,%ebx addl 12(%esp),%edx psrld $11,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm5,%xmm4 movl 16(%esp),%esi xorl %ecx,%edx pslld $11,%xmm5 movl 20(%esp),%edi xorl %edi,%esi rorl $5,%edx pxor %xmm6,%xmm4 andl %ecx,%esi movl %ecx,12(%esp) movdqa %xmm7,%xmm6 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx pxor %xmm5,%xmm4 movl %ebx,%ecx addl %edi,%edx psrld $10,%xmm7 movl (%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm4,%xmm0 movl %ebx,28(%esp) xorl %ebx,%ecx psrlq $17,%xmm6 xorl %edi,%ebx addl 24(%esp),%edx rorl $11,%ecx pxor %xmm6,%xmm7 andl %ebx,%eax xorl %esi,%ecx psrlq $2,%xmm6 addl 36(%esp),%edx xorl %edi,%eax rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%eax addl 8(%esp),%edx pshufd $128,%xmm7,%xmm7 addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 12(%esp),%esi xorl %ecx,%edx movl 16(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi psrldq $8,%xmm7 movl %ecx,8(%esp) xorl %ecx,%edx xorl %esi,%edi paddd %xmm7,%xmm0 rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 28(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,24(%esp) pshufd $80,%xmm0,%xmm7 xorl %eax,%ecx xorl %edi,%eax addl 20(%esp),%edx movdqa %xmm7,%xmm6 rorl $11,%ecx psrld $10,%xmm7 andl %eax,%ebx psrlq $17,%xmm6 xorl %esi,%ecx addl 40(%esp),%edx xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%ebx addl 4(%esp),%edx psrlq $2,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm6,%xmm7 movl 8(%esp),%esi xorl %ecx,%edx movl 12(%esp),%edi pshufd $8,%xmm7,%xmm7 xorl %edi,%esi rorl $5,%edx movdqa (%ebp),%xmm6 andl %ecx,%esi movl %ecx,4(%esp) pslldq $8,%xmm7 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 24(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm7,%xmm0 movl %ebx,20(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 16(%esp),%edx paddd %xmm0,%xmm6 rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 44(%esp),%edx xorl 
%edi,%eax rorl $2,%ecx addl %edx,%eax addl (%esp),%edx addl %ecx,%eax movdqa %xmm6,32(%esp) movl %edx,%ecx movdqa %xmm2,%xmm4 rorl $14,%edx movl 4(%esp),%esi movdqa %xmm0,%xmm7 xorl %ecx,%edx movl 8(%esp),%edi .byte 102,15,58,15,225,4 xorl %edi,%esi rorl $5,%edx andl %ecx,%esi .byte 102,15,58,15,251,4 movl %ecx,(%esp) xorl %ecx,%edx xorl %esi,%edi movdqa %xmm4,%xmm5 rorl $6,%edx movl %eax,%ecx movdqa %xmm4,%xmm6 addl %edi,%edx movl 20(%esp),%edi psrld $3,%xmm4 movl %eax,%esi rorl $9,%ecx paddd %xmm7,%xmm1 movl %eax,16(%esp) xorl %eax,%ecx psrld $7,%xmm6 xorl %edi,%eax addl 12(%esp),%edx rorl $11,%ecx andl %eax,%ebx pshufd $250,%xmm0,%xmm7 xorl %esi,%ecx addl 48(%esp),%edx pslld $14,%xmm5 xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm4 addl %edx,%ebx addl 28(%esp),%edx psrld $11,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm5,%xmm4 movl (%esp),%esi xorl %ecx,%edx pslld $11,%xmm5 movl 4(%esp),%edi xorl %edi,%esi rorl $5,%edx pxor %xmm6,%xmm4 andl %ecx,%esi movl %ecx,28(%esp) movdqa %xmm7,%xmm6 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx pxor %xmm5,%xmm4 movl %ebx,%ecx addl %edi,%edx psrld $10,%xmm7 movl 16(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm4,%xmm1 movl %ebx,12(%esp) xorl %ebx,%ecx psrlq $17,%xmm6 xorl %edi,%ebx addl 8(%esp),%edx rorl $11,%ecx pxor %xmm6,%xmm7 andl %ebx,%eax xorl %esi,%ecx psrlq $2,%xmm6 addl 52(%esp),%edx xorl %edi,%eax rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%eax addl 24(%esp),%edx pshufd $128,%xmm7,%xmm7 addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 28(%esp),%esi xorl %ecx,%edx movl (%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi psrldq $8,%xmm7 movl %ecx,24(%esp) xorl %ecx,%edx xorl %esi,%edi paddd %xmm7,%xmm1 rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 12(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,8(%esp) pshufd $80,%xmm1,%xmm7 xorl %eax,%ecx xorl %edi,%eax addl 4(%esp),%edx movdqa %xmm7,%xmm6 rorl $11,%ecx psrld $10,%xmm7 andl %eax,%ebx psrlq $17,%xmm6 xorl %esi,%ecx addl 56(%esp),%edx xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%ebx addl 20(%esp),%edx psrlq $2,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm6,%xmm7 movl 24(%esp),%esi xorl %ecx,%edx movl 28(%esp),%edi pshufd $8,%xmm7,%xmm7 xorl %edi,%esi rorl $5,%edx movdqa 16(%ebp),%xmm6 andl %ecx,%esi movl %ecx,20(%esp) pslldq $8,%xmm7 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 8(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm7,%xmm1 movl %ebx,4(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl (%esp),%edx paddd %xmm1,%xmm6 rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 60(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 16(%esp),%edx addl %ecx,%eax movdqa %xmm6,48(%esp) movl %edx,%ecx movdqa %xmm3,%xmm4 rorl $14,%edx movl 20(%esp),%esi movdqa %xmm1,%xmm7 xorl %ecx,%edx movl 24(%esp),%edi .byte 102,15,58,15,226,4 xorl %edi,%esi rorl $5,%edx andl %ecx,%esi .byte 102,15,58,15,248,4 movl %ecx,16(%esp) xorl %ecx,%edx xorl %esi,%edi movdqa %xmm4,%xmm5 rorl $6,%edx movl %eax,%ecx movdqa %xmm4,%xmm6 addl %edi,%edx movl 4(%esp),%edi psrld $3,%xmm4 movl %eax,%esi rorl $9,%ecx paddd %xmm7,%xmm2 movl %eax,(%esp) xorl %eax,%ecx psrld $7,%xmm6 xorl %edi,%eax addl 28(%esp),%edx rorl $11,%ecx andl %eax,%ebx pshufd $250,%xmm1,%xmm7 xorl %esi,%ecx addl 64(%esp),%edx pslld $14,%xmm5 xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm4 addl %edx,%ebx addl 12(%esp),%edx psrld $11,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm5,%xmm4 movl 16(%esp),%esi xorl %ecx,%edx pslld $11,%xmm5 movl 20(%esp),%edi xorl %edi,%esi rorl $5,%edx 
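# In .L013ssse3_00_47 the message schedule is advanced four words at a time in
# %xmm0..%xmm3, interleaved with the scalar rounds: palignr (.byte 102,15,58,15,...)
# forms the W[t-15]/W[t-7] windows, the psrld/pslld/psrlq/pxor chains compute
# sigma0/sigma1, and paddd folds in the round constants from (%ebp) before the
# four ready K[t]+W[t] values are parked at 32..80(%esp) for the round code.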
pxor %xmm6,%xmm4 andl %ecx,%esi movl %ecx,12(%esp) movdqa %xmm7,%xmm6 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx pxor %xmm5,%xmm4 movl %ebx,%ecx addl %edi,%edx psrld $10,%xmm7 movl (%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm4,%xmm2 movl %ebx,28(%esp) xorl %ebx,%ecx psrlq $17,%xmm6 xorl %edi,%ebx addl 24(%esp),%edx rorl $11,%ecx pxor %xmm6,%xmm7 andl %ebx,%eax xorl %esi,%ecx psrlq $2,%xmm6 addl 68(%esp),%edx xorl %edi,%eax rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%eax addl 8(%esp),%edx pshufd $128,%xmm7,%xmm7 addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 12(%esp),%esi xorl %ecx,%edx movl 16(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi psrldq $8,%xmm7 movl %ecx,8(%esp) xorl %ecx,%edx xorl %esi,%edi paddd %xmm7,%xmm2 rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 28(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,24(%esp) pshufd $80,%xmm2,%xmm7 xorl %eax,%ecx xorl %edi,%eax addl 20(%esp),%edx movdqa %xmm7,%xmm6 rorl $11,%ecx psrld $10,%xmm7 andl %eax,%ebx psrlq $17,%xmm6 xorl %esi,%ecx addl 72(%esp),%edx xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%ebx addl 4(%esp),%edx psrlq $2,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm6,%xmm7 movl 8(%esp),%esi xorl %ecx,%edx movl 12(%esp),%edi pshufd $8,%xmm7,%xmm7 xorl %edi,%esi rorl $5,%edx movdqa 32(%ebp),%xmm6 andl %ecx,%esi movl %ecx,4(%esp) pslldq $8,%xmm7 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 24(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm7,%xmm2 movl %ebx,20(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 16(%esp),%edx paddd %xmm2,%xmm6 rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 76(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl (%esp),%edx addl %ecx,%eax movdqa %xmm6,64(%esp) movl %edx,%ecx movdqa %xmm0,%xmm4 rorl $14,%edx movl 4(%esp),%esi movdqa %xmm2,%xmm7 xorl %ecx,%edx movl 8(%esp),%edi .byte 102,15,58,15,227,4 xorl %edi,%esi rorl $5,%edx andl %ecx,%esi .byte 102,15,58,15,249,4 movl %ecx,(%esp) xorl %ecx,%edx xorl %esi,%edi movdqa %xmm4,%xmm5 rorl $6,%edx movl %eax,%ecx movdqa %xmm4,%xmm6 addl %edi,%edx movl 20(%esp),%edi psrld $3,%xmm4 movl %eax,%esi rorl $9,%ecx paddd %xmm7,%xmm3 movl %eax,16(%esp) xorl %eax,%ecx psrld $7,%xmm6 xorl %edi,%eax addl 12(%esp),%edx rorl $11,%ecx andl %eax,%ebx pshufd $250,%xmm2,%xmm7 xorl %esi,%ecx addl 80(%esp),%edx pslld $14,%xmm5 xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm4 addl %edx,%ebx addl 28(%esp),%edx psrld $11,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm5,%xmm4 movl (%esp),%esi xorl %ecx,%edx pslld $11,%xmm5 movl 4(%esp),%edi xorl %edi,%esi rorl $5,%edx pxor %xmm6,%xmm4 andl %ecx,%esi movl %ecx,28(%esp) movdqa %xmm7,%xmm6 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx pxor %xmm5,%xmm4 movl %ebx,%ecx addl %edi,%edx psrld $10,%xmm7 movl 16(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm4,%xmm3 movl %ebx,12(%esp) xorl %ebx,%ecx psrlq $17,%xmm6 xorl %edi,%ebx addl 8(%esp),%edx rorl $11,%ecx pxor %xmm6,%xmm7 andl %ebx,%eax xorl %esi,%ecx psrlq $2,%xmm6 addl 84(%esp),%edx xorl %edi,%eax rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%eax addl 24(%esp),%edx pshufd $128,%xmm7,%xmm7 addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 28(%esp),%esi xorl %ecx,%edx movl (%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi psrldq $8,%xmm7 movl %ecx,24(%esp) xorl %ecx,%edx xorl %esi,%edi paddd %xmm7,%xmm3 rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 12(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,8(%esp) pshufd $80,%xmm3,%xmm7 xorl %eax,%ecx xorl %edi,%eax addl 4(%esp),%edx movdqa %xmm7,%xmm6 rorl 
$11,%ecx psrld $10,%xmm7 andl %eax,%ebx psrlq $17,%xmm6 xorl %esi,%ecx addl 88(%esp),%edx xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%ebx addl 20(%esp),%edx psrlq $2,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm6,%xmm7 movl 24(%esp),%esi xorl %ecx,%edx movl 28(%esp),%edi pshufd $8,%xmm7,%xmm7 xorl %edi,%esi rorl $5,%edx movdqa 48(%ebp),%xmm6 andl %ecx,%esi movl %ecx,20(%esp) pslldq $8,%xmm7 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 8(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm7,%xmm3 movl %ebx,4(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl (%esp),%edx paddd %xmm3,%xmm6 rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 92(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 16(%esp),%edx addl %ecx,%eax movdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne .L012ssse3_00_47 + jne .L013ssse3_00_47 movl %edx,%ecx rorl $14,%edx movl 20(%esp),%esi xorl %ecx,%edx movl 24(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 4(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,(%esp) xorl %eax,%ecx xorl %edi,%eax addl 28(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 32(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 12(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 16(%esp),%esi xorl %ecx,%edx movl 20(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,12(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl (%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,28(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 24(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 36(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 8(%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 12(%esp),%esi xorl %ecx,%edx movl 16(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 28(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,24(%esp) xorl %eax,%ecx xorl %edi,%eax addl 20(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 40(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 4(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 8(%esp),%esi xorl %ecx,%edx movl 12(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,4(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 24(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,20(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 16(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 44(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl (%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 4(%esp),%esi xorl %ecx,%edx movl 8(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 20(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,16(%esp) xorl %eax,%ecx xorl %edi,%eax addl 12(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 48(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 28(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl (%esp),%esi xorl %ecx,%edx movl 4(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,28(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 16(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,12(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 8(%esp),%edx rorl $11,%ecx 
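# The compare against 66051 (0x00010203, the first word of the byte-swap mask
# stored after the K constants) appears to detect the end of the K table; once
# it hits, the schedule updates stop and the remaining rounds simply consume the
# values already staged at 32(%esp)..92(%esp).  The newly added .L005AVX /
# .L014AVX_BMI paths further down follow the same layout, with the schedule
# expressed through VEX-encoded vpalignr/vpsrld/vpsrlq/vpxor/vpaddd.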
andl %ebx,%eax xorl %esi,%ecx addl 52(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 24(%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 28(%esp),%esi xorl %ecx,%edx movl (%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 12(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,8(%esp) xorl %eax,%ecx xorl %edi,%eax addl 4(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 56(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 20(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 24(%esp),%esi xorl %ecx,%edx movl 28(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,20(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 8(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,4(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl (%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 60(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 16(%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 20(%esp),%esi xorl %ecx,%edx movl 24(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 4(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,(%esp) xorl %eax,%ecx xorl %edi,%eax addl 28(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 64(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 12(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 16(%esp),%esi xorl %ecx,%edx movl 20(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,12(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl (%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,28(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 24(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 68(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 8(%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 12(%esp),%esi xorl %ecx,%edx movl 16(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 28(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,24(%esp) xorl %eax,%ecx xorl %edi,%eax addl 20(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 72(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 4(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 8(%esp),%esi xorl %ecx,%edx movl 12(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,4(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 24(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,20(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 16(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 76(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl (%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 4(%esp),%esi xorl %ecx,%edx movl 8(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 20(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,16(%esp) xorl %eax,%ecx xorl %edi,%eax addl 12(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 80(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 28(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl (%esp),%esi xorl %ecx,%edx movl 4(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,28(%esp) xorl 
%ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 16(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,12(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 8(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 84(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 24(%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 28(%esp),%esi xorl %ecx,%edx movl (%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 12(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,8(%esp) xorl %eax,%ecx xorl %edi,%eax addl 4(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 88(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 20(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 24(%esp),%esi xorl %ecx,%edx movl 28(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,20(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 8(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,4(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl (%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 92(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 16(%esp),%edx addl %ecx,%eax movl 96(%esp),%esi xorl %edi,%ebx movl 12(%esp),%ecx addl (%esi),%eax addl 4(%esi),%ebx addl 8(%esi),%edi addl 12(%esi),%ecx movl %eax,(%esi) movl %ebx,4(%esi) movl %edi,8(%esi) movl %ecx,12(%esi) movl %ebx,4(%esp) xorl %edi,%ebx movl %edi,8(%esp) movl %ecx,12(%esp) movl 20(%esp),%edi movl 24(%esp),%ecx addl 16(%esi),%edx addl 20(%esi),%edi addl 24(%esi),%ecx movl %edx,16(%esi) movl %edi,20(%esi) movl %edi,20(%esp) movl 28(%esp),%edi movl %ecx,24(%esi) addl 28(%esi),%edi movl %ecx,24(%esp) movl %edi,28(%esi) movl %edi,28(%esp) movl 100(%esp),%edi movdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb .L011grand_ssse3 + jb .L012grand_ssse3 movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret +.align 32 +.L005AVX: + andl $264,%edx + cmpl $264,%edx + je .L014AVX_BMI + leal -96(%esp),%esp + vzeroall + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + vmovdqa 256(%ebp),%xmm7 + jmp .L015grand_avx +.align 32 +.L015grand_avx: + vmovdqu (%edi),%xmm0 + vmovdqu 16(%edi),%xmm1 + vmovdqu 32(%edi),%xmm2 + vmovdqu 48(%edi),%xmm3 + addl $64,%edi + vpshufb %xmm7,%xmm0,%xmm0 + movl %edi,100(%esp) + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd (%ebp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 16(%ebp),%xmm1,%xmm5 + vpaddd 32(%ebp),%xmm2,%xmm6 + vpaddd 48(%ebp),%xmm3,%xmm7 + vmovdqa %xmm4,32(%esp) + vmovdqa %xmm5,48(%esp) + vmovdqa %xmm6,64(%esp) + vmovdqa %xmm7,80(%esp) + jmp .L016avx_00_47 +.align 16 +.L016avx_00_47: + addl $64,%ebp + vpalignr $4,%xmm0,%xmm1,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + vpalignr $4,%xmm2,%xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + vpaddd %xmm7,%xmm0,%xmm0 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl 
%eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + vpshufd $250,%xmm3,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + vpaddd %xmm4,%xmm0,%xmm0 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + vpaddd %xmm7,%xmm0,%xmm0 + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm0,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm0,%xmm0 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + vpaddd (%ebp),%xmm0,%xmm6 + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,32(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + vpalignr $4,%xmm3,%xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + vpaddd %xmm7,%xmm1,%xmm1 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + vpshufd $250,%xmm0,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 
4(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + vpaddd %xmm4,%xmm1,%xmm1 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + vpaddd %xmm7,%xmm1,%xmm1 + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm1,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm1,%xmm1 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 16(%ebp),%xmm1,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,48(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + vpalignr $4,%xmm0,%xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + vpaddd %xmm7,%xmm2,%xmm2 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + vpshufd $250,%xmm1,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + vpaddd %xmm4,%xmm2,%xmm2 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 
24(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + vpaddd %xmm7,%xmm2,%xmm2 + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm2,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm2,%xmm2 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + vpaddd 32(%ebp),%xmm2,%xmm6 + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,64(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + vpalignr $4,%xmm1,%xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + vpaddd %xmm7,%xmm3,%xmm3 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + vpshufd $250,%xmm2,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + vpaddd %xmm4,%xmm3,%xmm3 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + vpaddd %xmm7,%xmm3,%xmm3 + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + 
vpshufd $80,%xmm3,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm3,%xmm3 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 48(%ebp),%xmm3,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L016avx_00_47 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + 
addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + 
shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl 
%ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + vmovdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L015grand_avx + movl 108(%esp),%esp + vzeroall + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L014AVX_BMI: + leal -96(%esp),%esp + vzeroall + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + vmovdqa 256(%ebp),%xmm7 + jmp .L017grand_avx_bmi +.align 32 +.L017grand_avx_bmi: + vmovdqu (%edi),%xmm0 + vmovdqu 16(%edi),%xmm1 + vmovdqu 32(%edi),%xmm2 + vmovdqu 48(%edi),%xmm3 + addl $64,%edi + vpshufb %xmm7,%xmm0,%xmm0 + movl %edi,100(%esp) + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd (%ebp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 16(%ebp),%xmm1,%xmm5 + vpaddd 32(%ebp),%xmm2,%xmm6 + vpaddd 48(%ebp),%xmm3,%xmm7 + vmovdqa %xmm4,32(%esp) + vmovdqa %xmm5,48(%esp) + vmovdqa %xmm6,64(%esp) + vmovdqa %xmm7,80(%esp) + jmp .L018avx_bmi_00_47 +.align 16 +.L018avx_bmi_00_47: + addl $64,%ebp + vpalignr $4,%xmm0,%xmm1,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + vpaddd %xmm7,%xmm0,%xmm0 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 28(%esp),%edx + andl %eax,%ebx + addl 32(%esp),%edx + vpshufd $250,%xmm3,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 24(%esp),%edx + andl %ebx,%eax + addl 36(%esp),%edx + vpaddd %xmm4,%xmm0,%xmm0 + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm0,%xmm0 + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm0,%xmm7 + addl 20(%esp),%edx + andl %eax,%ebx + addl 40(%esp),%edx + 
vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm0,%xmm0 + addl 16(%esp),%edx + andl %ebx,%eax + addl 44(%esp),%edx + vpaddd (%ebp),%xmm0,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,32(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + vpaddd %xmm7,%xmm1,%xmm1 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 12(%esp),%edx + andl %eax,%ebx + addl 48(%esp),%edx + vpshufd $250,%xmm0,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 8(%esp),%edx + andl %ebx,%eax + addl 52(%esp),%edx + vpaddd %xmm4,%xmm1,%xmm1 + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm1,%xmm1 + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm1,%xmm7 + addl 4(%esp),%edx + andl %eax,%ebx + addl 56(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm1,%xmm1 + addl (%esp),%edx + andl 
%ebx,%eax + addl 60(%esp),%edx + vpaddd 16(%ebp),%xmm1,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,48(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + vpalignr $4,%xmm0,%xmm1,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + vpaddd %xmm7,%xmm2,%xmm2 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 28(%esp),%edx + andl %eax,%ebx + addl 64(%esp),%edx + vpshufd $250,%xmm1,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 24(%esp),%edx + andl %ebx,%eax + addl 68(%esp),%edx + vpaddd %xmm4,%xmm2,%xmm2 + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm2,%xmm2 + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm2,%xmm7 + addl 20(%esp),%edx + andl %eax,%ebx + addl 72(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm2,%xmm2 + addl 16(%esp),%edx + andl %ebx,%eax + addl 76(%esp),%edx + vpaddd 32(%ebp),%xmm2,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,64(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + vpaddd %xmm7,%xmm3,%xmm3 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 20(%esp),%edi + xorl 
%esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 12(%esp),%edx + andl %eax,%ebx + addl 80(%esp),%edx + vpshufd $250,%xmm2,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 8(%esp),%edx + andl %ebx,%eax + addl 84(%esp),%edx + vpaddd %xmm4,%xmm3,%xmm3 + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm3,%xmm3 + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm3,%xmm7 + addl 4(%esp),%edx + andl %eax,%ebx + addl 88(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm3,%xmm3 + addl (%esp),%edx + andl %ebx,%eax + addl 92(%esp),%edx + vpaddd 48(%ebp),%xmm3,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L018avx_bmi_00_47 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + andl %eax,%ebx + addl 32(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + andl %ebx,%eax + addl 36(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 
16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + andl %eax,%ebx + addl 40(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + andl %ebx,%eax + addl 44(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + andl %eax,%ebx + addl 48(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + andl %ebx,%eax + addl 52(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + andl %eax,%ebx + addl 56(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl (%esp),%edx + andl %ebx,%eax + addl 60(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + andl %eax,%ebx + addl 64(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 20(%esp),%edx,%esi + 
xorl %edi,%ecx + andl 16(%esp),%edx + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + andl %ebx,%eax + addl 68(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + andl %eax,%ebx + addl 72(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + andl %ebx,%eax + addl 76(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + andl %eax,%ebx + addl 80(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + andl %ebx,%eax + addl 84(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + andl %eax,%ebx + addl 88(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl (%esp),%edx + andl %ebx,%eax + addl 92(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl 
%eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + vmovdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L017grand_avx_bmi + movl 108(%esp),%esp + vzeroall + popl %edi + popl %esi + popl %ebx + popl %ebp + ret .size sha256_block_data_order,.-.L_sha256_block_data_order_begin .comm OPENSSL_ia32cap_P,16,4 #else .file "sha256-586.S" .text .globl sha256_block_data_order .type sha256_block_data_order,@function .align 16 sha256_block_data_order: .L_sha256_block_data_order_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl %esp,%ebx call .L000pic_point .L000pic_point: popl %ebp leal .L001K256-.L000pic_point(%ebp),%ebp subl $16,%esp andl $-64,%esp shll $6,%eax addl %edi,%eax movl %esi,(%esp) movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) leal OPENSSL_ia32cap_P,%edx movl (%edx),%ecx movl 4(%edx),%ebx testl $1048576,%ecx jnz .L002loop movl 8(%edx),%edx testl $16777216,%ecx jz .L003no_xmm andl $1073741824,%ecx andl $268435968,%ebx testl $536870912,%edx jnz .L004shaext orl %ebx,%ecx andl $1342177280,%ecx cmpl $1342177280,%ecx + je .L005AVX testl $512,%ebx - jnz .L005SSSE3 + jnz .L006SSSE3 .L003no_xmm: subl %edi,%eax cmpl $256,%eax - jae .L006unrolled + jae .L007unrolled jmp .L002loop .align 16 .L002loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx bswap %eax movl 12(%edi),%edx bswap %ebx pushl %eax bswap %ecx pushl %ebx bswap %edx pushl %ecx pushl %edx movl 16(%edi),%eax movl 20(%edi),%ebx movl 24(%edi),%ecx bswap %eax movl 28(%edi),%edx bswap %ebx pushl %eax bswap %ecx pushl %ebx bswap %edx pushl %ecx pushl %edx movl 32(%edi),%eax movl 36(%edi),%ebx movl 40(%edi),%ecx bswap %eax movl 44(%edi),%edx bswap %ebx pushl %eax bswap %ecx pushl %ebx bswap %edx pushl %ecx pushl %edx movl 48(%edi),%eax movl 52(%edi),%ebx movl 56(%edi),%ecx bswap %eax movl 60(%edi),%edx bswap %ebx pushl %eax bswap %ecx pushl %ebx bswap %edx pushl %ecx pushl %edx addl $64,%edi leal -36(%esp),%esp movl %edi,104(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edi movl %ebx,8(%esp) xorl %ecx,%ebx movl %ecx,12(%esp) movl %edi,16(%esp) movl %ebx,(%esp) movl 16(%esi),%edx movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edi movl %ebx,24(%esp) movl %ecx,28(%esp) movl %edi,32(%esp) .align 16 -.L00700_15: +.L00800_15: movl %edx,%ecx movl 24(%esp),%esi rorl $14,%ecx movl 28(%esp),%edi xorl %edx,%ecx xorl %edi,%esi movl 96(%esp),%ebx rorl $5,%ecx andl %edx,%esi movl %edx,20(%esp) xorl %ecx,%edx addl 32(%esp),%ebx xorl %edi,%esi rorl $6,%edx movl %eax,%ecx addl %esi,%ebx rorl $9,%ecx addl %edx,%ebx movl 8(%esp),%edi xorl %eax,%ecx movl %eax,4(%esp) leal -4(%esp),%esp rorl $11,%ecx movl (%ebp),%esi xorl %eax,%ecx movl 20(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %esi,%ebx movl %eax,(%esp) addl %ebx,%edx andl 4(%esp),%eax addl %ecx,%ebx xorl %edi,%eax addl $4,%ebp addl %ebx,%eax cmpl $3248222580,%esi - jne .L00700_15 + jne .L00800_15 movl 156(%esp),%ecx - jmp .L00816_63 + jmp .L00916_63 .align 16 -.L00816_63: +.L00916_63: movl %ecx,%ebx movl 104(%esp),%esi rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx 
shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 160(%esp),%ebx shrl $10,%edi addl 124(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 24(%esp),%esi rorl $14,%ecx addl %edi,%ebx movl 28(%esp),%edi xorl %edx,%ecx xorl %edi,%esi movl %ebx,96(%esp) rorl $5,%ecx andl %edx,%esi movl %edx,20(%esp) xorl %ecx,%edx addl 32(%esp),%ebx xorl %edi,%esi rorl $6,%edx movl %eax,%ecx addl %esi,%ebx rorl $9,%ecx addl %edx,%ebx movl 8(%esp),%edi xorl %eax,%ecx movl %eax,4(%esp) leal -4(%esp),%esp rorl $11,%ecx movl (%ebp),%esi xorl %eax,%ecx movl 20(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %esi,%ebx movl %eax,(%esp) addl %ebx,%edx andl 4(%esp),%eax addl %ecx,%ebx xorl %edi,%eax movl 156(%esp),%ecx addl $4,%ebp addl %ebx,%eax cmpl $3329325298,%esi - jne .L00816_63 + jne .L00916_63 movl 356(%esp),%esi movl 8(%esp),%ebx movl 16(%esp),%ecx addl (%esi),%eax addl 4(%esi),%ebx addl 8(%esi),%edi addl 12(%esi),%ecx movl %eax,(%esi) movl %ebx,4(%esi) movl %edi,8(%esi) movl %ecx,12(%esi) movl 24(%esp),%eax movl 28(%esp),%ebx movl 32(%esp),%ecx movl 360(%esp),%edi addl 16(%esi),%edx addl 20(%esi),%eax addl 24(%esi),%ebx addl 28(%esi),%ecx movl %edx,16(%esi) movl %eax,20(%esi) movl %ebx,24(%esi) movl %ecx,28(%esi) leal 356(%esp),%esp subl $256,%ebp cmpl 8(%esp),%edi jb .L002loop movl 12(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L001K256: .long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 .long 66051,67438087,134810123,202182159 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .align 16 -.L006unrolled: +.L007unrolled: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebp movl 8(%esi),%ecx movl 12(%esi),%ebx movl %ebp,4(%esp) xorl %ecx,%ebp movl %ecx,8(%esp) movl %ebx,12(%esp) movl 16(%esi),%edx movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%esi movl %ebx,20(%esp) movl %ecx,24(%esp) movl %esi,28(%esp) - jmp .L009grand_loop + jmp .L010grand_loop .align 16 -.L009grand_loop: +.L010grand_loop: movl (%edi),%ebx movl 4(%edi),%ecx bswap %ebx movl 8(%edi),%esi bswap %ecx movl %ebx,32(%esp) bswap %esi movl %ecx,36(%esp) movl %esi,40(%esp) movl 12(%edi),%ebx movl 16(%edi),%ecx bswap %ebx movl 20(%edi),%esi bswap %ecx movl %ebx,44(%esp) bswap %esi movl %ecx,48(%esp) movl %esi,52(%esp) movl 24(%edi),%ebx movl 28(%edi),%ecx bswap %ebx movl 32(%edi),%esi bswap %ecx movl %ebx,56(%esp) bswap %esi movl %ecx,60(%esp) movl %esi,64(%esp) movl 36(%edi),%ebx movl 40(%edi),%ecx bswap %ebx movl 44(%edi),%esi bswap %ecx movl %ebx,68(%esp) bswap %esi movl %ecx,72(%esp) movl %esi,76(%esp) movl 48(%edi),%ebx movl 52(%edi),%ecx bswap %ebx movl 56(%edi),%esi bswap %ecx movl %ebx,80(%esp) bswap %esi movl %ecx,84(%esp) movl %esi,88(%esp) movl 60(%edi),%ebx addl 
$64,%edi bswap %ebx movl %edi,100(%esp) movl %ebx,92(%esp) movl %edx,%ecx movl 20(%esp),%esi rorl $14,%edx movl 24(%esp),%edi xorl %ecx,%edx movl 32(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1116352408(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 16(%esp),%ecx rorl $14,%edx movl 20(%esp),%edi xorl %esi,%edx movl 36(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1899447441(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl %edx,%ecx movl 12(%esp),%esi rorl $14,%edx movl 16(%esp),%edi xorl %ecx,%edx movl 40(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3049323471(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 8(%esp),%ecx rorl $14,%edx movl 12(%esp),%edi xorl %esi,%edx movl 44(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3921009573(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl %edx,%ecx movl 4(%esp),%esi rorl $14,%edx movl 8(%esp),%edi xorl %ecx,%edx movl 48(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 961987163(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl (%esp),%ecx rorl $14,%edx movl 4(%esp),%edi xorl %esi,%edx movl 52(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1508970993(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl %edx,%ecx movl 28(%esp),%esi rorl $14,%edx movl (%esp),%edi xorl %ecx,%edx movl 56(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2453635748(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 
20(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 24(%esp),%ecx rorl $14,%edx movl 28(%esp),%edi xorl %esi,%edx movl 60(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2870763221(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl %edx,%ecx movl 20(%esp),%esi rorl $14,%edx movl 24(%esp),%edi xorl %ecx,%edx movl 64(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3624381080(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 16(%esp),%ecx rorl $14,%edx movl 20(%esp),%edi xorl %esi,%edx movl 68(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 310598401(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl %edx,%ecx movl 12(%esp),%esi rorl $14,%edx movl 16(%esp),%edi xorl %ecx,%edx movl 72(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 607225278(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 8(%esp),%ecx rorl $14,%edx movl 12(%esp),%edi xorl %esi,%edx movl 76(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1426881987(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl %edx,%ecx movl 4(%esp),%esi rorl $14,%edx movl 8(%esp),%edi xorl %ecx,%edx movl 80(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1925078388(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl (%esp),%ecx rorl $14,%edx movl 4(%esp),%edi xorl %esi,%edx movl 84(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2162078206(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 
%edx,%ecx movl 28(%esp),%esi rorl $14,%edx movl (%esp),%edi xorl %ecx,%edx movl 88(%esp),%ebx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2614888103(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl %edx,%esi movl 24(%esp),%ecx rorl $14,%edx movl 28(%esp),%edi xorl %esi,%edx movl 92(%esp),%ebx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3248222580(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 36(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 88(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 32(%esp),%ebx shrl $10,%edi addl 68(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,32(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3835390401(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 40(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 92(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 36(%esp),%ebx shrl $10,%edi addl 72(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,36(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 4022224774(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 44(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 32(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 40(%esp),%ebx shrl $10,%edi addl 76(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,40(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 264347078(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 48(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 36(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 44(%esp),%ebx shrl $10,%edi addl 80(%esp),%ebx movl %edx,%esi xorl 
%ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,44(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 604807628(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 52(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 40(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 48(%esp),%ebx shrl $10,%edi addl 84(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,48(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 770255983(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 56(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 44(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 52(%esp),%ebx shrl $10,%edi addl 88(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,52(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1249150122(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 60(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 48(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 56(%esp),%ebx shrl $10,%edi addl 92(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx movl %ebx,56(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1555081692(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 64(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 52(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 60(%esp),%ebx shrl $10,%edi addl 32(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx movl %ebx,60(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1996064986(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 68(%esp),%ecx rorl $2,%esi addl %edx,%eax 
addl 16(%esp),%edx addl %esi,%eax movl 56(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 64(%esp),%ebx shrl $10,%edi addl 36(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,64(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2554220882(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 72(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 60(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 68(%esp),%ebx shrl $10,%edi addl 40(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,68(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2821834349(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 76(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 64(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 72(%esp),%ebx shrl $10,%edi addl 44(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,72(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2952996808(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 80(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 68(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 76(%esp),%ebx shrl $10,%edi addl 48(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,76(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3210313671(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 84(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 72(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 80(%esp),%ebx shrl $10,%edi addl 52(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,80(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl 
$6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3336571891(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 88(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 76(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 84(%esp),%ebx shrl $10,%edi addl 56(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,84(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3584528711(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 92(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 80(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 88(%esp),%ebx shrl $10,%edi addl 60(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx movl %ebx,88(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 113926993(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 32(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 84(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 92(%esp),%ebx shrl $10,%edi addl 64(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx movl %ebx,92(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 338241895(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 36(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 88(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 32(%esp),%ebx shrl $10,%edi addl 68(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,32(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 666307205(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 40(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 92(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 36(%esp),%ebx shrl 
$10,%edi addl 72(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,36(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 773529912(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 44(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 32(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 40(%esp),%ebx shrl $10,%edi addl 76(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,40(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1294757372(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 48(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 36(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 44(%esp),%ebx shrl $10,%edi addl 80(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,44(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1396182291(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 52(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 40(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 48(%esp),%ebx shrl $10,%edi addl 84(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,48(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1695183700(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 56(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 44(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 52(%esp),%ebx shrl $10,%edi addl 88(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,52(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1986661051(%ebx,%edx,1),%edx xorl %ecx,%esi xorl 
%edi,%eax movl 60(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 48(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 56(%esp),%ebx shrl $10,%edi addl 92(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx movl %ebx,56(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2177026350(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 64(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 52(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 60(%esp),%ebx shrl $10,%edi addl 32(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx movl %ebx,60(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2456956037(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 68(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 56(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 64(%esp),%ebx shrl $10,%edi addl 36(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,64(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2730485921(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 72(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 60(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 68(%esp),%ebx shrl $10,%edi addl 40(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,68(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2820302411(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 76(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 64(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 72(%esp),%ebx shrl $10,%edi addl 44(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,72(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) 
xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3259730800(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 80(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 68(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 76(%esp),%ebx shrl $10,%edi addl 48(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,76(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3345764771(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 84(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 72(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 80(%esp),%ebx shrl $10,%edi addl 52(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,80(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3516065817(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 88(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 76(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 84(%esp),%ebx shrl $10,%edi addl 56(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,84(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3600352804(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 92(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 80(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 88(%esp),%ebx shrl $10,%edi addl 60(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx movl %ebx,88(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 4094571909(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 32(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 84(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl 
%esi,%ebx rorl $17,%ecx addl 92(%esp),%ebx shrl $10,%edi addl 64(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx movl %ebx,92(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 275423344(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 36(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 88(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 32(%esp),%ebx shrl $10,%edi addl 68(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,32(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 430227734(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 40(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 92(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 36(%esp),%ebx shrl $10,%edi addl 72(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,36(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 506948616(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 44(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 32(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 40(%esp),%ebx shrl $10,%edi addl 76(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,40(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 659060556(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 48(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 36(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 44(%esp),%ebx shrl $10,%edi addl 80(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,44(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 
883997877(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 52(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 40(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 48(%esp),%ebx shrl $10,%edi addl 84(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,48(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 958139571(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 56(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 44(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 52(%esp),%ebx shrl $10,%edi addl 88(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,52(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1322822218(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 60(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 48(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 56(%esp),%ebx shrl $10,%edi addl 92(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx movl %ebx,56(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1537002063(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 64(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 52(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 60(%esp),%ebx shrl $10,%edi addl 32(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx movl %ebx,60(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 1747873779(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 68(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 56(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 64(%esp),%ebx shrl $10,%edi addl 36(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 20(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 24(%esp),%edi xorl %ecx,%edx movl %ebx,64(%esp) xorl %edi,%esi 
rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx addl 28(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 4(%esp),%edi xorl %eax,%ecx movl %eax,(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 1955562222(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 72(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 12(%esp),%edx addl %ecx,%ebp movl 60(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 68(%esp),%ebx shrl $10,%edi addl 40(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 16(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 20(%esp),%edi xorl %esi,%edx movl %ebx,68(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,12(%esp) xorl %esi,%edx addl 24(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl (%esp),%edi xorl %ebp,%esi movl %ebp,28(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2024104815(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 76(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 8(%esp),%edx addl %esi,%eax movl 64(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 72(%esp),%ebx shrl $10,%edi addl 44(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 12(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 16(%esp),%edi xorl %ecx,%edx movl %ebx,72(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx addl 20(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 28(%esp),%edi xorl %eax,%ecx movl %eax,24(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2227730452(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 80(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 4(%esp),%edx addl %ecx,%ebp movl 68(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 76(%esp),%ebx shrl $10,%edi addl 48(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 8(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 12(%esp),%edi xorl %esi,%edx movl %ebx,76(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,4(%esp) xorl %esi,%edx addl 16(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 24(%esp),%edi xorl %ebp,%esi movl %ebp,20(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2361852424(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 84(%esp),%ecx rorl $2,%esi addl %edx,%eax addl (%esp),%edx addl %esi,%eax movl 72(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 80(%esp),%ebx shrl $10,%edi addl 52(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 4(%esp),%esi rorl $14,%edx addl %edi,%ebx movl 8(%esp),%edi xorl %ecx,%edx movl %ebx,80(%esp) xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx addl 12(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 20(%esp),%edi xorl %eax,%ecx movl %eax,16(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 2428436474(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 88(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 28(%esp),%edx addl %ecx,%ebp movl 76(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl 
%ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 84(%esp),%ebx shrl $10,%edi addl 56(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl (%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 4(%esp),%edi xorl %esi,%edx movl %ebx,84(%esp) xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,28(%esp) xorl %esi,%edx addl 8(%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 16(%esp),%edi xorl %ebp,%esi movl %ebp,12(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 2756734187(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax movl 92(%esp),%ecx rorl $2,%esi addl %edx,%eax addl 24(%esp),%edx addl %esi,%eax movl 80(%esp),%esi movl %ecx,%ebx rorl $11,%ecx movl %esi,%edi rorl $2,%esi xorl %ebx,%ecx shrl $3,%ebx rorl $7,%ecx xorl %edi,%esi xorl %ecx,%ebx rorl $17,%esi addl 88(%esp),%ebx shrl $10,%edi addl 60(%esp),%ebx movl %edx,%ecx xorl %esi,%edi movl 28(%esp),%esi rorl $14,%edx addl %edi,%ebx movl (%esp),%edi xorl %ecx,%edx xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx addl 4(%esp),%ebx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%ebx rorl $9,%ecx movl %eax,%esi movl 12(%esp),%edi xorl %eax,%ecx movl %eax,8(%esp) xorl %edi,%eax rorl $11,%ecx andl %eax,%ebp leal 3204031479(%ebx,%edx,1),%edx xorl %esi,%ecx xorl %edi,%ebp movl 32(%esp),%esi rorl $2,%ecx addl %edx,%ebp addl 20(%esp),%edx addl %ecx,%ebp movl 84(%esp),%ecx movl %esi,%ebx rorl $11,%esi movl %ecx,%edi rorl $2,%ecx xorl %ebx,%esi shrl $3,%ebx rorl $7,%esi xorl %edi,%ecx xorl %esi,%ebx rorl $17,%ecx addl 92(%esp),%ebx shrl $10,%edi addl 64(%esp),%ebx movl %edx,%esi xorl %ecx,%edi movl 24(%esp),%ecx rorl $14,%edx addl %edi,%ebx movl 28(%esp),%edi xorl %esi,%edx xorl %edi,%ecx rorl $5,%edx andl %esi,%ecx movl %esi,20(%esp) xorl %esi,%edx addl (%esp),%ebx xorl %ecx,%edi rorl $6,%edx movl %ebp,%esi addl %edi,%ebx rorl $9,%esi movl %ebp,%ecx movl 8(%esp),%edi xorl %ebp,%esi movl %ebp,4(%esp) xorl %edi,%ebp rorl $11,%esi andl %ebp,%eax leal 3329325298(%ebx,%edx,1),%edx xorl %ecx,%esi xorl %edi,%eax rorl $2,%esi addl %edx,%eax addl 16(%esp),%edx addl %esi,%eax movl 96(%esp),%esi xorl %edi,%ebp movl 12(%esp),%ecx addl (%esi),%eax addl 4(%esi),%ebp addl 8(%esi),%edi addl 12(%esi),%ecx movl %eax,(%esi) movl %ebp,4(%esi) movl %edi,8(%esi) movl %ecx,12(%esi) movl %ebp,4(%esp) xorl %edi,%ebp movl %edi,8(%esp) movl %ecx,12(%esp) movl 20(%esp),%edi movl 24(%esp),%ebx movl 28(%esp),%ecx addl 16(%esi),%edx addl 20(%esi),%edi addl 24(%esi),%ebx addl 28(%esi),%ecx movl %edx,16(%esi) movl %edi,20(%esi) movl %ebx,24(%esi) movl %ecx,28(%esi) movl %edi,20(%esp) movl 100(%esp),%edi movl %ebx,24(%esp) movl %ecx,28(%esp) cmpl 104(%esp),%edi - jb .L009grand_loop + jb .L010grand_loop movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .align 32 .L004shaext: subl $32,%esp movdqu (%esi),%xmm1 leal 128(%ebp),%ebp movdqu 16(%esi),%xmm2 movdqa 128(%ebp),%xmm7 pshufd $27,%xmm1,%xmm0 pshufd $177,%xmm1,%xmm1 pshufd $27,%xmm2,%xmm2 .byte 102,15,58,15,202,8 punpcklqdq %xmm0,%xmm2 - jmp .L010loop_shaext + jmp .L011loop_shaext .align 16 -.L010loop_shaext: +.L011loop_shaext: movdqu (%edi),%xmm3 movdqu 16(%edi),%xmm4 movdqu 32(%edi),%xmm5 .byte 102,15,56,0,223 movdqu 48(%edi),%xmm6 movdqa %xmm2,16(%esp) movdqa -128(%ebp),%xmm0 paddd %xmm3,%xmm0 .byte 102,15,56,0,231 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 nop movdqa %xmm1,(%esp) .byte 15,56,203,202 movdqa -112(%ebp),%xmm0 paddd %xmm4,%xmm0 .byte 102,15,56,0,239 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 
leal 64(%edi),%edi .byte 15,56,204,220 .byte 15,56,203,202 movdqa -96(%ebp),%xmm0 paddd %xmm5,%xmm0 .byte 102,15,56,0,247 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm6,%xmm7 .byte 102,15,58,15,253,4 nop paddd %xmm7,%xmm3 .byte 15,56,204,229 .byte 15,56,203,202 movdqa -80(%ebp),%xmm0 paddd %xmm6,%xmm0 .byte 15,56,205,222 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm3,%xmm7 .byte 102,15,58,15,254,4 nop paddd %xmm7,%xmm4 .byte 15,56,204,238 .byte 15,56,203,202 movdqa -64(%ebp),%xmm0 paddd %xmm3,%xmm0 .byte 15,56,205,227 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm4,%xmm7 .byte 102,15,58,15,251,4 nop paddd %xmm7,%xmm5 .byte 15,56,204,243 .byte 15,56,203,202 movdqa -48(%ebp),%xmm0 paddd %xmm4,%xmm0 .byte 15,56,205,236 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm5,%xmm7 .byte 102,15,58,15,252,4 nop paddd %xmm7,%xmm6 .byte 15,56,204,220 .byte 15,56,203,202 movdqa -32(%ebp),%xmm0 paddd %xmm5,%xmm0 .byte 15,56,205,245 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm6,%xmm7 .byte 102,15,58,15,253,4 nop paddd %xmm7,%xmm3 .byte 15,56,204,229 .byte 15,56,203,202 movdqa -16(%ebp),%xmm0 paddd %xmm6,%xmm0 .byte 15,56,205,222 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm3,%xmm7 .byte 102,15,58,15,254,4 nop paddd %xmm7,%xmm4 .byte 15,56,204,238 .byte 15,56,203,202 movdqa (%ebp),%xmm0 paddd %xmm3,%xmm0 .byte 15,56,205,227 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm4,%xmm7 .byte 102,15,58,15,251,4 nop paddd %xmm7,%xmm5 .byte 15,56,204,243 .byte 15,56,203,202 movdqa 16(%ebp),%xmm0 paddd %xmm4,%xmm0 .byte 15,56,205,236 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm5,%xmm7 .byte 102,15,58,15,252,4 nop paddd %xmm7,%xmm6 .byte 15,56,204,220 .byte 15,56,203,202 movdqa 32(%ebp),%xmm0 paddd %xmm5,%xmm0 .byte 15,56,205,245 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm6,%xmm7 .byte 102,15,58,15,253,4 nop paddd %xmm7,%xmm3 .byte 15,56,204,229 .byte 15,56,203,202 movdqa 48(%ebp),%xmm0 paddd %xmm6,%xmm0 .byte 15,56,205,222 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm3,%xmm7 .byte 102,15,58,15,254,4 nop paddd %xmm7,%xmm4 .byte 15,56,204,238 .byte 15,56,203,202 movdqa 64(%ebp),%xmm0 paddd %xmm3,%xmm0 .byte 15,56,205,227 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm4,%xmm7 .byte 102,15,58,15,251,4 nop paddd %xmm7,%xmm5 .byte 15,56,204,243 .byte 15,56,203,202 movdqa 80(%ebp),%xmm0 paddd %xmm4,%xmm0 .byte 15,56,205,236 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 movdqa %xmm5,%xmm7 .byte 102,15,58,15,252,4 .byte 15,56,203,202 paddd %xmm7,%xmm6 movdqa 96(%ebp),%xmm0 paddd %xmm5,%xmm0 .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 .byte 15,56,205,245 movdqa 128(%ebp),%xmm7 .byte 15,56,203,202 movdqa 112(%ebp),%xmm0 paddd %xmm6,%xmm0 nop .byte 15,56,203,209 pshufd $14,%xmm0,%xmm0 cmpl %edi,%eax nop .byte 15,56,203,202 paddd 16(%esp),%xmm2 paddd (%esp),%xmm1 - jnz .L010loop_shaext + jnz .L011loop_shaext pshufd $177,%xmm2,%xmm2 pshufd $27,%xmm1,%xmm7 pshufd $177,%xmm1,%xmm1 punpckhqdq %xmm2,%xmm1 .byte 102,15,58,15,215,8 movl 44(%esp),%esp movdqu %xmm1,(%esi) movdqu %xmm2,16(%esi) popl %edi popl %esi popl %ebx popl %ebp ret .align 32 -.L005SSSE3: +.L006SSSE3: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edi movl %ebx,4(%esp) xorl %ecx,%ebx movl %ecx,8(%esp) movl %edi,12(%esp) movl 16(%esi),%edx movl 20(%esi),%edi movl 24(%esi),%ecx movl 28(%esi),%esi movl %edi,20(%esp) movl 100(%esp),%edi movl %ecx,24(%esp) movl %esi,28(%esp) movdqa 256(%ebp),%xmm7 - jmp .L011grand_ssse3 + jmp .L012grand_ssse3 
.align 16 -.L011grand_ssse3: +.L012grand_ssse3: movdqu (%edi),%xmm0 movdqu 16(%edi),%xmm1 movdqu 32(%edi),%xmm2 movdqu 48(%edi),%xmm3 addl $64,%edi .byte 102,15,56,0,199 movl %edi,100(%esp) .byte 102,15,56,0,207 movdqa (%ebp),%xmm4 .byte 102,15,56,0,215 movdqa 16(%ebp),%xmm5 paddd %xmm0,%xmm4 .byte 102,15,56,0,223 movdqa 32(%ebp),%xmm6 paddd %xmm1,%xmm5 movdqa 48(%ebp),%xmm7 movdqa %xmm4,32(%esp) paddd %xmm2,%xmm6 movdqa %xmm5,48(%esp) paddd %xmm3,%xmm7 movdqa %xmm6,64(%esp) movdqa %xmm7,80(%esp) - jmp .L012ssse3_00_47 + jmp .L013ssse3_00_47 .align 16 -.L012ssse3_00_47: +.L013ssse3_00_47: addl $64,%ebp movl %edx,%ecx movdqa %xmm1,%xmm4 rorl $14,%edx movl 20(%esp),%esi movdqa %xmm3,%xmm7 xorl %ecx,%edx movl 24(%esp),%edi .byte 102,15,58,15,224,4 xorl %edi,%esi rorl $5,%edx andl %ecx,%esi .byte 102,15,58,15,250,4 movl %ecx,16(%esp) xorl %ecx,%edx xorl %esi,%edi movdqa %xmm4,%xmm5 rorl $6,%edx movl %eax,%ecx movdqa %xmm4,%xmm6 addl %edi,%edx movl 4(%esp),%edi psrld $3,%xmm4 movl %eax,%esi rorl $9,%ecx paddd %xmm7,%xmm0 movl %eax,(%esp) xorl %eax,%ecx psrld $7,%xmm6 xorl %edi,%eax addl 28(%esp),%edx rorl $11,%ecx andl %eax,%ebx pshufd $250,%xmm3,%xmm7 xorl %esi,%ecx addl 32(%esp),%edx pslld $14,%xmm5 xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm4 addl %edx,%ebx addl 12(%esp),%edx psrld $11,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm5,%xmm4 movl 16(%esp),%esi xorl %ecx,%edx pslld $11,%xmm5 movl 20(%esp),%edi xorl %edi,%esi rorl $5,%edx pxor %xmm6,%xmm4 andl %ecx,%esi movl %ecx,12(%esp) movdqa %xmm7,%xmm6 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx pxor %xmm5,%xmm4 movl %ebx,%ecx addl %edi,%edx psrld $10,%xmm7 movl (%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm4,%xmm0 movl %ebx,28(%esp) xorl %ebx,%ecx psrlq $17,%xmm6 xorl %edi,%ebx addl 24(%esp),%edx rorl $11,%ecx pxor %xmm6,%xmm7 andl %ebx,%eax xorl %esi,%ecx psrlq $2,%xmm6 addl 36(%esp),%edx xorl %edi,%eax rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%eax addl 8(%esp),%edx pshufd $128,%xmm7,%xmm7 addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 12(%esp),%esi xorl %ecx,%edx movl 16(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi psrldq $8,%xmm7 movl %ecx,8(%esp) xorl %ecx,%edx xorl %esi,%edi paddd %xmm7,%xmm0 rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 28(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,24(%esp) pshufd $80,%xmm0,%xmm7 xorl %eax,%ecx xorl %edi,%eax addl 20(%esp),%edx movdqa %xmm7,%xmm6 rorl $11,%ecx psrld $10,%xmm7 andl %eax,%ebx psrlq $17,%xmm6 xorl %esi,%ecx addl 40(%esp),%edx xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%ebx addl 4(%esp),%edx psrlq $2,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm6,%xmm7 movl 8(%esp),%esi xorl %ecx,%edx movl 12(%esp),%edi pshufd $8,%xmm7,%xmm7 xorl %edi,%esi rorl $5,%edx movdqa (%ebp),%xmm6 andl %ecx,%esi movl %ecx,4(%esp) pslldq $8,%xmm7 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 24(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm7,%xmm0 movl %ebx,20(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 16(%esp),%edx paddd %xmm0,%xmm6 rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 44(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl (%esp),%edx addl %ecx,%eax movdqa %xmm6,32(%esp) movl %edx,%ecx movdqa %xmm2,%xmm4 rorl $14,%edx movl 4(%esp),%esi movdqa %xmm0,%xmm7 xorl %ecx,%edx movl 8(%esp),%edi .byte 102,15,58,15,225,4 xorl %edi,%esi rorl $5,%edx andl %ecx,%esi .byte 102,15,58,15,251,4 movl %ecx,(%esp) xorl %ecx,%edx xorl %esi,%edi movdqa %xmm4,%xmm5 rorl $6,%edx movl %eax,%ecx movdqa %xmm4,%xmm6 addl %edi,%edx 
movl 20(%esp),%edi psrld $3,%xmm4 movl %eax,%esi rorl $9,%ecx paddd %xmm7,%xmm1 movl %eax,16(%esp) xorl %eax,%ecx psrld $7,%xmm6 xorl %edi,%eax addl 12(%esp),%edx rorl $11,%ecx andl %eax,%ebx pshufd $250,%xmm0,%xmm7 xorl %esi,%ecx addl 48(%esp),%edx pslld $14,%xmm5 xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm4 addl %edx,%ebx addl 28(%esp),%edx psrld $11,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm5,%xmm4 movl (%esp),%esi xorl %ecx,%edx pslld $11,%xmm5 movl 4(%esp),%edi xorl %edi,%esi rorl $5,%edx pxor %xmm6,%xmm4 andl %ecx,%esi movl %ecx,28(%esp) movdqa %xmm7,%xmm6 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx pxor %xmm5,%xmm4 movl %ebx,%ecx addl %edi,%edx psrld $10,%xmm7 movl 16(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm4,%xmm1 movl %ebx,12(%esp) xorl %ebx,%ecx psrlq $17,%xmm6 xorl %edi,%ebx addl 8(%esp),%edx rorl $11,%ecx pxor %xmm6,%xmm7 andl %ebx,%eax xorl %esi,%ecx psrlq $2,%xmm6 addl 52(%esp),%edx xorl %edi,%eax rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%eax addl 24(%esp),%edx pshufd $128,%xmm7,%xmm7 addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 28(%esp),%esi xorl %ecx,%edx movl (%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi psrldq $8,%xmm7 movl %ecx,24(%esp) xorl %ecx,%edx xorl %esi,%edi paddd %xmm7,%xmm1 rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 12(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,8(%esp) pshufd $80,%xmm1,%xmm7 xorl %eax,%ecx xorl %edi,%eax addl 4(%esp),%edx movdqa %xmm7,%xmm6 rorl $11,%ecx psrld $10,%xmm7 andl %eax,%ebx psrlq $17,%xmm6 xorl %esi,%ecx addl 56(%esp),%edx xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%ebx addl 20(%esp),%edx psrlq $2,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm6,%xmm7 movl 24(%esp),%esi xorl %ecx,%edx movl 28(%esp),%edi pshufd $8,%xmm7,%xmm7 xorl %edi,%esi rorl $5,%edx movdqa 16(%ebp),%xmm6 andl %ecx,%esi movl %ecx,20(%esp) pslldq $8,%xmm7 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 8(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm7,%xmm1 movl %ebx,4(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl (%esp),%edx paddd %xmm1,%xmm6 rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 60(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 16(%esp),%edx addl %ecx,%eax movdqa %xmm6,48(%esp) movl %edx,%ecx movdqa %xmm3,%xmm4 rorl $14,%edx movl 20(%esp),%esi movdqa %xmm1,%xmm7 xorl %ecx,%edx movl 24(%esp),%edi .byte 102,15,58,15,226,4 xorl %edi,%esi rorl $5,%edx andl %ecx,%esi .byte 102,15,58,15,248,4 movl %ecx,16(%esp) xorl %ecx,%edx xorl %esi,%edi movdqa %xmm4,%xmm5 rorl $6,%edx movl %eax,%ecx movdqa %xmm4,%xmm6 addl %edi,%edx movl 4(%esp),%edi psrld $3,%xmm4 movl %eax,%esi rorl $9,%ecx paddd %xmm7,%xmm2 movl %eax,(%esp) xorl %eax,%ecx psrld $7,%xmm6 xorl %edi,%eax addl 28(%esp),%edx rorl $11,%ecx andl %eax,%ebx pshufd $250,%xmm1,%xmm7 xorl %esi,%ecx addl 64(%esp),%edx pslld $14,%xmm5 xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm4 addl %edx,%ebx addl 12(%esp),%edx psrld $11,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm5,%xmm4 movl 16(%esp),%esi xorl %ecx,%edx pslld $11,%xmm5 movl 20(%esp),%edi xorl %edi,%esi rorl $5,%edx pxor %xmm6,%xmm4 andl %ecx,%esi movl %ecx,12(%esp) movdqa %xmm7,%xmm6 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx pxor %xmm5,%xmm4 movl %ebx,%ecx addl %edi,%edx psrld $10,%xmm7 movl (%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm4,%xmm2 movl %ebx,28(%esp) xorl %ebx,%ecx psrlq $17,%xmm6 xorl %edi,%ebx addl 24(%esp),%edx rorl $11,%ecx pxor %xmm6,%xmm7 andl %ebx,%eax xorl %esi,%ecx psrlq $2,%xmm6 addl 68(%esp),%edx xorl 
%edi,%eax rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%eax addl 8(%esp),%edx pshufd $128,%xmm7,%xmm7 addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 12(%esp),%esi xorl %ecx,%edx movl 16(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi psrldq $8,%xmm7 movl %ecx,8(%esp) xorl %ecx,%edx xorl %esi,%edi paddd %xmm7,%xmm2 rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 28(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,24(%esp) pshufd $80,%xmm2,%xmm7 xorl %eax,%ecx xorl %edi,%eax addl 20(%esp),%edx movdqa %xmm7,%xmm6 rorl $11,%ecx psrld $10,%xmm7 andl %eax,%ebx psrlq $17,%xmm6 xorl %esi,%ecx addl 72(%esp),%edx xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%ebx addl 4(%esp),%edx psrlq $2,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm6,%xmm7 movl 8(%esp),%esi xorl %ecx,%edx movl 12(%esp),%edi pshufd $8,%xmm7,%xmm7 xorl %edi,%esi rorl $5,%edx movdqa 32(%ebp),%xmm6 andl %ecx,%esi movl %ecx,4(%esp) pslldq $8,%xmm7 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 24(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm7,%xmm2 movl %ebx,20(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 16(%esp),%edx paddd %xmm2,%xmm6 rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 76(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl (%esp),%edx addl %ecx,%eax movdqa %xmm6,64(%esp) movl %edx,%ecx movdqa %xmm0,%xmm4 rorl $14,%edx movl 4(%esp),%esi movdqa %xmm2,%xmm7 xorl %ecx,%edx movl 8(%esp),%edi .byte 102,15,58,15,227,4 xorl %edi,%esi rorl $5,%edx andl %ecx,%esi .byte 102,15,58,15,249,4 movl %ecx,(%esp) xorl %ecx,%edx xorl %esi,%edi movdqa %xmm4,%xmm5 rorl $6,%edx movl %eax,%ecx movdqa %xmm4,%xmm6 addl %edi,%edx movl 20(%esp),%edi psrld $3,%xmm4 movl %eax,%esi rorl $9,%ecx paddd %xmm7,%xmm3 movl %eax,16(%esp) xorl %eax,%ecx psrld $7,%xmm6 xorl %edi,%eax addl 12(%esp),%edx rorl $11,%ecx andl %eax,%ebx pshufd $250,%xmm2,%xmm7 xorl %esi,%ecx addl 80(%esp),%edx pslld $14,%xmm5 xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm4 addl %edx,%ebx addl 28(%esp),%edx psrld $11,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm5,%xmm4 movl (%esp),%esi xorl %ecx,%edx pslld $11,%xmm5 movl 4(%esp),%edi xorl %edi,%esi rorl $5,%edx pxor %xmm6,%xmm4 andl %ecx,%esi movl %ecx,28(%esp) movdqa %xmm7,%xmm6 xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx pxor %xmm5,%xmm4 movl %ebx,%ecx addl %edi,%edx psrld $10,%xmm7 movl 16(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm4,%xmm3 movl %ebx,12(%esp) xorl %ebx,%ecx psrlq $17,%xmm6 xorl %edi,%ebx addl 8(%esp),%edx rorl $11,%ecx pxor %xmm6,%xmm7 andl %ebx,%eax xorl %esi,%ecx psrlq $2,%xmm6 addl 84(%esp),%edx xorl %edi,%eax rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%eax addl 24(%esp),%edx pshufd $128,%xmm7,%xmm7 addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 28(%esp),%esi xorl %ecx,%edx movl (%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi psrldq $8,%xmm7 movl %ecx,24(%esp) xorl %ecx,%edx xorl %esi,%edi paddd %xmm7,%xmm3 rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 12(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,8(%esp) pshufd $80,%xmm3,%xmm7 xorl %eax,%ecx xorl %edi,%eax addl 4(%esp),%edx movdqa %xmm7,%xmm6 rorl $11,%ecx psrld $10,%xmm7 andl %eax,%ebx psrlq $17,%xmm6 xorl %esi,%ecx addl 88(%esp),%edx xorl %edi,%ebx rorl $2,%ecx pxor %xmm6,%xmm7 addl %edx,%ebx addl 20(%esp),%edx psrlq $2,%xmm6 addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx pxor %xmm6,%xmm7 movl 24(%esp),%esi xorl %ecx,%edx movl 28(%esp),%edi pshufd $8,%xmm7,%xmm7 xorl %edi,%esi rorl $5,%edx movdqa 48(%ebp),%xmm6 andl %ecx,%esi movl %ecx,20(%esp) pslldq $8,%xmm7 xorl 
%ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 8(%esp),%edi movl %ebx,%esi rorl $9,%ecx paddd %xmm7,%xmm3 movl %ebx,4(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl (%esp),%edx paddd %xmm3,%xmm6 rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 92(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 16(%esp),%edx addl %ecx,%eax movdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne .L012ssse3_00_47 + jne .L013ssse3_00_47 movl %edx,%ecx rorl $14,%edx movl 20(%esp),%esi xorl %ecx,%edx movl 24(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 4(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,(%esp) xorl %eax,%ecx xorl %edi,%eax addl 28(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 32(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 12(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 16(%esp),%esi xorl %ecx,%edx movl 20(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,12(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl (%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,28(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 24(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 36(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 8(%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 12(%esp),%esi xorl %ecx,%edx movl 16(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 28(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,24(%esp) xorl %eax,%ecx xorl %edi,%eax addl 20(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 40(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 4(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 8(%esp),%esi xorl %ecx,%edx movl 12(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,4(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 24(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,20(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 16(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 44(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl (%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 4(%esp),%esi xorl %ecx,%edx movl 8(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 20(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,16(%esp) xorl %eax,%ecx xorl %edi,%eax addl 12(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 48(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 28(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl (%esp),%esi xorl %ecx,%edx movl 4(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,28(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 16(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,12(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 8(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 52(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 24(%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 28(%esp),%esi xorl %ecx,%edx movl (%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 12(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,8(%esp) xorl %eax,%ecx 
xorl %edi,%eax addl 4(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 56(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 20(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 24(%esp),%esi xorl %ecx,%edx movl 28(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,20(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 8(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,4(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl (%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 60(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 16(%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 20(%esp),%esi xorl %ecx,%edx movl 24(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,16(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 4(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,(%esp) xorl %eax,%ecx xorl %edi,%eax addl 28(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 64(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 12(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 16(%esp),%esi xorl %ecx,%edx movl 20(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,12(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl (%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,28(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 24(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 68(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 8(%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 12(%esp),%esi xorl %ecx,%edx movl 16(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,8(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 28(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,24(%esp) xorl %eax,%ecx xorl %edi,%eax addl 20(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 72(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 4(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 8(%esp),%esi xorl %ecx,%edx movl 12(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,4(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 24(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,20(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 16(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 76(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl (%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 4(%esp),%esi xorl %ecx,%edx movl 8(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 20(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,16(%esp) xorl %eax,%ecx xorl %edi,%eax addl 12(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 80(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 28(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl (%esp),%esi xorl %ecx,%edx movl 4(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,28(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 16(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,12(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl 8(%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 84(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 24(%esp),%edx addl %ecx,%eax movl %edx,%ecx rorl $14,%edx movl 28(%esp),%esi xorl %ecx,%edx movl (%esp),%edi xorl %edi,%esi rorl 
$5,%edx andl %ecx,%esi movl %ecx,24(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %eax,%ecx addl %edi,%edx movl 12(%esp),%edi movl %eax,%esi rorl $9,%ecx movl %eax,8(%esp) xorl %eax,%ecx xorl %edi,%eax addl 4(%esp),%edx rorl $11,%ecx andl %eax,%ebx xorl %esi,%ecx addl 88(%esp),%edx xorl %edi,%ebx rorl $2,%ecx addl %edx,%ebx addl 20(%esp),%edx addl %ecx,%ebx movl %edx,%ecx rorl $14,%edx movl 24(%esp),%esi xorl %ecx,%edx movl 28(%esp),%edi xorl %edi,%esi rorl $5,%edx andl %ecx,%esi movl %ecx,20(%esp) xorl %ecx,%edx xorl %esi,%edi rorl $6,%edx movl %ebx,%ecx addl %edi,%edx movl 8(%esp),%edi movl %ebx,%esi rorl $9,%ecx movl %ebx,4(%esp) xorl %ebx,%ecx xorl %edi,%ebx addl (%esp),%edx rorl $11,%ecx andl %ebx,%eax xorl %esi,%ecx addl 92(%esp),%edx xorl %edi,%eax rorl $2,%ecx addl %edx,%eax addl 16(%esp),%edx addl %ecx,%eax movl 96(%esp),%esi xorl %edi,%ebx movl 12(%esp),%ecx addl (%esi),%eax addl 4(%esi),%ebx addl 8(%esi),%edi addl 12(%esi),%ecx movl %eax,(%esi) movl %ebx,4(%esi) movl %edi,8(%esi) movl %ecx,12(%esi) movl %ebx,4(%esp) xorl %edi,%ebx movl %edi,8(%esp) movl %ecx,12(%esp) movl 20(%esp),%edi movl 24(%esp),%ecx addl 16(%esi),%edx addl 20(%esi),%edi addl 24(%esi),%ecx movl %edx,16(%esi) movl %edi,20(%esi) movl %edi,20(%esp) movl 28(%esp),%edi movl %ecx,24(%esi) addl 28(%esi),%edi movl %ecx,24(%esp) movl %edi,28(%esi) movl %edi,28(%esp) movl 100(%esp),%edi movdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb .L011grand_ssse3 + jb .L012grand_ssse3 movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L005AVX: + andl $264,%edx + cmpl $264,%edx + je .L014AVX_BMI + leal -96(%esp),%esp + vzeroall + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + vmovdqa 256(%ebp),%xmm7 + jmp .L015grand_avx +.align 32 +.L015grand_avx: + vmovdqu (%edi),%xmm0 + vmovdqu 16(%edi),%xmm1 + vmovdqu 32(%edi),%xmm2 + vmovdqu 48(%edi),%xmm3 + addl $64,%edi + vpshufb %xmm7,%xmm0,%xmm0 + movl %edi,100(%esp) + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd (%ebp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 16(%ebp),%xmm1,%xmm5 + vpaddd 32(%ebp),%xmm2,%xmm6 + vpaddd 48(%ebp),%xmm3,%xmm7 + vmovdqa %xmm4,32(%esp) + vmovdqa %xmm5,48(%esp) + vmovdqa %xmm6,64(%esp) + vmovdqa %xmm7,80(%esp) + jmp .L016avx_00_47 +.align 16 +.L016avx_00_47: + addl $64,%ebp + vpalignr $4,%xmm0,%xmm1,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + vpalignr $4,%xmm2,%xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + vpaddd %xmm7,%xmm0,%xmm0 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + vpshufd $250,%xmm3,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl 
%ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + vpaddd %xmm4,%xmm0,%xmm0 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + vpaddd %xmm7,%xmm0,%xmm0 + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm0,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm0,%xmm0 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + vpaddd (%ebp),%xmm0,%xmm6 + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,32(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + vpalignr $4,%xmm3,%xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + vpaddd %xmm7,%xmm1,%xmm1 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + vpshufd $250,%xmm0,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + vpaddd %xmm4,%xmm1,%xmm1 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl 
%edi,%ebx + addl 8(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + vpaddd %xmm7,%xmm1,%xmm1 + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm1,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm1,%xmm1 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 16(%ebp),%xmm1,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,48(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + vpalignr $4,%xmm0,%xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + vpaddd %xmm7,%xmm2,%xmm2 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + vpshufd $250,%xmm1,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + vpaddd %xmm4,%xmm2,%xmm2 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + vpaddd %xmm7,%xmm2,%xmm2 + xorl %ecx,%edx + movl 16(%esp),%edi + 
xorl %edi,%esi + vpshufd $80,%xmm2,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm2,%xmm2 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + vpaddd 32(%ebp),%xmm2,%xmm6 + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,64(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + vpalignr $4,%xmm1,%xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + vpaddd %xmm7,%xmm3,%xmm3 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + vpshufd $250,%xmm2,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + vpaddd %xmm4,%xmm3,%xmm3 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + vpaddd %xmm7,%xmm3,%xmm3 + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm3,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + vpxor 
%xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm3,%xmm3 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 48(%ebp),%xmm3,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L016avx_00_47 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl 
%edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + 
addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + vmovdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L015grand_avx + 
movl 108(%esp),%esp + vzeroall + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L014AVX_BMI: + leal -96(%esp),%esp + vzeroall + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + vmovdqa 256(%ebp),%xmm7 + jmp .L017grand_avx_bmi +.align 32 +.L017grand_avx_bmi: + vmovdqu (%edi),%xmm0 + vmovdqu 16(%edi),%xmm1 + vmovdqu 32(%edi),%xmm2 + vmovdqu 48(%edi),%xmm3 + addl $64,%edi + vpshufb %xmm7,%xmm0,%xmm0 + movl %edi,100(%esp) + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd (%ebp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 16(%ebp),%xmm1,%xmm5 + vpaddd 32(%ebp),%xmm2,%xmm6 + vpaddd 48(%ebp),%xmm3,%xmm7 + vmovdqa %xmm4,32(%esp) + vmovdqa %xmm5,48(%esp) + vmovdqa %xmm6,64(%esp) + vmovdqa %xmm7,80(%esp) + jmp .L018avx_bmi_00_47 +.align 16 +.L018avx_bmi_00_47: + addl $64,%ebp + vpalignr $4,%xmm0,%xmm1,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + vpaddd %xmm7,%xmm0,%xmm0 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 28(%esp),%edx + andl %eax,%ebx + addl 32(%esp),%edx + vpshufd $250,%xmm3,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 24(%esp),%edx + andl %ebx,%eax + addl 36(%esp),%edx + vpaddd %xmm4,%xmm0,%xmm0 + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm0,%xmm0 + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm0,%xmm7 + addl 20(%esp),%edx + andl %eax,%ebx + addl 40(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,20(%esp) + orl %esi,%edx + rorxl 
$2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm0,%xmm0 + addl 16(%esp),%edx + andl %ebx,%eax + addl 44(%esp),%edx + vpaddd (%ebp),%xmm0,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,32(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + vpaddd %xmm7,%xmm1,%xmm1 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 12(%esp),%edx + andl %eax,%ebx + addl 48(%esp),%edx + vpshufd $250,%xmm0,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 8(%esp),%edx + andl %ebx,%eax + addl 52(%esp),%edx + vpaddd %xmm4,%xmm1,%xmm1 + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm1,%xmm1 + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm1,%xmm7 + addl 4(%esp),%edx + andl %eax,%ebx + addl 56(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm1,%xmm1 + addl (%esp),%edx + andl %ebx,%eax + addl 60(%esp),%edx + vpaddd 16(%ebp),%xmm1,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,48(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + vpalignr $4,%xmm0,%xmm1,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 
20(%esp),%edx + movl %eax,(%esp) + vpaddd %xmm7,%xmm2,%xmm2 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 28(%esp),%edx + andl %eax,%ebx + addl 64(%esp),%edx + vpshufd $250,%xmm1,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 24(%esp),%edx + andl %ebx,%eax + addl 68(%esp),%edx + vpaddd %xmm4,%xmm2,%xmm2 + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm2,%xmm2 + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm2,%xmm7 + addl 20(%esp),%edx + andl %eax,%ebx + addl 72(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm2,%xmm2 + addl 16(%esp),%edx + andl %ebx,%eax + addl 76(%esp),%edx + vpaddd 32(%ebp),%xmm2,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,64(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + vpaddd %xmm7,%xmm3,%xmm3 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 12(%esp),%edx + andl %eax,%ebx + addl 80(%esp),%edx + vpshufd $250,%xmm2,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 
4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 8(%esp),%edx + andl %ebx,%eax + addl 84(%esp),%edx + vpaddd %xmm4,%xmm3,%xmm3 + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm3,%xmm3 + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm3,%xmm7 + addl 4(%esp),%edx + andl %eax,%ebx + addl 88(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm3,%xmm3 + addl (%esp),%edx + andl %ebx,%eax + addl 92(%esp),%edx + vpaddd 48(%ebp),%xmm3,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L018avx_bmi_00_47 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + andl %eax,%ebx + addl 32(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + andl %ebx,%eax + addl 36(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + andl %eax,%ebx + addl 40(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl 
$6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + andl %ebx,%eax + addl 44(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + andl %eax,%ebx + addl 48(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + andl %ebx,%eax + addl 52(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + andl %eax,%ebx + addl 56(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl (%esp),%edx + andl %ebx,%eax + addl 60(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + andl %eax,%ebx + addl 64(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + andl %ebx,%eax + addl 68(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl 
$11,%edx,%esi + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + andl %eax,%ebx + addl 72(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + andl %ebx,%eax + addl 76(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + andl %eax,%ebx + addl 80(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + andl %ebx,%eax + addl 84(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + andl %eax,%ebx + addl 88(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl (%esp),%edx + andl %ebx,%eax + addl 92(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl 
%edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + vmovdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L017grand_avx_bmi + movl 108(%esp),%esp + vzeroall popl %edi popl %esi popl %ebx popl %ebp ret .size sha256_block_data_order,.-.L_sha256_block_data_order_begin .comm OPENSSL_ia32cap_P,16,4 #endif Index: head/secure/lib/libcrypto/i386/sha512-586.S =================================================================== --- head/secure/lib/libcrypto/i386/sha512-586.S (revision 299480) +++ head/secure/lib/libcrypto/i386/sha512-586.S (revision 299481) @@ -1,5662 +1,5663 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from sha512-586.pl. #ifdef PIC .file "sha512-586.S" .text .globl sha512_block_data_order .type sha512_block_data_order,@function .align 16 sha512_block_data_order: .L_sha512_block_data_order_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl %esp,%ebx call .L000pic_point .L000pic_point: popl %ebp leal .L001K512-.L000pic_point(%ebp),%ebp subl $16,%esp andl $-64,%esp shll $7,%eax addl %edi,%eax movl %esi,(%esp) movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) leal OPENSSL_ia32cap_P-.L001K512(%ebp),%edx movl (%edx),%ecx testl $67108864,%ecx jz .L002loop_x86 movl 4(%edx),%edx movq (%esi),%mm0 andl $16777216,%ecx movq 8(%esi),%mm1 andl $512,%edx movq 16(%esi),%mm2 orl %edx,%ecx movq 24(%esi),%mm3 movq 32(%esi),%mm4 movq 40(%esi),%mm5 movq 48(%esi),%mm6 movq 56(%esi),%mm7 cmpl $16777728,%ecx je .L003SSSE3 subl $80,%esp jmp .L004loop_sse2 .align 16 .L004loop_sse2: movq %mm1,8(%esp) movq %mm2,16(%esp) movq %mm3,24(%esp) movq %mm5,40(%esp) movq %mm6,48(%esp) pxor %mm1,%mm2 movq %mm7,56(%esp) movq %mm0,%mm3 movl (%edi),%eax movl 4(%edi),%ebx addl $8,%edi movl $15,%edx bswap %eax bswap %ebx jmp .L00500_14_sse2 .align 16 .L00500_14_sse2: movd %eax,%mm1 movl (%edi),%eax movd %ebx,%mm7 movl 4(%edi),%ebx addl $8,%edi bswap %eax bswap %ebx punpckldq %mm1,%mm7 movq %mm4,%mm1 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 movq %mm3,%mm0 movq %mm7,72(%esp) movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 paddq (%ebp),%mm7 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 subl $8,%esp psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 40(%esp),%mm5 paddq %mm2,%mm3 movq %mm0,%mm2 addl $8,%ebp paddq %mm6,%mm3 movq 48(%esp),%mm6 decl %edx jnz .L00500_14_sse2 movd %eax,%mm1 movd %ebx,%mm7 punpckldq %mm1,%mm7 movq %mm4,%mm1 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 movq %mm3,%mm0 movq %mm7,72(%esp) movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 paddq (%ebp),%mm7 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 subl $8,%esp psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 
pxor %mm7,%mm6 movq 192(%esp),%mm7 paddq %mm2,%mm3 movq %mm0,%mm2 addl $8,%ebp paddq %mm6,%mm3 pxor %mm0,%mm0 movl $32,%edx jmp .L00616_79_sse2 .align 16 .L00616_79_sse2: movq 88(%esp),%mm5 movq %mm7,%mm1 psrlq $1,%mm7 movq %mm5,%mm6 psrlq $6,%mm5 psllq $56,%mm1 paddq %mm3,%mm0 movq %mm7,%mm3 psrlq $6,%mm7 pxor %mm1,%mm3 psllq $7,%mm1 pxor %mm7,%mm3 psrlq $1,%mm7 pxor %mm1,%mm3 movq %mm5,%mm1 psrlq $13,%mm5 pxor %mm3,%mm7 psllq $3,%mm6 pxor %mm5,%mm1 paddq 200(%esp),%mm7 pxor %mm6,%mm1 psrlq $42,%mm5 paddq 128(%esp),%mm7 pxor %mm5,%mm1 psllq $42,%mm6 movq 40(%esp),%mm5 pxor %mm6,%mm1 movq 48(%esp),%mm6 paddq %mm1,%mm7 movq %mm4,%mm1 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 movq %mm7,72(%esp) movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 paddq (%ebp),%mm7 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 subl $8,%esp psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 192(%esp),%mm7 paddq %mm6,%mm2 addl $8,%ebp movq 88(%esp),%mm5 movq %mm7,%mm1 psrlq $1,%mm7 movq %mm5,%mm6 psrlq $6,%mm5 psllq $56,%mm1 paddq %mm3,%mm2 movq %mm7,%mm3 psrlq $6,%mm7 pxor %mm1,%mm3 psllq $7,%mm1 pxor %mm7,%mm3 psrlq $1,%mm7 pxor %mm1,%mm3 movq %mm5,%mm1 psrlq $13,%mm5 pxor %mm3,%mm7 psllq $3,%mm6 pxor %mm5,%mm1 paddq 200(%esp),%mm7 pxor %mm6,%mm1 psrlq $42,%mm5 paddq 128(%esp),%mm7 pxor %mm5,%mm1 psllq $42,%mm6 movq 40(%esp),%mm5 pxor %mm6,%mm1 movq 48(%esp),%mm6 paddq %mm1,%mm7 movq %mm4,%mm1 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 movq %mm7,72(%esp) movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 paddq (%ebp),%mm7 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 subl $8,%esp psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 192(%esp),%mm7 paddq %mm6,%mm0 addl $8,%ebp decl %edx jnz .L00616_79_sse2 paddq %mm3,%mm0 movq 8(%esp),%mm1 movq 24(%esp),%mm3 movq 40(%esp),%mm5 movq 48(%esp),%mm6 movq 56(%esp),%mm7 pxor %mm1,%mm2 paddq (%esi),%mm0 paddq 8(%esi),%mm1 paddq 16(%esi),%mm2 paddq 24(%esi),%mm3 paddq 32(%esi),%mm4 paddq 40(%esi),%mm5 paddq 48(%esi),%mm6 paddq 56(%esi),%mm7 movl $640,%eax movq %mm0,(%esi) movq %mm1,8(%esi) movq %mm2,16(%esi) movq %mm3,24(%esi) movq %mm4,32(%esi) movq %mm5,40(%esi) movq %mm6,48(%esi) movq %mm7,56(%esi) leal (%esp,%eax,1),%esp subl %eax,%ebp cmpl 88(%esp),%edi jb .L004loop_sse2 movl 92(%esp),%esp emms popl %edi popl %esi popl %ebx popl %ebp ret .align 32 .L003SSSE3: leal -64(%esp),%edx subl $256,%esp movdqa 640(%ebp),%xmm1 movdqu (%edi),%xmm0 .byte 102,15,56,0,193 movdqa (%ebp),%xmm3 movdqa %xmm1,%xmm2 movdqu 16(%edi),%xmm1 paddq %xmm0,%xmm3 .byte 102,15,56,0,202 movdqa %xmm3,-128(%edx) movdqa 16(%ebp),%xmm4 movdqa %xmm2,%xmm3 movdqu 32(%edi),%xmm2 paddq %xmm1,%xmm4 .byte 102,15,56,0,211 movdqa %xmm4,-112(%edx) movdqa 32(%ebp),%xmm5 
movdqa %xmm3,%xmm4 movdqu 48(%edi),%xmm3 paddq %xmm2,%xmm5 .byte 102,15,56,0,220 movdqa %xmm5,-96(%edx) movdqa 48(%ebp),%xmm6 movdqa %xmm4,%xmm5 movdqu 64(%edi),%xmm4 paddq %xmm3,%xmm6 .byte 102,15,56,0,229 movdqa %xmm6,-80(%edx) movdqa 64(%ebp),%xmm7 movdqa %xmm5,%xmm6 movdqu 80(%edi),%xmm5 paddq %xmm4,%xmm7 .byte 102,15,56,0,238 movdqa %xmm7,-64(%edx) movdqa %xmm0,(%edx) movdqa 80(%ebp),%xmm0 movdqa %xmm6,%xmm7 movdqu 96(%edi),%xmm6 paddq %xmm5,%xmm0 .byte 102,15,56,0,247 movdqa %xmm0,-48(%edx) movdqa %xmm1,16(%edx) movdqa 96(%ebp),%xmm1 movdqa %xmm7,%xmm0 movdqu 112(%edi),%xmm7 paddq %xmm6,%xmm1 .byte 102,15,56,0,248 movdqa %xmm1,-32(%edx) movdqa %xmm2,32(%edx) movdqa 112(%ebp),%xmm2 movdqa (%edx),%xmm0 paddq %xmm7,%xmm2 movdqa %xmm2,-16(%edx) nop .align 32 .L007loop_ssse3: movdqa 16(%edx),%xmm2 movdqa %xmm3,48(%edx) leal 128(%ebp),%ebp movq %mm1,8(%esp) movl %edi,%ebx movq %mm2,16(%esp) leal 128(%edi),%edi movq %mm3,24(%esp) cmpl %eax,%edi movq %mm5,40(%esp) cmovbl %edi,%ebx movq %mm6,48(%esp) movl $4,%ecx pxor %mm1,%mm2 movq %mm7,56(%esp) pxor %mm3,%mm3 jmp .L00800_47_ssse3 .align 32 .L00800_47_ssse3: movdqa %xmm5,%xmm3 movdqa %xmm2,%xmm1 .byte 102,15,58,15,208,8 movdqa %xmm4,(%edx) .byte 102,15,58,15,220,8 movdqa %xmm2,%xmm4 psrlq $7,%xmm2 paddq %xmm3,%xmm0 movdqa %xmm4,%xmm3 psrlq $1,%xmm4 psllq $56,%xmm3 pxor %xmm4,%xmm2 psrlq $7,%xmm4 pxor %xmm3,%xmm2 psllq $7,%xmm3 pxor %xmm4,%xmm2 movdqa %xmm7,%xmm4 pxor %xmm3,%xmm2 movdqa %xmm7,%xmm3 psrlq $6,%xmm4 paddq %xmm2,%xmm0 movdqa %xmm7,%xmm2 psrlq $19,%xmm3 psllq $3,%xmm2 pxor %xmm3,%xmm4 psrlq $42,%xmm3 pxor %xmm2,%xmm4 psllq $42,%xmm2 pxor %xmm3,%xmm4 movdqa 32(%edx),%xmm3 pxor %xmm2,%xmm4 movdqa (%ebp),%xmm2 movq %mm4,%mm1 paddq %xmm4,%xmm0 movq -128(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) paddq %xmm0,%xmm2 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 32(%esp),%mm5 paddq %mm6,%mm2 movq 40(%esp),%mm6 movq %mm4,%mm1 movq -120(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,24(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,56(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 48(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 16(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq (%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 24(%esp),%mm5 paddq %mm6,%mm0 movq 32(%esp),%mm6 movdqa %xmm2,-128(%edx) movdqa %xmm6,%xmm4 movdqa %xmm3,%xmm2 .byte 102,15,58,15,217,8 movdqa %xmm5,16(%edx) .byte 102,15,58,15,229,8 movdqa %xmm3,%xmm5 psrlq $7,%xmm3 paddq %xmm4,%xmm1 movdqa %xmm5,%xmm4 psrlq $1,%xmm5 psllq $56,%xmm4 pxor %xmm5,%xmm3 psrlq $7,%xmm5 pxor %xmm4,%xmm3 psllq $7,%xmm4 pxor %xmm5,%xmm3 movdqa %xmm0,%xmm5 pxor %xmm4,%xmm3 movdqa %xmm0,%xmm4 psrlq 
$6,%xmm5 paddq %xmm3,%xmm1 movdqa %xmm0,%xmm3 psrlq $19,%xmm4 psllq $3,%xmm3 pxor %xmm4,%xmm5 psrlq $42,%xmm4 pxor %xmm3,%xmm5 psllq $42,%xmm3 pxor %xmm4,%xmm5 movdqa 48(%edx),%xmm4 pxor %xmm3,%xmm5 movdqa 16(%ebp),%xmm3 movq %mm4,%mm1 paddq %xmm5,%xmm1 movq -112(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,16(%esp) paddq %xmm1,%xmm3 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,48(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 40(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 8(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 56(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 16(%esp),%mm5 paddq %mm6,%mm2 movq 24(%esp),%mm6 movq %mm4,%mm1 movq -104(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,8(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,40(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 32(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq (%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 48(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 8(%esp),%mm5 paddq %mm6,%mm0 movq 16(%esp),%mm6 movdqa %xmm3,-112(%edx) movdqa %xmm7,%xmm5 movdqa %xmm4,%xmm3 .byte 102,15,58,15,226,8 movdqa %xmm6,32(%edx) .byte 102,15,58,15,238,8 movdqa %xmm4,%xmm6 psrlq $7,%xmm4 paddq %xmm5,%xmm2 movdqa %xmm6,%xmm5 psrlq $1,%xmm6 psllq $56,%xmm5 pxor %xmm6,%xmm4 psrlq $7,%xmm6 pxor %xmm5,%xmm4 psllq $7,%xmm5 pxor %xmm6,%xmm4 movdqa %xmm1,%xmm6 pxor %xmm5,%xmm4 movdqa %xmm1,%xmm5 psrlq $6,%xmm6 paddq %xmm4,%xmm2 movdqa %xmm1,%xmm4 psrlq $19,%xmm5 psllq $3,%xmm4 pxor %xmm5,%xmm6 psrlq $42,%xmm5 pxor %xmm4,%xmm6 psllq $42,%xmm4 pxor %xmm5,%xmm6 movdqa (%edx),%xmm5 pxor %xmm4,%xmm6 movdqa 32(%ebp),%xmm4 movq %mm4,%mm1 paddq %xmm6,%xmm2 movq -96(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,(%esp) paddq %xmm2,%xmm4 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,32(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 24(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 56(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 40(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq (%esp),%mm5 paddq %mm6,%mm2 movq 8(%esp),%mm6 movq %mm4,%mm1 movq -88(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,56(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,24(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 16(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 48(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 32(%esp),%mm1 psrlq 
$6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 56(%esp),%mm5 paddq %mm6,%mm0 movq (%esp),%mm6 movdqa %xmm4,-96(%edx) movdqa %xmm0,%xmm6 movdqa %xmm5,%xmm4 .byte 102,15,58,15,235,8 movdqa %xmm7,48(%edx) .byte 102,15,58,15,247,8 movdqa %xmm5,%xmm7 psrlq $7,%xmm5 paddq %xmm6,%xmm3 movdqa %xmm7,%xmm6 psrlq $1,%xmm7 psllq $56,%xmm6 pxor %xmm7,%xmm5 psrlq $7,%xmm7 pxor %xmm6,%xmm5 psllq $7,%xmm6 pxor %xmm7,%xmm5 movdqa %xmm2,%xmm7 pxor %xmm6,%xmm5 movdqa %xmm2,%xmm6 psrlq $6,%xmm7 paddq %xmm5,%xmm3 movdqa %xmm2,%xmm5 psrlq $19,%xmm6 psllq $3,%xmm5 pxor %xmm6,%xmm7 psrlq $42,%xmm6 pxor %xmm5,%xmm7 psllq $42,%xmm5 pxor %xmm6,%xmm7 movdqa 16(%edx),%xmm6 pxor %xmm5,%xmm7 movdqa 48(%ebp),%xmm5 movq %mm4,%mm1 paddq %xmm7,%xmm3 movq -80(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,48(%esp) paddq %xmm3,%xmm5 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,16(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 8(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 40(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 24(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 48(%esp),%mm5 paddq %mm6,%mm2 movq 56(%esp),%mm6 movq %mm4,%mm1 movq -72(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,40(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,8(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq (%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 32(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 16(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 40(%esp),%mm5 paddq %mm6,%mm0 movq 48(%esp),%mm6 movdqa %xmm5,-80(%edx) movdqa %xmm1,%xmm7 movdqa %xmm6,%xmm5 .byte 102,15,58,15,244,8 movdqa %xmm0,(%edx) .byte 102,15,58,15,248,8 movdqa %xmm6,%xmm0 psrlq $7,%xmm6 paddq %xmm7,%xmm4 movdqa %xmm0,%xmm7 psrlq $1,%xmm0 psllq $56,%xmm7 pxor %xmm0,%xmm6 psrlq $7,%xmm0 pxor %xmm7,%xmm6 psllq $7,%xmm7 pxor %xmm0,%xmm6 movdqa %xmm3,%xmm0 pxor %xmm7,%xmm6 movdqa %xmm3,%xmm7 psrlq $6,%xmm0 paddq %xmm6,%xmm4 movdqa %xmm3,%xmm6 psrlq $19,%xmm7 psllq $3,%xmm6 pxor %xmm7,%xmm0 psrlq $42,%xmm7 pxor %xmm6,%xmm0 psllq $42,%xmm6 pxor %xmm7,%xmm0 movdqa 32(%edx),%xmm7 pxor %xmm6,%xmm0 movdqa 64(%ebp),%xmm6 movq %mm4,%mm1 paddq %xmm0,%xmm4 movq -64(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) paddq %xmm4,%xmm6 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 
pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 32(%esp),%mm5 paddq %mm6,%mm2 movq 40(%esp),%mm6 movq %mm4,%mm1 movq -56(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,24(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,56(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 48(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 16(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq (%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 24(%esp),%mm5 paddq %mm6,%mm0 movq 32(%esp),%mm6 movdqa %xmm6,-64(%edx) movdqa %xmm2,%xmm0 movdqa %xmm7,%xmm6 .byte 102,15,58,15,253,8 movdqa %xmm1,16(%edx) .byte 102,15,58,15,193,8 movdqa %xmm7,%xmm1 psrlq $7,%xmm7 paddq %xmm0,%xmm5 movdqa %xmm1,%xmm0 psrlq $1,%xmm1 psllq $56,%xmm0 pxor %xmm1,%xmm7 psrlq $7,%xmm1 pxor %xmm0,%xmm7 psllq $7,%xmm0 pxor %xmm1,%xmm7 movdqa %xmm4,%xmm1 pxor %xmm0,%xmm7 movdqa %xmm4,%xmm0 psrlq $6,%xmm1 paddq %xmm7,%xmm5 movdqa %xmm4,%xmm7 psrlq $19,%xmm0 psllq $3,%xmm7 pxor %xmm0,%xmm1 psrlq $42,%xmm0 pxor %xmm7,%xmm1 psllq $42,%xmm7 pxor %xmm0,%xmm1 movdqa 48(%edx),%xmm0 pxor %xmm7,%xmm1 movdqa 80(%ebp),%xmm7 movq %mm4,%mm1 paddq %xmm1,%xmm5 movq -48(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,16(%esp) paddq %xmm5,%xmm7 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,48(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 40(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 8(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 56(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 16(%esp),%mm5 paddq %mm6,%mm2 movq 24(%esp),%mm6 movq %mm4,%mm1 movq -40(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,8(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,40(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 32(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq (%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 48(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 8(%esp),%mm5 paddq %mm6,%mm0 movq 16(%esp),%mm6 movdqa %xmm7,-48(%edx) movdqa %xmm3,%xmm1 movdqa %xmm0,%xmm7 .byte 102,15,58,15,198,8 movdqa %xmm2,32(%edx) .byte 102,15,58,15,202,8 movdqa %xmm0,%xmm2 psrlq $7,%xmm0 paddq %xmm1,%xmm6 movdqa %xmm2,%xmm1 psrlq $1,%xmm2 psllq $56,%xmm1 pxor %xmm2,%xmm0 psrlq $7,%xmm2 pxor %xmm1,%xmm0 psllq $7,%xmm1 pxor %xmm2,%xmm0 movdqa %xmm5,%xmm2 pxor %xmm1,%xmm0 movdqa %xmm5,%xmm1 psrlq $6,%xmm2 paddq %xmm0,%xmm6 movdqa %xmm5,%xmm0 psrlq $19,%xmm1 psllq $3,%xmm0 pxor %xmm1,%xmm2 psrlq $42,%xmm1 pxor %xmm0,%xmm2 psllq $42,%xmm0 pxor %xmm1,%xmm2 movdqa (%edx),%xmm1 pxor %xmm0,%xmm2 movdqa 96(%ebp),%xmm0 movq %mm4,%mm1 paddq %xmm2,%xmm6 movq -32(%edx),%mm7 pxor 
%mm6,%mm5 psrlq $14,%mm1 movq %mm4,(%esp) paddq %xmm6,%xmm0 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,32(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 24(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 56(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 40(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq (%esp),%mm5 paddq %mm6,%mm2 movq 8(%esp),%mm6 movq %mm4,%mm1 movq -24(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,56(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,24(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 16(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 48(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 32(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 56(%esp),%mm5 paddq %mm6,%mm0 movq (%esp),%mm6 movdqa %xmm0,-32(%edx) movdqa %xmm4,%xmm2 movdqa %xmm1,%xmm0 .byte 102,15,58,15,207,8 movdqa %xmm3,48(%edx) .byte 102,15,58,15,211,8 movdqa %xmm1,%xmm3 psrlq $7,%xmm1 paddq %xmm2,%xmm7 movdqa %xmm3,%xmm2 psrlq $1,%xmm3 psllq $56,%xmm2 pxor %xmm3,%xmm1 psrlq $7,%xmm3 pxor %xmm2,%xmm1 psllq $7,%xmm2 pxor %xmm3,%xmm1 movdqa %xmm6,%xmm3 pxor %xmm2,%xmm1 movdqa %xmm6,%xmm2 psrlq $6,%xmm3 paddq %xmm1,%xmm7 movdqa %xmm6,%xmm1 psrlq $19,%xmm2 psllq $3,%xmm1 pxor %xmm2,%xmm3 psrlq $42,%xmm2 pxor %xmm1,%xmm3 psllq $42,%xmm1 pxor %xmm2,%xmm3 movdqa 16(%edx),%xmm2 pxor %xmm1,%xmm3 movdqa 112(%ebp),%xmm1 movq %mm4,%mm1 paddq %xmm3,%xmm7 movq -16(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,48(%esp) paddq %xmm7,%xmm1 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,16(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 8(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 40(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 24(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 48(%esp),%mm5 paddq %mm6,%mm2 movq 56(%esp),%mm6 movq %mm4,%mm1 movq -8(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,40(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,8(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq (%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 32(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 16(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 40(%esp),%mm5 paddq %mm6,%mm0 movq 48(%esp),%mm6 movdqa %xmm1,-16(%edx) leal 128(%ebp),%ebp decl %ecx jnz 
.L00800_47_ssse3 movdqa (%ebp),%xmm1 leal -640(%ebp),%ebp movdqu (%ebx),%xmm0 .byte 102,15,56,0,193 movdqa (%ebp),%xmm3 movdqa %xmm1,%xmm2 movdqu 16(%ebx),%xmm1 paddq %xmm0,%xmm3 .byte 102,15,56,0,202 movq %mm4,%mm1 movq -128(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 32(%esp),%mm5 paddq %mm6,%mm2 movq 40(%esp),%mm6 movq %mm4,%mm1 movq -120(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,24(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,56(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 48(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 16(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq (%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 24(%esp),%mm5 paddq %mm6,%mm0 movq 32(%esp),%mm6 movdqa %xmm3,-128(%edx) movdqa 16(%ebp),%xmm4 movdqa %xmm2,%xmm3 movdqu 32(%ebx),%xmm2 paddq %xmm1,%xmm4 .byte 102,15,56,0,211 movq %mm4,%mm1 movq -112(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,16(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,48(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 40(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 8(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 56(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 16(%esp),%mm5 paddq %mm6,%mm2 movq 24(%esp),%mm6 movq %mm4,%mm1 movq -104(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,8(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,40(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 32(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq (%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 48(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 8(%esp),%mm5 paddq %mm6,%mm0 movq 16(%esp),%mm6 movdqa %xmm4,-112(%edx) movdqa 32(%ebp),%xmm5 movdqa %xmm3,%xmm4 movdqu 48(%ebx),%xmm3 paddq %xmm2,%xmm5 .byte 102,15,56,0,220 movq %mm4,%mm1 movq -96(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor 
%mm1,%mm3 movq %mm0,32(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 24(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 56(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 40(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq (%esp),%mm5 paddq %mm6,%mm2 movq 8(%esp),%mm6 movq %mm4,%mm1 movq -88(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,56(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,24(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 16(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 48(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 32(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 56(%esp),%mm5 paddq %mm6,%mm0 movq (%esp),%mm6 movdqa %xmm5,-96(%edx) movdqa 48(%ebp),%xmm6 movdqa %xmm4,%xmm5 movdqu 64(%ebx),%xmm4 paddq %xmm3,%xmm6 .byte 102,15,56,0,229 movq %mm4,%mm1 movq -80(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,48(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,16(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 8(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 40(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 24(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 48(%esp),%mm5 paddq %mm6,%mm2 movq 56(%esp),%mm6 movq %mm4,%mm1 movq -72(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,40(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,8(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq (%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 32(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 16(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 40(%esp),%mm5 paddq %mm6,%mm0 movq 48(%esp),%mm6 movdqa %xmm6,-80(%edx) movdqa 64(%ebp),%xmm7 movdqa %xmm5,%xmm6 movdqu 80(%ebx),%xmm5 paddq %xmm4,%xmm7 .byte 102,15,56,0,238 movq %mm4,%mm1 movq -64(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor 
%mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 32(%esp),%mm5 paddq %mm6,%mm2 movq 40(%esp),%mm6 movq %mm4,%mm1 movq -56(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,24(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,56(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 48(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 16(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq (%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 24(%esp),%mm5 paddq %mm6,%mm0 movq 32(%esp),%mm6 movdqa %xmm7,-64(%edx) movdqa %xmm0,(%edx) movdqa 80(%ebp),%xmm0 movdqa %xmm6,%xmm7 movdqu 96(%ebx),%xmm6 paddq %xmm5,%xmm0 .byte 102,15,56,0,247 movq %mm4,%mm1 movq -48(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,16(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,48(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 40(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 8(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 56(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 16(%esp),%mm5 paddq %mm6,%mm2 movq 24(%esp),%mm6 movq %mm4,%mm1 movq -40(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,8(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,40(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 32(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq (%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 48(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 8(%esp),%mm5 paddq %mm6,%mm0 movq 16(%esp),%mm6 movdqa %xmm0,-48(%edx) movdqa %xmm1,16(%edx) movdqa 96(%ebp),%xmm1 movdqa %xmm7,%xmm0 movdqu 112(%ebx),%xmm7 paddq %xmm6,%xmm1 .byte 102,15,56,0,248 movq %mm4,%mm1 movq -32(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,32(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 24(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 56(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 40(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq (%esp),%mm5 paddq %mm6,%mm2 movq 8(%esp),%mm6 movq %mm4,%mm1 movq -24(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,56(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,24(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 
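# With the schedule loop above finished (decl %ecx / jnz .L00800_47_ssse3),
# the remaining MMX rounds of the current block appear to be interleaved with
# preparation of the next one: %ebx was set in .L007loop_ssse3 (via cmovb) to
# the following 128-byte chunk, or left on the current one for the final
# block, and the movdqu/pshufb/paddq stores to -128(%edx)..-16(%edx) here
# byte-swap that data and pre-add the first round constants, so the next pass
# of .L007loop_ssse3 starts with its message schedule already in the scratch
# area.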
paddq 16(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 48(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 32(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 56(%esp),%mm5 paddq %mm6,%mm0 movq (%esp),%mm6 movdqa %xmm1,-32(%edx) movdqa %xmm2,32(%edx) movdqa 112(%ebp),%xmm2 movdqa (%edx),%xmm0 paddq %xmm7,%xmm2 movq %mm4,%mm1 movq -16(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,48(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,16(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 8(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 40(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 24(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 48(%esp),%mm5 paddq %mm6,%mm2 movq 56(%esp),%mm6 movq %mm4,%mm1 movq -8(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,40(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,8(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq (%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 32(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 16(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 40(%esp),%mm5 paddq %mm6,%mm0 movq 48(%esp),%mm6 movdqa %xmm2,-16(%edx) movq 8(%esp),%mm1 paddq %mm3,%mm0 movq 24(%esp),%mm3 movq 56(%esp),%mm7 pxor %mm1,%mm2 paddq (%esi),%mm0 paddq 8(%esi),%mm1 paddq 16(%esi),%mm2 paddq 24(%esi),%mm3 paddq 32(%esi),%mm4 paddq 40(%esi),%mm5 paddq 48(%esi),%mm6 paddq 56(%esi),%mm7 movq %mm0,(%esi) movq %mm1,8(%esi) movq %mm2,16(%esi) movq %mm3,24(%esi) movq %mm4,32(%esi) movq %mm5,40(%esi) movq %mm6,48(%esi) movq %mm7,56(%esi) cmpl %eax,%edi jb .L007loop_ssse3 movl 76(%edx),%esp emms popl %edi popl %esi popl %ebx popl %ebp ret .align 16 .L002loop_x86: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 16(%edi),%eax movl 20(%edi),%ebx movl 24(%edi),%ecx movl 28(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 32(%edi),%eax movl 36(%edi),%ebx movl 40(%edi),%ecx movl 44(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 48(%edi),%eax movl 52(%edi),%ebx movl 56(%edi),%ecx movl 60(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 64(%edi),%eax movl 68(%edi),%ebx movl 72(%edi),%ecx movl 76(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 80(%edi),%eax movl 84(%edi),%ebx movl 88(%edi),%ecx movl 92(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 96(%edi),%eax movl 100(%edi),%ebx movl 
104(%edi),%ecx movl 108(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 112(%edi),%eax movl 116(%edi),%ebx movl 120(%edi),%ecx movl 124(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx addl $128,%edi subl $72,%esp movl %edi,204(%esp) leal 8(%esp),%edi movl $16,%ecx .long 2784229001 .align 16 .L00900_15_x86: movl 40(%esp),%ecx movl 44(%esp),%edx movl %ecx,%esi shrl $9,%ecx movl %edx,%edi shrl $9,%edx movl %ecx,%ebx shll $14,%esi movl %edx,%eax shll $14,%edi xorl %esi,%ebx shrl $5,%ecx xorl %edi,%eax shrl $5,%edx xorl %ecx,%eax shll $4,%esi xorl %edx,%ebx shll $4,%edi xorl %esi,%ebx shrl $4,%ecx xorl %edi,%eax shrl $4,%edx xorl %ecx,%eax shll $5,%esi xorl %edx,%ebx shll $5,%edi xorl %esi,%eax xorl %edi,%ebx movl 48(%esp),%ecx movl 52(%esp),%edx movl 56(%esp),%esi movl 60(%esp),%edi addl 64(%esp),%eax adcl 68(%esp),%ebx xorl %esi,%ecx xorl %edi,%edx andl 40(%esp),%ecx andl 44(%esp),%edx addl 192(%esp),%eax adcl 196(%esp),%ebx xorl %esi,%ecx xorl %edi,%edx movl (%ebp),%esi movl 4(%ebp),%edi addl %ecx,%eax adcl %edx,%ebx movl 32(%esp),%ecx movl 36(%esp),%edx addl %esi,%eax adcl %edi,%ebx movl %eax,(%esp) movl %ebx,4(%esp) addl %ecx,%eax adcl %edx,%ebx movl 8(%esp),%ecx movl 12(%esp),%edx movl %eax,32(%esp) movl %ebx,36(%esp) movl %ecx,%esi shrl $2,%ecx movl %edx,%edi shrl $2,%edx movl %ecx,%ebx shll $4,%esi movl %edx,%eax shll $4,%edi xorl %esi,%ebx shrl $5,%ecx xorl %edi,%eax shrl $5,%edx xorl %ecx,%ebx shll $21,%esi xorl %edx,%eax shll $21,%edi xorl %esi,%eax shrl $21,%ecx xorl %edi,%ebx shrl $21,%edx xorl %ecx,%eax shll $5,%esi xorl %edx,%ebx shll $5,%edi xorl %esi,%eax xorl %edi,%ebx movl 8(%esp),%ecx movl 12(%esp),%edx movl 16(%esp),%esi movl 20(%esp),%edi addl (%esp),%eax adcl 4(%esp),%ebx orl %esi,%ecx orl %edi,%edx andl 24(%esp),%ecx andl 28(%esp),%edx andl 8(%esp),%esi andl 12(%esp),%edi orl %esi,%ecx orl %edi,%edx addl %ecx,%eax adcl %edx,%ebx movl %eax,(%esp) movl %ebx,4(%esp) movb (%ebp),%dl subl $8,%esp leal 8(%ebp),%ebp cmpb $148,%dl jne .L00900_15_x86 .align 16 .L01016_79_x86: movl 312(%esp),%ecx movl 316(%esp),%edx movl %ecx,%esi shrl $1,%ecx movl %edx,%edi shrl $1,%edx movl %ecx,%eax shll $24,%esi movl %edx,%ebx shll $24,%edi xorl %esi,%ebx shrl $6,%ecx xorl %edi,%eax shrl $6,%edx xorl %ecx,%eax shll $7,%esi xorl %edx,%ebx shll $1,%edi xorl %esi,%ebx shrl $1,%ecx xorl %edi,%eax shrl $1,%edx xorl %ecx,%eax shll $6,%edi xorl %edx,%ebx xorl %edi,%eax movl %eax,(%esp) movl %ebx,4(%esp) movl 208(%esp),%ecx movl 212(%esp),%edx movl %ecx,%esi shrl $6,%ecx movl %edx,%edi shrl $6,%edx movl %ecx,%eax shll $3,%esi movl %edx,%ebx shll $3,%edi xorl %esi,%eax shrl $13,%ecx xorl %edi,%ebx shrl $13,%edx xorl %ecx,%eax shll $10,%esi xorl %edx,%ebx shll $10,%edi xorl %esi,%ebx shrl $10,%ecx xorl %edi,%eax shrl $10,%edx xorl %ecx,%ebx shll $13,%edi xorl %edx,%eax xorl %edi,%eax movl 320(%esp),%ecx movl 324(%esp),%edx addl (%esp),%eax adcl 4(%esp),%ebx movl 248(%esp),%esi movl 252(%esp),%edi addl %ecx,%eax adcl %edx,%ebx addl %esi,%eax adcl %edi,%ebx movl %eax,192(%esp) movl %ebx,196(%esp) movl 40(%esp),%ecx movl 44(%esp),%edx movl %ecx,%esi shrl $9,%ecx movl %edx,%edi shrl $9,%edx movl %ecx,%ebx shll $14,%esi movl %edx,%eax shll $14,%edi xorl %esi,%ebx shrl $5,%ecx xorl %edi,%eax shrl $5,%edx xorl %ecx,%eax shll $4,%esi xorl %edx,%ebx shll $4,%edi xorl %esi,%ebx shrl $4,%ecx xorl %edi,%eax shrl $4,%edx xorl %ecx,%eax shll $5,%esi xorl %edx,%ebx shll $5,%edi xorl %esi,%eax xorl %edi,%ebx movl 
48(%esp),%ecx movl 52(%esp),%edx movl 56(%esp),%esi movl 60(%esp),%edi addl 64(%esp),%eax adcl 68(%esp),%ebx xorl %esi,%ecx xorl %edi,%edx andl 40(%esp),%ecx andl 44(%esp),%edx addl 192(%esp),%eax adcl 196(%esp),%ebx xorl %esi,%ecx xorl %edi,%edx movl (%ebp),%esi movl 4(%ebp),%edi addl %ecx,%eax adcl %edx,%ebx movl 32(%esp),%ecx movl 36(%esp),%edx addl %esi,%eax adcl %edi,%ebx movl %eax,(%esp) movl %ebx,4(%esp) addl %ecx,%eax adcl %edx,%ebx movl 8(%esp),%ecx movl 12(%esp),%edx movl %eax,32(%esp) movl %ebx,36(%esp) movl %ecx,%esi shrl $2,%ecx movl %edx,%edi shrl $2,%edx movl %ecx,%ebx shll $4,%esi movl %edx,%eax shll $4,%edi xorl %esi,%ebx shrl $5,%ecx xorl %edi,%eax shrl $5,%edx xorl %ecx,%ebx shll $21,%esi xorl %edx,%eax shll $21,%edi xorl %esi,%eax shrl $21,%ecx xorl %edi,%ebx shrl $21,%edx xorl %ecx,%eax shll $5,%esi xorl %edx,%ebx shll $5,%edi xorl %esi,%eax xorl %edi,%ebx movl 8(%esp),%ecx movl 12(%esp),%edx movl 16(%esp),%esi movl 20(%esp),%edi addl (%esp),%eax adcl 4(%esp),%ebx orl %esi,%ecx orl %edi,%edx andl 24(%esp),%ecx andl 28(%esp),%edx andl 8(%esp),%esi andl 12(%esp),%edi orl %esi,%ecx orl %edi,%edx addl %ecx,%eax adcl %edx,%ebx movl %eax,(%esp) movl %ebx,4(%esp) movb (%ebp),%dl subl $8,%esp leal 8(%ebp),%ebp cmpb $23,%dl jne .L01016_79_x86 movl 840(%esp),%esi movl 844(%esp),%edi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx addl 8(%esp),%eax adcl 12(%esp),%ebx movl %eax,(%esi) movl %ebx,4(%esi) addl 16(%esp),%ecx adcl 20(%esp),%edx movl %ecx,8(%esi) movl %edx,12(%esi) movl 16(%esi),%eax movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edx addl 24(%esp),%eax adcl 28(%esp),%ebx movl %eax,16(%esi) movl %ebx,20(%esi) addl 32(%esp),%ecx adcl 36(%esp),%edx movl %ecx,24(%esi) movl %edx,28(%esi) movl 32(%esi),%eax movl 36(%esi),%ebx movl 40(%esi),%ecx movl 44(%esi),%edx addl 40(%esp),%eax adcl 44(%esp),%ebx movl %eax,32(%esi) movl %ebx,36(%esi) addl 48(%esp),%ecx adcl 52(%esp),%edx movl %ecx,40(%esi) movl %edx,44(%esi) movl 48(%esi),%eax movl 52(%esi),%ebx movl 56(%esi),%ecx movl 60(%esi),%edx addl 56(%esp),%eax adcl 60(%esp),%ebx movl %eax,48(%esi) movl %ebx,52(%esi) addl 64(%esp),%ecx adcl 68(%esp),%edx movl %ecx,56(%esi) movl %edx,60(%esi) addl $840,%esp subl $640,%ebp cmpl 8(%esp),%edi jb .L002loop_x86 movl 12(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L001K512: .long 3609767458,1116352408 .long 602891725,1899447441 .long 3964484399,3049323471 .long 2173295548,3921009573 .long 4081628472,961987163 .long 3053834265,1508970993 .long 2937671579,2453635748 .long 3664609560,2870763221 .long 2734883394,3624381080 .long 1164996542,310598401 .long 1323610764,607225278 .long 3590304994,1426881987 .long 4068182383,1925078388 .long 991336113,2162078206 .long 633803317,2614888103 .long 3479774868,3248222580 .long 2666613458,3835390401 .long 944711139,4022224774 .long 2341262773,264347078 .long 2007800933,604807628 .long 1495990901,770255983 .long 1856431235,1249150122 .long 3175218132,1555081692 .long 2198950837,1996064986 .long 3999719339,2554220882 .long 766784016,2821834349 .long 2566594879,2952996808 .long 3203337956,3210313671 .long 1034457026,3336571891 .long 2466948901,3584528711 .long 3758326383,113926993 .long 168717936,338241895 .long 1188179964,666307205 .long 1546045734,773529912 .long 1522805485,1294757372 .long 2643833823,1396182291 .long 2343527390,1695183700 .long 1014477480,1986661051 .long 1206759142,2177026350 .long 344077627,2456956037 .long 1290863460,2730485921 .long 3158454273,2820302411 .long 3505952657,3259730800 
.long 106217008,3345764771 .long 3606008344,3516065817 .long 1432725776,3600352804 .long 1467031594,4094571909 .long 851169720,275423344 .long 3100823752,430227734 .long 1363258195,506948616 .long 3750685593,659060556 .long 3785050280,883997877 .long 3318307427,958139571 .long 3812723403,1322822218 .long 2003034995,1537002063 .long 3602036899,1747873779 .long 1575990012,1955562222 .long 1125592928,2024104815 .long 2716904306,2227730452 .long 442776044,2361852424 .long 593698344,2428436474 .long 3733110249,2756734187 .long 2999351573,3204031479 .long 3815920427,3329325298 .long 3928383900,3391569614 .long 566280711,3515267271 .long 3454069534,3940187606 .long 4000239992,4118630271 .long 1914138554,116418474 .long 2731055270,174292421 .long 3203993006,289380356 .long 320620315,460393269 .long 587496836,685471733 .long 1086792851,852142971 .long 365543100,1017036298 .long 2618297676,1126000580 .long 3409855158,1288033470 .long 4234509866,1501505948 .long 987167468,1607167915 .long 1246189591,1816402316 .long 67438087,66051 .long 202182159,134810123 .size sha512_block_data_order,.-.L_sha512_block_data_order_begin .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .comm OPENSSL_ia32cap_P,16,4 #else .file "sha512-586.S" .text .globl sha512_block_data_order .type sha512_block_data_order,@function .align 16 sha512_block_data_order: .L_sha512_block_data_order_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl %esp,%ebx call .L000pic_point .L000pic_point: popl %ebp leal .L001K512-.L000pic_point(%ebp),%ebp subl $16,%esp andl $-64,%esp shll $7,%eax addl %edi,%eax movl %esi,(%esp) movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) leal OPENSSL_ia32cap_P,%edx movl (%edx),%ecx testl $67108864,%ecx jz .L002loop_x86 movl 4(%edx),%edx movq (%esi),%mm0 andl $16777216,%ecx movq 8(%esi),%mm1 andl $512,%edx movq 16(%esi),%mm2 orl %edx,%ecx movq 24(%esi),%mm3 movq 32(%esi),%mm4 movq 40(%esi),%mm5 movq 48(%esi),%mm6 movq 56(%esi),%mm7 cmpl $16777728,%ecx je .L003SSSE3 subl $80,%esp jmp .L004loop_sse2 .align 16 .L004loop_sse2: movq %mm1,8(%esp) movq %mm2,16(%esp) movq %mm3,24(%esp) movq %mm5,40(%esp) movq %mm6,48(%esp) pxor %mm1,%mm2 movq %mm7,56(%esp) movq %mm0,%mm3 movl (%edi),%eax movl 4(%edi),%ebx addl $8,%edi movl $15,%edx bswap %eax bswap %ebx jmp .L00500_14_sse2 .align 16 .L00500_14_sse2: movd %eax,%mm1 movl (%edi),%eax movd %ebx,%mm7 movl 4(%edi),%ebx addl $8,%edi bswap %eax bswap %ebx punpckldq %mm1,%mm7 movq %mm4,%mm1 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 movq %mm3,%mm0 movq %mm7,72(%esp) movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 paddq (%ebp),%mm7 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 subl $8,%esp psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 40(%esp),%mm5 paddq %mm2,%mm3 movq %mm0,%mm2 addl $8,%ebp paddq %mm6,%mm3 movq 48(%esp),%mm6 decl %edx jnz .L00500_14_sse2 movd %eax,%mm1 movd 
%ebx,%mm7 punpckldq %mm1,%mm7 movq %mm4,%mm1 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 movq %mm3,%mm0 movq %mm7,72(%esp) movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 paddq (%ebp),%mm7 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 subl $8,%esp psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 192(%esp),%mm7 paddq %mm2,%mm3 movq %mm0,%mm2 addl $8,%ebp paddq %mm6,%mm3 pxor %mm0,%mm0 movl $32,%edx jmp .L00616_79_sse2 .align 16 .L00616_79_sse2: movq 88(%esp),%mm5 movq %mm7,%mm1 psrlq $1,%mm7 movq %mm5,%mm6 psrlq $6,%mm5 psllq $56,%mm1 paddq %mm3,%mm0 movq %mm7,%mm3 psrlq $6,%mm7 pxor %mm1,%mm3 psllq $7,%mm1 pxor %mm7,%mm3 psrlq $1,%mm7 pxor %mm1,%mm3 movq %mm5,%mm1 psrlq $13,%mm5 pxor %mm3,%mm7 psllq $3,%mm6 pxor %mm5,%mm1 paddq 200(%esp),%mm7 pxor %mm6,%mm1 psrlq $42,%mm5 paddq 128(%esp),%mm7 pxor %mm5,%mm1 psllq $42,%mm6 movq 40(%esp),%mm5 pxor %mm6,%mm1 movq 48(%esp),%mm6 paddq %mm1,%mm7 movq %mm4,%mm1 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 movq %mm7,72(%esp) movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 paddq (%ebp),%mm7 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 subl $8,%esp psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 192(%esp),%mm7 paddq %mm6,%mm2 addl $8,%ebp movq 88(%esp),%mm5 movq %mm7,%mm1 psrlq $1,%mm7 movq %mm5,%mm6 psrlq $6,%mm5 psllq $56,%mm1 paddq %mm3,%mm2 movq %mm7,%mm3 psrlq $6,%mm7 pxor %mm1,%mm3 psllq $7,%mm1 pxor %mm7,%mm3 psrlq $1,%mm7 pxor %mm1,%mm3 movq %mm5,%mm1 psrlq $13,%mm5 pxor %mm3,%mm7 psllq $3,%mm6 pxor %mm5,%mm1 paddq 200(%esp),%mm7 pxor %mm6,%mm1 psrlq $42,%mm5 paddq 128(%esp),%mm7 pxor %mm5,%mm1 psllq $42,%mm6 movq 40(%esp),%mm5 pxor %mm6,%mm1 movq 48(%esp),%mm6 paddq %mm1,%mm7 movq %mm4,%mm1 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 movq %mm7,72(%esp) movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 paddq (%ebp),%mm7 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 subl $8,%esp psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 192(%esp),%mm7 paddq %mm6,%mm0 addl $8,%ebp decl %edx jnz .L00616_79_sse2 paddq %mm3,%mm0 movq 8(%esp),%mm1 movq 24(%esp),%mm3 movq 40(%esp),%mm5 movq 48(%esp),%mm6 movq 56(%esp),%mm7 pxor %mm1,%mm2 paddq (%esi),%mm0 paddq 8(%esi),%mm1 paddq 16(%esi),%mm2 paddq 24(%esi),%mm3 paddq 32(%esi),%mm4 paddq 40(%esi),%mm5 paddq 48(%esi),%mm6 paddq 
56(%esi),%mm7 movl $640,%eax movq %mm0,(%esi) movq %mm1,8(%esi) movq %mm2,16(%esi) movq %mm3,24(%esi) movq %mm4,32(%esi) movq %mm5,40(%esi) movq %mm6,48(%esi) movq %mm7,56(%esi) leal (%esp,%eax,1),%esp subl %eax,%ebp cmpl 88(%esp),%edi jb .L004loop_sse2 movl 92(%esp),%esp emms popl %edi popl %esi popl %ebx popl %ebp ret .align 32 .L003SSSE3: leal -64(%esp),%edx subl $256,%esp movdqa 640(%ebp),%xmm1 movdqu (%edi),%xmm0 .byte 102,15,56,0,193 movdqa (%ebp),%xmm3 movdqa %xmm1,%xmm2 movdqu 16(%edi),%xmm1 paddq %xmm0,%xmm3 .byte 102,15,56,0,202 movdqa %xmm3,-128(%edx) movdqa 16(%ebp),%xmm4 movdqa %xmm2,%xmm3 movdqu 32(%edi),%xmm2 paddq %xmm1,%xmm4 .byte 102,15,56,0,211 movdqa %xmm4,-112(%edx) movdqa 32(%ebp),%xmm5 movdqa %xmm3,%xmm4 movdqu 48(%edi),%xmm3 paddq %xmm2,%xmm5 .byte 102,15,56,0,220 movdqa %xmm5,-96(%edx) movdqa 48(%ebp),%xmm6 movdqa %xmm4,%xmm5 movdqu 64(%edi),%xmm4 paddq %xmm3,%xmm6 .byte 102,15,56,0,229 movdqa %xmm6,-80(%edx) movdqa 64(%ebp),%xmm7 movdqa %xmm5,%xmm6 movdqu 80(%edi),%xmm5 paddq %xmm4,%xmm7 .byte 102,15,56,0,238 movdqa %xmm7,-64(%edx) movdqa %xmm0,(%edx) movdqa 80(%ebp),%xmm0 movdqa %xmm6,%xmm7 movdqu 96(%edi),%xmm6 paddq %xmm5,%xmm0 .byte 102,15,56,0,247 movdqa %xmm0,-48(%edx) movdqa %xmm1,16(%edx) movdqa 96(%ebp),%xmm1 movdqa %xmm7,%xmm0 movdqu 112(%edi),%xmm7 paddq %xmm6,%xmm1 .byte 102,15,56,0,248 movdqa %xmm1,-32(%edx) movdqa %xmm2,32(%edx) movdqa 112(%ebp),%xmm2 movdqa (%edx),%xmm0 paddq %xmm7,%xmm2 movdqa %xmm2,-16(%edx) nop .align 32 .L007loop_ssse3: movdqa 16(%edx),%xmm2 movdqa %xmm3,48(%edx) leal 128(%ebp),%ebp movq %mm1,8(%esp) movl %edi,%ebx movq %mm2,16(%esp) leal 128(%edi),%edi movq %mm3,24(%esp) cmpl %eax,%edi movq %mm5,40(%esp) cmovbl %edi,%ebx movq %mm6,48(%esp) movl $4,%ecx pxor %mm1,%mm2 movq %mm7,56(%esp) pxor %mm3,%mm3 jmp .L00800_47_ssse3 .align 32 .L00800_47_ssse3: movdqa %xmm5,%xmm3 movdqa %xmm2,%xmm1 .byte 102,15,58,15,208,8 movdqa %xmm4,(%edx) .byte 102,15,58,15,220,8 movdqa %xmm2,%xmm4 psrlq $7,%xmm2 paddq %xmm3,%xmm0 movdqa %xmm4,%xmm3 psrlq $1,%xmm4 psllq $56,%xmm3 pxor %xmm4,%xmm2 psrlq $7,%xmm4 pxor %xmm3,%xmm2 psllq $7,%xmm3 pxor %xmm4,%xmm2 movdqa %xmm7,%xmm4 pxor %xmm3,%xmm2 movdqa %xmm7,%xmm3 psrlq $6,%xmm4 paddq %xmm2,%xmm0 movdqa %xmm7,%xmm2 psrlq $19,%xmm3 psllq $3,%xmm2 pxor %xmm3,%xmm4 psrlq $42,%xmm3 pxor %xmm2,%xmm4 psllq $42,%xmm2 pxor %xmm3,%xmm4 movdqa 32(%edx),%xmm3 pxor %xmm2,%xmm4 movdqa (%ebp),%xmm2 movq %mm4,%mm1 paddq %xmm4,%xmm0 movq -128(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) paddq %xmm0,%xmm2 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 32(%esp),%mm5 paddq %mm6,%mm2 movq 40(%esp),%mm6 movq %mm4,%mm1 movq -120(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,24(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,56(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 48(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 16(%esp),%mm4 paddq %mm7,%mm3 
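# .L003SSSE3 above loads the first 16 message quadwords with movdqu,
# byte-swaps them with pshufb (the .byte 102,15,56,0,... sequences are raw
# pshufb encodings; the swap mask is read from 640(%ebp), i.e. the 16 bytes
# appended after the 80 K[t] pairs of .L001K512), pre-adds the first round
# constants and parks the results at -128(%edx)..-16(%edx); .L00800_47_ssse3
# then consumes those quadwords while extending the schedule for the later
# rounds.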
movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq (%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 24(%esp),%mm5 paddq %mm6,%mm0 movq 32(%esp),%mm6 movdqa %xmm2,-128(%edx) movdqa %xmm6,%xmm4 movdqa %xmm3,%xmm2 .byte 102,15,58,15,217,8 movdqa %xmm5,16(%edx) .byte 102,15,58,15,229,8 movdqa %xmm3,%xmm5 psrlq $7,%xmm3 paddq %xmm4,%xmm1 movdqa %xmm5,%xmm4 psrlq $1,%xmm5 psllq $56,%xmm4 pxor %xmm5,%xmm3 psrlq $7,%xmm5 pxor %xmm4,%xmm3 psllq $7,%xmm4 pxor %xmm5,%xmm3 movdqa %xmm0,%xmm5 pxor %xmm4,%xmm3 movdqa %xmm0,%xmm4 psrlq $6,%xmm5 paddq %xmm3,%xmm1 movdqa %xmm0,%xmm3 psrlq $19,%xmm4 psllq $3,%xmm3 pxor %xmm4,%xmm5 psrlq $42,%xmm4 pxor %xmm3,%xmm5 psllq $42,%xmm3 pxor %xmm4,%xmm5 movdqa 48(%edx),%xmm4 pxor %xmm3,%xmm5 movdqa 16(%ebp),%xmm3 movq %mm4,%mm1 paddq %xmm5,%xmm1 movq -112(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,16(%esp) paddq %xmm1,%xmm3 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,48(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 40(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 8(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 56(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 16(%esp),%mm5 paddq %mm6,%mm2 movq 24(%esp),%mm6 movq %mm4,%mm1 movq -104(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,8(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,40(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 32(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq (%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 48(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 8(%esp),%mm5 paddq %mm6,%mm0 movq 16(%esp),%mm6 movdqa %xmm3,-112(%edx) movdqa %xmm7,%xmm5 movdqa %xmm4,%xmm3 .byte 102,15,58,15,226,8 movdqa %xmm6,32(%edx) .byte 102,15,58,15,238,8 movdqa %xmm4,%xmm6 psrlq $7,%xmm4 paddq %xmm5,%xmm2 movdqa %xmm6,%xmm5 psrlq $1,%xmm6 psllq $56,%xmm5 pxor %xmm6,%xmm4 psrlq $7,%xmm6 pxor %xmm5,%xmm4 psllq $7,%xmm5 pxor %xmm6,%xmm4 movdqa %xmm1,%xmm6 pxor %xmm5,%xmm4 movdqa %xmm1,%xmm5 psrlq $6,%xmm6 paddq %xmm4,%xmm2 movdqa %xmm1,%xmm4 psrlq $19,%xmm5 psllq $3,%xmm4 pxor %xmm5,%xmm6 psrlq $42,%xmm5 pxor %xmm4,%xmm6 psllq $42,%xmm4 pxor %xmm5,%xmm6 movdqa (%edx),%xmm5 pxor %xmm4,%xmm6 movdqa 32(%ebp),%xmm4 movq %mm4,%mm1 paddq %xmm6,%xmm2 movq -96(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,(%esp) paddq %xmm2,%xmm4 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,32(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 24(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 56(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 40(%esp),%mm1 psrlq $6,%mm5 
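# Neither MMX nor SSE2 offers a 64-bit rotate, so every rotation in these
# rounds is built from psrlq/psllq shifts folded together with pxor.  The
# shift counts correspond to the standard SHA-512 functions:
# Sigma1(e) = ROTR14 ^ ROTR18 ^ ROTR41, Sigma0(a) = ROTR28 ^ ROTR34 ^ ROTR39,
# sigma0(x) = ROTR1 ^ ROTR8 ^ SHR7 and sigma1(x) = ROTR19 ^ ROTR61 ^ SHR6.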
pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq (%esp),%mm5 paddq %mm6,%mm2 movq 8(%esp),%mm6 movq %mm4,%mm1 movq -88(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,56(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,24(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 16(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 48(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 32(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 56(%esp),%mm5 paddq %mm6,%mm0 movq (%esp),%mm6 movdqa %xmm4,-96(%edx) movdqa %xmm0,%xmm6 movdqa %xmm5,%xmm4 .byte 102,15,58,15,235,8 movdqa %xmm7,48(%edx) .byte 102,15,58,15,247,8 movdqa %xmm5,%xmm7 psrlq $7,%xmm5 paddq %xmm6,%xmm3 movdqa %xmm7,%xmm6 psrlq $1,%xmm7 psllq $56,%xmm6 pxor %xmm7,%xmm5 psrlq $7,%xmm7 pxor %xmm6,%xmm5 psllq $7,%xmm6 pxor %xmm7,%xmm5 movdqa %xmm2,%xmm7 pxor %xmm6,%xmm5 movdqa %xmm2,%xmm6 psrlq $6,%xmm7 paddq %xmm5,%xmm3 movdqa %xmm2,%xmm5 psrlq $19,%xmm6 psllq $3,%xmm5 pxor %xmm6,%xmm7 psrlq $42,%xmm6 pxor %xmm5,%xmm7 psllq $42,%xmm5 pxor %xmm6,%xmm7 movdqa 16(%edx),%xmm6 pxor %xmm5,%xmm7 movdqa 48(%ebp),%xmm5 movq %mm4,%mm1 paddq %xmm7,%xmm3 movq -80(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,48(%esp) paddq %xmm3,%xmm5 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,16(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 8(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 40(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 24(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 48(%esp),%mm5 paddq %mm6,%mm2 movq 56(%esp),%mm6 movq %mm4,%mm1 movq -72(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,40(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,8(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq (%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 32(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 16(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 40(%esp),%mm5 paddq %mm6,%mm0 movq 48(%esp),%mm6 movdqa %xmm5,-80(%edx) movdqa %xmm1,%xmm7 movdqa %xmm6,%xmm5 .byte 102,15,58,15,244,8 movdqa %xmm0,(%edx) .byte 102,15,58,15,248,8 movdqa %xmm6,%xmm0 psrlq $7,%xmm6 paddq %xmm7,%xmm4 movdqa %xmm0,%xmm7 psrlq $1,%xmm0 psllq $56,%xmm7 pxor %xmm0,%xmm6 psrlq $7,%xmm0 pxor %xmm7,%xmm6 psllq $7,%xmm7 pxor %xmm0,%xmm6 movdqa %xmm3,%xmm0 pxor %xmm7,%xmm6 movdqa %xmm3,%xmm7 psrlq $6,%xmm0 paddq %xmm6,%xmm4 movdqa %xmm3,%xmm6 psrlq $19,%xmm7 psllq $3,%xmm6 pxor %xmm7,%xmm0 psrlq $42,%xmm7 pxor %xmm6,%xmm0 psllq $42,%xmm6 pxor %xmm7,%xmm0 movdqa 
32(%edx),%xmm7 pxor %xmm6,%xmm0 movdqa 64(%ebp),%xmm6 movq %mm4,%mm1 paddq %xmm0,%xmm4 movq -64(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) paddq %xmm4,%xmm6 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 32(%esp),%mm5 paddq %mm6,%mm2 movq 40(%esp),%mm6 movq %mm4,%mm1 movq -56(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,24(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,56(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 48(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 16(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq (%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 24(%esp),%mm5 paddq %mm6,%mm0 movq 32(%esp),%mm6 movdqa %xmm6,-64(%edx) movdqa %xmm2,%xmm0 movdqa %xmm7,%xmm6 .byte 102,15,58,15,253,8 movdqa %xmm1,16(%edx) .byte 102,15,58,15,193,8 movdqa %xmm7,%xmm1 psrlq $7,%xmm7 paddq %xmm0,%xmm5 movdqa %xmm1,%xmm0 psrlq $1,%xmm1 psllq $56,%xmm0 pxor %xmm1,%xmm7 psrlq $7,%xmm1 pxor %xmm0,%xmm7 psllq $7,%xmm0 pxor %xmm1,%xmm7 movdqa %xmm4,%xmm1 pxor %xmm0,%xmm7 movdqa %xmm4,%xmm0 psrlq $6,%xmm1 paddq %xmm7,%xmm5 movdqa %xmm4,%xmm7 psrlq $19,%xmm0 psllq $3,%xmm7 pxor %xmm0,%xmm1 psrlq $42,%xmm0 pxor %xmm7,%xmm1 psllq $42,%xmm7 pxor %xmm0,%xmm1 movdqa 48(%edx),%xmm0 pxor %xmm7,%xmm1 movdqa 80(%ebp),%xmm7 movq %mm4,%mm1 paddq %xmm1,%xmm5 movq -48(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,16(%esp) paddq %xmm5,%xmm7 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,48(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 40(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 8(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 56(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 16(%esp),%mm5 paddq %mm6,%mm2 movq 24(%esp),%mm6 movq %mm4,%mm1 movq -40(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,8(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,40(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 32(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq (%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 48(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 
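# The boolean functions use select-style identities rather than the textbook
# forms: Ch(e,f,g) is computed as ((f ^ g) & e) ^ g (the pxor/pand/pxor triple
# on %mm5), and Maj(a,b,c) appears to be derived the same way from an a ^ b
# value carried across adjacent rounds (hence the alternating pand %mm0,%mm2
# and pand %mm2,%mm0), which saves a temporary register per round.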
movq 8(%esp),%mm5 paddq %mm6,%mm0 movq 16(%esp),%mm6 movdqa %xmm7,-48(%edx) movdqa %xmm3,%xmm1 movdqa %xmm0,%xmm7 .byte 102,15,58,15,198,8 movdqa %xmm2,32(%edx) .byte 102,15,58,15,202,8 movdqa %xmm0,%xmm2 psrlq $7,%xmm0 paddq %xmm1,%xmm6 movdqa %xmm2,%xmm1 psrlq $1,%xmm2 psllq $56,%xmm1 pxor %xmm2,%xmm0 psrlq $7,%xmm2 pxor %xmm1,%xmm0 psllq $7,%xmm1 pxor %xmm2,%xmm0 movdqa %xmm5,%xmm2 pxor %xmm1,%xmm0 movdqa %xmm5,%xmm1 psrlq $6,%xmm2 paddq %xmm0,%xmm6 movdqa %xmm5,%xmm0 psrlq $19,%xmm1 psllq $3,%xmm0 pxor %xmm1,%xmm2 psrlq $42,%xmm1 pxor %xmm0,%xmm2 psllq $42,%xmm0 pxor %xmm1,%xmm2 movdqa (%edx),%xmm1 pxor %xmm0,%xmm2 movdqa 96(%ebp),%xmm0 movq %mm4,%mm1 paddq %xmm2,%xmm6 movq -32(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,(%esp) paddq %xmm6,%xmm0 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,32(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 24(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 56(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 40(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq (%esp),%mm5 paddq %mm6,%mm2 movq 8(%esp),%mm6 movq %mm4,%mm1 movq -24(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,56(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,24(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 16(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 48(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 32(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 56(%esp),%mm5 paddq %mm6,%mm0 movq (%esp),%mm6 movdqa %xmm0,-32(%edx) movdqa %xmm4,%xmm2 movdqa %xmm1,%xmm0 .byte 102,15,58,15,207,8 movdqa %xmm3,48(%edx) .byte 102,15,58,15,211,8 movdqa %xmm1,%xmm3 psrlq $7,%xmm1 paddq %xmm2,%xmm7 movdqa %xmm3,%xmm2 psrlq $1,%xmm3 psllq $56,%xmm2 pxor %xmm3,%xmm1 psrlq $7,%xmm3 pxor %xmm2,%xmm1 psllq $7,%xmm2 pxor %xmm3,%xmm1 movdqa %xmm6,%xmm3 pxor %xmm2,%xmm1 movdqa %xmm6,%xmm2 psrlq $6,%xmm3 paddq %xmm1,%xmm7 movdqa %xmm6,%xmm1 psrlq $19,%xmm2 psllq $3,%xmm1 pxor %xmm2,%xmm3 psrlq $42,%xmm2 pxor %xmm1,%xmm3 psllq $42,%xmm1 pxor %xmm2,%xmm3 movdqa 16(%edx),%xmm2 pxor %xmm1,%xmm3 movdqa 112(%ebp),%xmm1 movq %mm4,%mm1 paddq %xmm3,%xmm7 movq -16(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,48(%esp) paddq %xmm7,%xmm1 pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,16(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 8(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 40(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 24(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 48(%esp),%mm5 paddq %mm6,%mm2 movq 56(%esp),%mm6 movq %mm4,%mm1 movq -8(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq 
%mm4,40(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,8(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq (%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 32(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 16(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 40(%esp),%mm5 paddq %mm6,%mm0 movq 48(%esp),%mm6 movdqa %xmm1,-16(%edx) leal 128(%ebp),%ebp decl %ecx jnz .L00800_47_ssse3 movdqa (%ebp),%xmm1 leal -640(%ebp),%ebp movdqu (%ebx),%xmm0 .byte 102,15,56,0,193 movdqa (%ebp),%xmm3 movdqa %xmm1,%xmm2 movdqu 16(%ebx),%xmm1 paddq %xmm0,%xmm3 .byte 102,15,56,0,202 movq %mm4,%mm1 movq -128(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 32(%esp),%mm5 paddq %mm6,%mm2 movq 40(%esp),%mm6 movq %mm4,%mm1 movq -120(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,24(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,56(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 48(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 16(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq (%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 24(%esp),%mm5 paddq %mm6,%mm0 movq 32(%esp),%mm6 movdqa %xmm3,-128(%edx) movdqa 16(%ebp),%xmm4 movdqa %xmm2,%xmm3 movdqu 32(%ebx),%xmm2 paddq %xmm1,%xmm4 .byte 102,15,56,0,211 movq %mm4,%mm1 movq -112(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,16(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,48(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 40(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 8(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 56(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 16(%esp),%mm5 paddq %mm6,%mm2 movq 24(%esp),%mm6 movq %mm4,%mm1 movq -104(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,8(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,40(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 32(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 
(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 48(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 8(%esp),%mm5 paddq %mm6,%mm0 movq 16(%esp),%mm6 movdqa %xmm4,-112(%edx) movdqa 32(%ebp),%xmm5 movdqa %xmm3,%xmm4 movdqu 48(%ebx),%xmm3 paddq %xmm2,%xmm5 .byte 102,15,56,0,220 movq %mm4,%mm1 movq -96(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,32(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 24(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 56(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 40(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq (%esp),%mm5 paddq %mm6,%mm2 movq 8(%esp),%mm6 movq %mm4,%mm1 movq -88(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,56(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,24(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 16(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 48(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 32(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 56(%esp),%mm5 paddq %mm6,%mm0 movq (%esp),%mm6 movdqa %xmm5,-96(%edx) movdqa 48(%ebp),%xmm6 movdqa %xmm4,%xmm5 movdqu 64(%ebx),%xmm4 paddq %xmm3,%xmm6 .byte 102,15,56,0,229 movq %mm4,%mm1 movq -80(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,48(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,16(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 8(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 40(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 24(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 48(%esp),%mm5 paddq %mm6,%mm2 movq 56(%esp),%mm6 movq %mm4,%mm1 movq -72(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,40(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,8(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq (%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 32(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 16(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 40(%esp),%mm5 paddq %mm6,%mm0 movq 48(%esp),%mm6 movdqa %xmm6,-80(%edx) movdqa 64(%ebp),%xmm7 movdqa 
%xmm5,%xmm6 movdqu 80(%ebx),%xmm5 paddq %xmm4,%xmm7 .byte 102,15,56,0,238 movq %mm4,%mm1 movq -64(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 56(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 8(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 32(%esp),%mm5 paddq %mm6,%mm2 movq 40(%esp),%mm6 movq %mm4,%mm1 movq -56(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,24(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,56(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 48(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 16(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq (%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 24(%esp),%mm5 paddq %mm6,%mm0 movq 32(%esp),%mm6 movdqa %xmm7,-64(%edx) movdqa %xmm0,(%edx) movdqa 80(%ebp),%xmm0 movdqa %xmm6,%xmm7 movdqu 96(%ebx),%xmm6 paddq %xmm5,%xmm0 .byte 102,15,56,0,247 movq %mm4,%mm1 movq -48(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,16(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,48(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 40(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 8(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 56(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 16(%esp),%mm5 paddq %mm6,%mm2 movq 24(%esp),%mm6 movq %mm4,%mm1 movq -40(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,8(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,40(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 32(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq (%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 48(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 8(%esp),%mm5 paddq %mm6,%mm0 movq 16(%esp),%mm6 movdqa %xmm0,-48(%edx) movdqa %xmm1,16(%edx) movdqa 96(%ebp),%xmm1 movdqa %xmm7,%xmm0 movdqu 112(%ebx),%xmm7 paddq %xmm6,%xmm1 .byte 102,15,56,0,248 movq %mm4,%mm1 movq -32(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,32(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 24(%esp),%mm7 
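# Throughout the MMX rounds the eight state words live in a 64-byte ring at
# (%esp)..56(%esp); rather than shuffling data between slots, each successive
# round simply uses displacements 8 bytes lower (mod 64), which is why the
# movq offsets appear to rotate from round to round while the instruction
# pattern itself stays the same.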
pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 56(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 40(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq (%esp),%mm5 paddq %mm6,%mm2 movq 8(%esp),%mm6 movq %mm4,%mm1 movq -24(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,56(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,24(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 16(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 48(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 32(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 56(%esp),%mm5 paddq %mm6,%mm0 movq (%esp),%mm6 movdqa %xmm1,-32(%edx) movdqa %xmm2,32(%edx) movdqa 112(%ebp),%xmm2 movdqa (%edx),%xmm0 paddq %xmm7,%xmm2 movq %mm4,%mm1 movq -16(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,48(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm0,16(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq 8(%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 40(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm0,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 24(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 pxor %mm7,%mm6 movq 48(%esp),%mm5 paddq %mm6,%mm2 movq 56(%esp),%mm6 movq %mm4,%mm1 movq -8(%edx),%mm7 pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,40(%esp) pand %mm4,%mm5 psllq $23,%mm4 paddq %mm3,%mm2 movq %mm1,%mm3 psrlq $4,%mm1 pxor %mm6,%mm5 pxor %mm4,%mm3 psllq $23,%mm4 pxor %mm1,%mm3 movq %mm2,8(%esp) paddq %mm5,%mm7 pxor %mm4,%mm3 psrlq $23,%mm1 paddq (%esp),%mm7 pxor %mm1,%mm3 psllq $4,%mm4 pxor %mm4,%mm3 movq 32(%esp),%mm4 paddq %mm7,%mm3 movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 movq %mm2,%mm6 movq %mm5,%mm7 psllq $25,%mm6 movq 16(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm2 psrlq $5,%mm5 pxor %mm6,%mm7 pand %mm2,%mm0 psllq $6,%mm6 pxor %mm5,%mm7 pxor %mm1,%mm0 pxor %mm7,%mm6 movq 40(%esp),%mm5 paddq %mm6,%mm0 movq 48(%esp),%mm6 movdqa %xmm2,-16(%edx) movq 8(%esp),%mm1 paddq %mm3,%mm0 movq 24(%esp),%mm3 movq 56(%esp),%mm7 pxor %mm1,%mm2 paddq (%esi),%mm0 paddq 8(%esi),%mm1 paddq 16(%esi),%mm2 paddq 24(%esi),%mm3 paddq 32(%esi),%mm4 paddq 40(%esi),%mm5 paddq 48(%esi),%mm6 paddq 56(%esi),%mm7 movq %mm0,(%esi) movq %mm1,8(%esi) movq %mm2,16(%esi) movq %mm3,24(%esi) movq %mm4,32(%esi) movq %mm5,40(%esi) movq %mm6,48(%esi) movq %mm7,56(%esi) cmpl %eax,%edi jb .L007loop_ssse3 movl 76(%edx),%esp emms popl %edi popl %esi popl %ebx popl %ebp ret .align 16 .L002loop_x86: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx movl 12(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 16(%edi),%eax movl 20(%edi),%ebx movl 24(%edi),%ecx movl 28(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx 
movl 32(%edi),%eax movl 36(%edi),%ebx movl 40(%edi),%ecx movl 44(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 48(%edi),%eax movl 52(%edi),%ebx movl 56(%edi),%ecx movl 60(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 64(%edi),%eax movl 68(%edi),%ebx movl 72(%edi),%ecx movl 76(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 80(%edi),%eax movl 84(%edi),%ebx movl 88(%edi),%ecx movl 92(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 96(%edi),%eax movl 100(%edi),%ebx movl 104(%edi),%ecx movl 108(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx movl 112(%edi),%eax movl 116(%edi),%ebx movl 120(%edi),%ecx movl 124(%edi),%edx bswap %eax bswap %ebx bswap %ecx bswap %edx pushl %eax pushl %ebx pushl %ecx pushl %edx addl $128,%edi subl $72,%esp movl %edi,204(%esp) leal 8(%esp),%edi movl $16,%ecx .long 2784229001 .align 16 .L00900_15_x86: movl 40(%esp),%ecx movl 44(%esp),%edx movl %ecx,%esi shrl $9,%ecx movl %edx,%edi shrl $9,%edx movl %ecx,%ebx shll $14,%esi movl %edx,%eax shll $14,%edi xorl %esi,%ebx shrl $5,%ecx xorl %edi,%eax shrl $5,%edx xorl %ecx,%eax shll $4,%esi xorl %edx,%ebx shll $4,%edi xorl %esi,%ebx shrl $4,%ecx xorl %edi,%eax shrl $4,%edx xorl %ecx,%eax shll $5,%esi xorl %edx,%ebx shll $5,%edi xorl %esi,%eax xorl %edi,%ebx movl 48(%esp),%ecx movl 52(%esp),%edx movl 56(%esp),%esi movl 60(%esp),%edi addl 64(%esp),%eax adcl 68(%esp),%ebx xorl %esi,%ecx xorl %edi,%edx andl 40(%esp),%ecx andl 44(%esp),%edx addl 192(%esp),%eax adcl 196(%esp),%ebx xorl %esi,%ecx xorl %edi,%edx movl (%ebp),%esi movl 4(%ebp),%edi addl %ecx,%eax adcl %edx,%ebx movl 32(%esp),%ecx movl 36(%esp),%edx addl %esi,%eax adcl %edi,%ebx movl %eax,(%esp) movl %ebx,4(%esp) addl %ecx,%eax adcl %edx,%ebx movl 8(%esp),%ecx movl 12(%esp),%edx movl %eax,32(%esp) movl %ebx,36(%esp) movl %ecx,%esi shrl $2,%ecx movl %edx,%edi shrl $2,%edx movl %ecx,%ebx shll $4,%esi movl %edx,%eax shll $4,%edi xorl %esi,%ebx shrl $5,%ecx xorl %edi,%eax shrl $5,%edx xorl %ecx,%ebx shll $21,%esi xorl %edx,%eax shll $21,%edi xorl %esi,%eax shrl $21,%ecx xorl %edi,%ebx shrl $21,%edx xorl %ecx,%eax shll $5,%esi xorl %edx,%ebx shll $5,%edi xorl %esi,%eax xorl %edi,%ebx movl 8(%esp),%ecx movl 12(%esp),%edx movl 16(%esp),%esi movl 20(%esp),%edi addl (%esp),%eax adcl 4(%esp),%ebx orl %esi,%ecx orl %edi,%edx andl 24(%esp),%ecx andl 28(%esp),%edx andl 8(%esp),%esi andl 12(%esp),%edi orl %esi,%ecx orl %edi,%edx addl %ecx,%eax adcl %edx,%ebx movl %eax,(%esp) movl %ebx,4(%esp) movb (%ebp),%dl subl $8,%esp leal 8(%ebp),%ebp cmpb $148,%dl jne .L00900_15_x86 .align 16 .L01016_79_x86: movl 312(%esp),%ecx movl 316(%esp),%edx movl %ecx,%esi shrl $1,%ecx movl %edx,%edi shrl $1,%edx movl %ecx,%eax shll $24,%esi movl %edx,%ebx shll $24,%edi xorl %esi,%ebx shrl $6,%ecx xorl %edi,%eax shrl $6,%edx xorl %ecx,%eax shll $7,%esi xorl %edx,%ebx shll $1,%edi xorl %esi,%ebx shrl $1,%ecx xorl %edi,%eax shrl $1,%edx xorl %ecx,%eax shll $6,%edi xorl %edx,%ebx xorl %edi,%eax movl %eax,(%esp) movl %ebx,4(%esp) movl 208(%esp),%ecx movl 212(%esp),%edx movl %ecx,%esi shrl $6,%ecx movl %edx,%edi shrl $6,%edx movl %ecx,%eax shll $3,%esi movl %edx,%ebx shll $3,%edi xorl %esi,%eax shrl $13,%ecx xorl %edi,%ebx shrl $13,%edx xorl %ecx,%eax shll $10,%esi xorl %edx,%ebx shll $10,%edi xorl %esi,%ebx shrl $10,%ecx xorl %edi,%eax 
shrl $10,%edx xorl %ecx,%ebx shll $13,%edi xorl %edx,%eax xorl %edi,%eax movl 320(%esp),%ecx movl 324(%esp),%edx addl (%esp),%eax adcl 4(%esp),%ebx movl 248(%esp),%esi movl 252(%esp),%edi addl %ecx,%eax adcl %edx,%ebx addl %esi,%eax adcl %edi,%ebx movl %eax,192(%esp) movl %ebx,196(%esp) movl 40(%esp),%ecx movl 44(%esp),%edx movl %ecx,%esi shrl $9,%ecx movl %edx,%edi shrl $9,%edx movl %ecx,%ebx shll $14,%esi movl %edx,%eax shll $14,%edi xorl %esi,%ebx shrl $5,%ecx xorl %edi,%eax shrl $5,%edx xorl %ecx,%eax shll $4,%esi xorl %edx,%ebx shll $4,%edi xorl %esi,%ebx shrl $4,%ecx xorl %edi,%eax shrl $4,%edx xorl %ecx,%eax shll $5,%esi xorl %edx,%ebx shll $5,%edi xorl %esi,%eax xorl %edi,%ebx movl 48(%esp),%ecx movl 52(%esp),%edx movl 56(%esp),%esi movl 60(%esp),%edi addl 64(%esp),%eax adcl 68(%esp),%ebx xorl %esi,%ecx xorl %edi,%edx andl 40(%esp),%ecx andl 44(%esp),%edx addl 192(%esp),%eax adcl 196(%esp),%ebx xorl %esi,%ecx xorl %edi,%edx movl (%ebp),%esi movl 4(%ebp),%edi addl %ecx,%eax adcl %edx,%ebx movl 32(%esp),%ecx movl 36(%esp),%edx addl %esi,%eax adcl %edi,%ebx movl %eax,(%esp) movl %ebx,4(%esp) addl %ecx,%eax adcl %edx,%ebx movl 8(%esp),%ecx movl 12(%esp),%edx movl %eax,32(%esp) movl %ebx,36(%esp) movl %ecx,%esi shrl $2,%ecx movl %edx,%edi shrl $2,%edx movl %ecx,%ebx shll $4,%esi movl %edx,%eax shll $4,%edi xorl %esi,%ebx shrl $5,%ecx xorl %edi,%eax shrl $5,%edx xorl %ecx,%ebx shll $21,%esi xorl %edx,%eax shll $21,%edi xorl %esi,%eax shrl $21,%ecx xorl %edi,%ebx shrl $21,%edx xorl %ecx,%eax shll $5,%esi xorl %edx,%ebx shll $5,%edi xorl %esi,%eax xorl %edi,%ebx movl 8(%esp),%ecx movl 12(%esp),%edx movl 16(%esp),%esi movl 20(%esp),%edi addl (%esp),%eax adcl 4(%esp),%ebx orl %esi,%ecx orl %edi,%edx andl 24(%esp),%ecx andl 28(%esp),%edx andl 8(%esp),%esi andl 12(%esp),%edi orl %esi,%ecx orl %edi,%edx addl %ecx,%eax adcl %edx,%ebx movl %eax,(%esp) movl %ebx,4(%esp) movb (%ebp),%dl subl $8,%esp leal 8(%ebp),%ebp cmpb $23,%dl jne .L01016_79_x86 movl 840(%esp),%esi movl 844(%esp),%edi movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx addl 8(%esp),%eax adcl 12(%esp),%ebx movl %eax,(%esi) movl %ebx,4(%esi) addl 16(%esp),%ecx adcl 20(%esp),%edx movl %ecx,8(%esi) movl %edx,12(%esi) movl 16(%esi),%eax movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edx addl 24(%esp),%eax adcl 28(%esp),%ebx movl %eax,16(%esi) movl %ebx,20(%esi) addl 32(%esp),%ecx adcl 36(%esp),%edx movl %ecx,24(%esi) movl %edx,28(%esi) movl 32(%esi),%eax movl 36(%esi),%ebx movl 40(%esi),%ecx movl 44(%esi),%edx addl 40(%esp),%eax adcl 44(%esp),%ebx movl %eax,32(%esi) movl %ebx,36(%esi) addl 48(%esp),%ecx adcl 52(%esp),%edx movl %ecx,40(%esi) movl %edx,44(%esi) movl 48(%esi),%eax movl 52(%esi),%ebx movl 56(%esi),%ecx movl 60(%esi),%edx addl 56(%esp),%eax adcl 60(%esp),%ebx movl %eax,48(%esi) movl %ebx,52(%esi) addl 64(%esp),%ecx adcl 68(%esp),%edx movl %ecx,56(%esi) movl %edx,60(%esi) addl $840,%esp subl $640,%ebp cmpl 8(%esp),%edi jb .L002loop_x86 movl 12(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L001K512: .long 3609767458,1116352408 .long 602891725,1899447441 .long 3964484399,3049323471 .long 2173295548,3921009573 .long 4081628472,961987163 .long 3053834265,1508970993 .long 2937671579,2453635748 .long 3664609560,2870763221 .long 2734883394,3624381080 .long 1164996542,310598401 .long 1323610764,607225278 .long 3590304994,1426881987 .long 4068182383,1925078388 .long 991336113,2162078206 .long 633803317,2614888103 .long 3479774868,3248222580 .long 2666613458,3835390401 .long 
944711139,4022224774 .long 2341262773,264347078 .long 2007800933,604807628 .long 1495990901,770255983 .long 1856431235,1249150122 .long 3175218132,1555081692 .long 2198950837,1996064986 .long 3999719339,2554220882 .long 766784016,2821834349 .long 2566594879,2952996808 .long 3203337956,3210313671 .long 1034457026,3336571891 .long 2466948901,3584528711 .long 3758326383,113926993 .long 168717936,338241895 .long 1188179964,666307205 .long 1546045734,773529912 .long 1522805485,1294757372 .long 2643833823,1396182291 .long 2343527390,1695183700 .long 1014477480,1986661051 .long 1206759142,2177026350 .long 344077627,2456956037 .long 1290863460,2730485921 .long 3158454273,2820302411 .long 3505952657,3259730800 .long 106217008,3345764771 .long 3606008344,3516065817 .long 1432725776,3600352804 .long 1467031594,4094571909 .long 851169720,275423344 .long 3100823752,430227734 .long 1363258195,506948616 .long 3750685593,659060556 .long 3785050280,883997877 .long 3318307427,958139571 .long 3812723403,1322822218 .long 2003034995,1537002063 .long 3602036899,1747873779 .long 1575990012,1955562222 .long 1125592928,2024104815 .long 2716904306,2227730452 .long 442776044,2361852424 .long 593698344,2428436474 .long 3733110249,2756734187 .long 2999351573,3204031479 .long 3815920427,3329325298 .long 3928383900,3391569614 .long 566280711,3515267271 .long 3454069534,3940187606 .long 4000239992,4118630271 .long 1914138554,116418474 .long 2731055270,174292421 .long 3203993006,289380356 .long 320620315,460393269 .long 587496836,685471733 .long 1086792851,852142971 .long 365543100,1017036298 .long 2618297676,1126000580 .long 3409855158,1288033470 .long 4234509866,1501505948 .long 987167468,1607167915 .long 1246189591,1816402316 .long 67438087,66051 .long 202182159,134810123 .size sha512_block_data_order,.-.L_sha512_block_data_order_begin .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .comm OPENSSL_ia32cap_P,16,4 #endif Index: head/secure/lib/libcrypto/i386/vpaes-x86.S =================================================================== --- head/secure/lib/libcrypto/i386/vpaes-x86.S (revision 299480) +++ head/secure/lib/libcrypto/i386/vpaes-x86.S (revision 299481) @@ -1,1326 +1,1327 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from vpaes-x86.pl. 
#ifdef PIC .file "vpaes-x86.S" .text .align 64 .L_vpaes_consts: .long 218628480,235210255,168496130,67568393 .long 252381056,17041926,33884169,51187212 .long 252645135,252645135,252645135,252645135 .long 1512730624,3266504856,1377990664,3401244816 .long 830229760,1275146365,2969422977,3447763452 .long 3411033600,2979783055,338359620,2782886510 .long 4209124096,907596821,221174255,1006095553 .long 191964160,3799684038,3164090317,1589111125 .long 182528256,1777043520,2877432650,3265356744 .long 1874708224,3503451415,3305285752,363511674 .long 1606117888,3487855781,1093350906,2384367825 .long 197121,67569157,134941193,202313229 .long 67569157,134941193,202313229,197121 .long 134941193,202313229,197121,67569157 .long 202313229,197121,67569157,134941193 .long 33619971,100992007,168364043,235736079 .long 235736079,33619971,100992007,168364043 .long 168364043,235736079,33619971,100992007 .long 100992007,168364043,235736079,33619971 .long 50462976,117835012,185207048,252579084 .long 252314880,51251460,117574920,184942860 .long 184682752,252054788,50987272,118359308 .long 118099200,185467140,251790600,50727180 .long 2946363062,528716217,1300004225,1881839624 .long 1532713819,1532713819,1532713819,1532713819 .long 3602276352,4288629033,3737020424,4153884961 .long 1354558464,32357713,2958822624,3775749553 .long 1201988352,132424512,1572796698,503232858 .long 2213177600,1597421020,4103937655,675398315 .long 2749646592,4273543773,1511898873,121693092 .long 3040248576,1103263732,2871565598,1608280554 .long 2236667136,2588920351,482954393,64377734 .long 3069987328,291237287,2117370568,3650299247 .long 533321216,3573750986,2572112006,1401264716 .long 1339849704,2721158661,548607111,3445553514 .long 2128193280,3054596040,2183486460,1257083700 .long 655635200,1165381986,3923443150,2344132524 .long 190078720,256924420,290342170,357187870 .long 1610966272,2263057382,4103205268,309794674 .long 2592527872,2233205587,1335446729,3402964816 .long 3973531904,3225098121,3002836325,1918774430 .long 3870401024,2102906079,2284471353,4117666579 .long 617007872,1021508343,366931923,691083277 .long 2528395776,3491914898,2968704004,1613121270 .long 3445188352,3247741094,844474987,4093578302 .long 651481088,1190302358,1689581232,574775300 .long 4289380608,206939853,2555985458,2489840491 .long 2130264064,327674451,3566485037,3349835193 .long 2470714624,316102159,3636825756,3393945945 .byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 .byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 .byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 .byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 .byte 118,101,114,115,105,116,121,41,0 .align 64 .type _vpaes_preheat,@function .align 16 _vpaes_preheat: addl (%esp),%ebp movdqa -48(%ebp),%xmm7 movdqa -16(%ebp),%xmm6 ret .size _vpaes_preheat,.-_vpaes_preheat .type _vpaes_encrypt_core,@function .align 16 _vpaes_encrypt_core: movl $16,%ecx movl 240(%edx),%eax movdqa %xmm6,%xmm1 movdqa (%ebp),%xmm2 pandn %xmm0,%xmm1 pand %xmm6,%xmm0 movdqu (%edx),%xmm5 .byte 102,15,56,0,208 movdqa 16(%ebp),%xmm0 pxor %xmm5,%xmm2 psrld $4,%xmm1 addl $16,%edx .byte 102,15,56,0,193 leal 192(%ebp),%ebx pxor %xmm2,%xmm0 jmp .L000enc_entry .align 16 .L001enc_loop: movdqa 32(%ebp),%xmm4 movdqa 48(%ebp),%xmm0 .byte 102,15,56,0,226 .byte 102,15,56,0,195 pxor %xmm5,%xmm4 movdqa 64(%ebp),%xmm5 pxor %xmm4,%xmm0 movdqa -64(%ebx,%ecx,1),%xmm1 .byte 102,15,56,0,234 movdqa 80(%ebp),%xmm2 movdqa (%ebx,%ecx,1),%xmm4 .byte 102,15,56,0,211 movdqa %xmm0,%xmm3 pxor %xmm5,%xmm2 .byte 
102,15,56,0,193 addl $16,%edx pxor %xmm2,%xmm0 .byte 102,15,56,0,220 addl $16,%ecx pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andl $48,%ecx subl $1,%eax pxor %xmm3,%xmm0 .L000enc_entry: movdqa %xmm6,%xmm1 movdqa -32(%ebp),%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm6,%xmm0 .byte 102,15,56,0,232 movdqa %xmm7,%xmm3 pxor %xmm1,%xmm0 .byte 102,15,56,0,217 movdqa %xmm7,%xmm4 pxor %xmm5,%xmm3 .byte 102,15,56,0,224 movdqa %xmm7,%xmm2 pxor %xmm5,%xmm4 .byte 102,15,56,0,211 movdqa %xmm7,%xmm3 pxor %xmm0,%xmm2 .byte 102,15,56,0,220 movdqu (%edx),%xmm5 pxor %xmm1,%xmm3 jnz .L001enc_loop movdqa 96(%ebp),%xmm4 movdqa 112(%ebp),%xmm0 .byte 102,15,56,0,226 pxor %xmm5,%xmm4 .byte 102,15,56,0,195 movdqa 64(%ebx,%ecx,1),%xmm1 pxor %xmm4,%xmm0 .byte 102,15,56,0,193 ret .size _vpaes_encrypt_core,.-_vpaes_encrypt_core .type _vpaes_decrypt_core,@function .align 16 _vpaes_decrypt_core: leal 608(%ebp),%ebx movl 240(%edx),%eax movdqa %xmm6,%xmm1 movdqa -64(%ebx),%xmm2 pandn %xmm0,%xmm1 movl %eax,%ecx psrld $4,%xmm1 movdqu (%edx),%xmm5 shll $4,%ecx pand %xmm6,%xmm0 .byte 102,15,56,0,208 movdqa -48(%ebx),%xmm0 xorl $48,%ecx .byte 102,15,56,0,193 andl $48,%ecx pxor %xmm5,%xmm2 movdqa 176(%ebp),%xmm5 pxor %xmm2,%xmm0 addl $16,%edx leal -352(%ebx,%ecx,1),%ecx jmp .L002dec_entry .align 16 .L003dec_loop: movdqa -32(%ebx),%xmm4 movdqa -16(%ebx),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 movdqa (%ebx),%xmm4 pxor %xmm1,%xmm0 movdqa 16(%ebx),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,197 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 movdqa 32(%ebx),%xmm4 pxor %xmm1,%xmm0 movdqa 48(%ebx),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,197 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 movdqa 64(%ebx),%xmm4 pxor %xmm1,%xmm0 movdqa 80(%ebx),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,197 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 addl $16,%edx .byte 102,15,58,15,237,12 pxor %xmm1,%xmm0 subl $1,%eax .L002dec_entry: movdqa %xmm6,%xmm1 movdqa -32(%ebp),%xmm2 pandn %xmm0,%xmm1 pand %xmm6,%xmm0 psrld $4,%xmm1 .byte 102,15,56,0,208 movdqa %xmm7,%xmm3 pxor %xmm1,%xmm0 .byte 102,15,56,0,217 movdqa %xmm7,%xmm4 pxor %xmm2,%xmm3 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 movdqa %xmm7,%xmm3 pxor %xmm0,%xmm2 .byte 102,15,56,0,220 movdqu (%edx),%xmm0 pxor %xmm1,%xmm3 jnz .L003dec_loop movdqa 96(%ebx),%xmm4 .byte 102,15,56,0,226 pxor %xmm0,%xmm4 movdqa 112(%ebx),%xmm0 movdqa (%ecx),%xmm2 .byte 102,15,56,0,195 pxor %xmm4,%xmm0 .byte 102,15,56,0,194 ret .size _vpaes_decrypt_core,.-_vpaes_decrypt_core .type _vpaes_schedule_core,@function .align 16 _vpaes_schedule_core: addl (%esp),%ebp movdqu (%esi),%xmm0 movdqa 320(%ebp),%xmm2 movdqa %xmm0,%xmm3 leal (%ebp),%ebx movdqa %xmm2,4(%esp) call _vpaes_schedule_transform movdqa %xmm0,%xmm7 testl %edi,%edi jnz .L004schedule_am_decrypting movdqu %xmm0,(%edx) jmp .L005schedule_go .L004schedule_am_decrypting: movdqa 256(%ebp,%ecx,1),%xmm1 .byte 102,15,56,0,217 movdqu %xmm3,(%edx) xorl $48,%ecx .L005schedule_go: cmpl $192,%eax ja .L006schedule_256 je .L007schedule_192 .L008schedule_128: movl $10,%eax .L009loop_schedule_128: call _vpaes_schedule_round decl %eax jz .L010schedule_mangle_last call _vpaes_schedule_mangle jmp .L009loop_schedule_128 .align 16 .L007schedule_192: movdqu 8(%esi),%xmm0 call _vpaes_schedule_transform movdqa %xmm0,%xmm6 pxor %xmm4,%xmm4 movhlps %xmm4,%xmm6 movl $4,%eax .L011loop_schedule_192: call _vpaes_schedule_round .byte 102,15,58,15,198,8 call _vpaes_schedule_mangle call _vpaes_schedule_192_smear call _vpaes_schedule_mangle call 
_vpaes_schedule_round decl %eax jz .L010schedule_mangle_last call _vpaes_schedule_mangle call _vpaes_schedule_192_smear jmp .L011loop_schedule_192 .align 16 .L006schedule_256: movdqu 16(%esi),%xmm0 call _vpaes_schedule_transform movl $7,%eax .L012loop_schedule_256: call _vpaes_schedule_mangle movdqa %xmm0,%xmm6 call _vpaes_schedule_round decl %eax jz .L010schedule_mangle_last call _vpaes_schedule_mangle pshufd $255,%xmm0,%xmm0 movdqa %xmm7,20(%esp) movdqa %xmm6,%xmm7 call .L_vpaes_schedule_low_round movdqa 20(%esp),%xmm7 jmp .L012loop_schedule_256 .align 16 .L010schedule_mangle_last: leal 384(%ebp),%ebx testl %edi,%edi jnz .L013schedule_mangle_last_dec movdqa 256(%ebp,%ecx,1),%xmm1 .byte 102,15,56,0,193 leal 352(%ebp),%ebx addl $32,%edx .L013schedule_mangle_last_dec: addl $-16,%edx pxor 336(%ebp),%xmm0 call _vpaes_schedule_transform movdqu %xmm0,(%edx) pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 ret .size _vpaes_schedule_core,.-_vpaes_schedule_core .type _vpaes_schedule_192_smear,@function .align 16 _vpaes_schedule_192_smear: pshufd $128,%xmm6,%xmm1 pshufd $254,%xmm7,%xmm0 pxor %xmm1,%xmm6 pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 movhlps %xmm1,%xmm6 ret .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear .type _vpaes_schedule_round,@function .align 16 _vpaes_schedule_round: movdqa 8(%esp),%xmm2 pxor %xmm1,%xmm1 .byte 102,15,58,15,202,15 .byte 102,15,58,15,210,15 pxor %xmm1,%xmm7 pshufd $255,%xmm0,%xmm0 .byte 102,15,58,15,192,1 movdqa %xmm2,8(%esp) .L_vpaes_schedule_low_round: movdqa %xmm7,%xmm1 pslldq $4,%xmm7 pxor %xmm1,%xmm7 movdqa %xmm7,%xmm1 pslldq $8,%xmm7 pxor %xmm1,%xmm7 pxor 336(%ebp),%xmm7 movdqa -16(%ebp),%xmm4 movdqa -48(%ebp),%xmm5 movdqa %xmm4,%xmm1 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm4,%xmm0 movdqa -32(%ebp),%xmm2 .byte 102,15,56,0,208 pxor %xmm1,%xmm0 movdqa %xmm5,%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 movdqa %xmm5,%xmm4 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm5,%xmm2 .byte 102,15,56,0,211 pxor %xmm0,%xmm2 movdqa %xmm5,%xmm3 .byte 102,15,56,0,220 pxor %xmm1,%xmm3 movdqa 32(%ebp),%xmm4 .byte 102,15,56,0,226 movdqa 48(%ebp),%xmm0 .byte 102,15,56,0,195 pxor %xmm4,%xmm0 pxor %xmm7,%xmm0 movdqa %xmm0,%xmm7 ret .size _vpaes_schedule_round,.-_vpaes_schedule_round .type _vpaes_schedule_transform,@function .align 16 _vpaes_schedule_transform: movdqa -16(%ebp),%xmm2 movdqa %xmm2,%xmm1 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm2,%xmm0 movdqa (%ebx),%xmm2 .byte 102,15,56,0,208 movdqa 16(%ebx),%xmm0 .byte 102,15,56,0,193 pxor %xmm2,%xmm0 ret .size _vpaes_schedule_transform,.-_vpaes_schedule_transform .type _vpaes_schedule_mangle,@function .align 16 _vpaes_schedule_mangle: movdqa %xmm0,%xmm4 movdqa 128(%ebp),%xmm5 testl %edi,%edi jnz .L014schedule_mangle_dec addl $16,%edx pxor 336(%ebp),%xmm4 .byte 102,15,56,0,229 movdqa %xmm4,%xmm3 .byte 102,15,56,0,229 pxor %xmm4,%xmm3 .byte 102,15,56,0,229 pxor %xmm4,%xmm3 jmp .L015schedule_mangle_both .align 16 .L014schedule_mangle_dec: movdqa -16(%ebp),%xmm2 leal 416(%ebp),%esi movdqa %xmm2,%xmm1 pandn %xmm4,%xmm1 psrld $4,%xmm1 pand %xmm2,%xmm4 movdqa (%esi),%xmm2 .byte 102,15,56,0,212 movdqa 16(%esi),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 .byte 102,15,56,0,221 movdqa 32(%esi),%xmm2 .byte 102,15,56,0,212 pxor %xmm3,%xmm2 movdqa 48(%esi),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 .byte 102,15,56,0,221 movdqa 64(%esi),%xmm2 .byte 102,15,56,0,212 pxor %xmm3,%xmm2 movdqa 80(%esi),%xmm3 .byte 102,15,56,0,217 pxor 
%xmm2,%xmm3 .byte 102,15,56,0,221 movdqa 96(%esi),%xmm2 .byte 102,15,56,0,212 pxor %xmm3,%xmm2 movdqa 112(%esi),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 addl $-16,%edx .L015schedule_mangle_both: movdqa 256(%ebp,%ecx,1),%xmm1 .byte 102,15,56,0,217 addl $-16,%ecx andl $48,%ecx movdqu %xmm3,(%edx) ret .size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle .globl vpaes_set_encrypt_key .type vpaes_set_encrypt_key,@function .align 16 vpaes_set_encrypt_key: .L_vpaes_set_encrypt_key_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi leal -56(%esp),%ebx movl 24(%esp),%eax andl $-16,%ebx movl 28(%esp),%edx xchgl %esp,%ebx movl %ebx,48(%esp) movl %eax,%ebx shrl $5,%ebx addl $5,%ebx movl %ebx,240(%edx) movl $48,%ecx movl $0,%edi leal .L_vpaes_consts+0x30-.L016pic_point,%ebp call _vpaes_schedule_core .L016pic_point: movl 48(%esp),%esp xorl %eax,%eax popl %edi popl %esi popl %ebx popl %ebp ret .size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin .globl vpaes_set_decrypt_key .type vpaes_set_decrypt_key,@function .align 16 vpaes_set_decrypt_key: .L_vpaes_set_decrypt_key_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi leal -56(%esp),%ebx movl 24(%esp),%eax andl $-16,%ebx movl 28(%esp),%edx xchgl %esp,%ebx movl %ebx,48(%esp) movl %eax,%ebx shrl $5,%ebx addl $5,%ebx movl %ebx,240(%edx) shll $4,%ebx leal 16(%edx,%ebx,1),%edx movl $1,%edi movl %eax,%ecx shrl $1,%ecx andl $32,%ecx xorl $32,%ecx leal .L_vpaes_consts+0x30-.L017pic_point,%ebp call _vpaes_schedule_core .L017pic_point: movl 48(%esp),%esp xorl %eax,%eax popl %edi popl %esi popl %ebx popl %ebp ret .size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin .globl vpaes_encrypt .type vpaes_encrypt,@function .align 16 vpaes_encrypt: .L_vpaes_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi leal .L_vpaes_consts+0x30-.L018pic_point,%ebp call _vpaes_preheat .L018pic_point: movl 20(%esp),%esi leal -56(%esp),%ebx movl 24(%esp),%edi andl $-16,%ebx movl 28(%esp),%edx xchgl %esp,%ebx movl %ebx,48(%esp) movdqu (%esi),%xmm0 call _vpaes_encrypt_core movdqu %xmm0,(%edi) movl 48(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .size vpaes_encrypt,.-.L_vpaes_encrypt_begin .globl vpaes_decrypt .type vpaes_decrypt,@function .align 16 vpaes_decrypt: .L_vpaes_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi leal .L_vpaes_consts+0x30-.L019pic_point,%ebp call _vpaes_preheat .L019pic_point: movl 20(%esp),%esi leal -56(%esp),%ebx movl 24(%esp),%edi andl $-16,%ebx movl 28(%esp),%edx xchgl %esp,%ebx movl %ebx,48(%esp) movdqu (%esi),%xmm0 call _vpaes_decrypt_core movdqu %xmm0,(%edi) movl 48(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .size vpaes_decrypt,.-.L_vpaes_decrypt_begin .globl vpaes_cbc_encrypt .type vpaes_cbc_encrypt,@function .align 16 vpaes_cbc_encrypt: .L_vpaes_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx subl $16,%eax jc .L020cbc_abort leal -56(%esp),%ebx movl 36(%esp),%ebp andl $-16,%ebx movl 40(%esp),%ecx xchgl %esp,%ebx movdqu (%ebp),%xmm1 subl %esi,%edi movl %ebx,48(%esp) movl %edi,(%esp) movl %edx,4(%esp) movl %ebp,8(%esp) movl %eax,%edi leal .L_vpaes_consts+0x30-.L021pic_point,%ebp call _vpaes_preheat .L021pic_point: cmpl $0,%ecx je .L022cbc_dec_loop jmp .L023cbc_enc_loop .align 16 .L023cbc_enc_loop: movdqu (%esi),%xmm0 pxor %xmm1,%xmm0 call _vpaes_encrypt_core movl (%esp),%ebx movl 4(%esp),%edx movdqa %xmm0,%xmm1 movdqu %xmm0,(%ebx,%esi,1) leal 16(%esi),%esi subl $16,%edi jnc 
.L023cbc_enc_loop jmp .L024cbc_done .align 16 .L022cbc_dec_loop: movdqu (%esi),%xmm0 movdqa %xmm1,16(%esp) movdqa %xmm0,32(%esp) call _vpaes_decrypt_core movl (%esp),%ebx movl 4(%esp),%edx pxor 16(%esp),%xmm0 movdqa 32(%esp),%xmm1 movdqu %xmm0,(%ebx,%esi,1) leal 16(%esi),%esi subl $16,%edi jnc .L022cbc_dec_loop .L024cbc_done: movl 8(%esp),%ebx movl 48(%esp),%esp movdqu %xmm1,(%ebx) .L020cbc_abort: popl %edi popl %esi popl %ebx popl %ebp ret .size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin #else .file "vpaes-x86.S" .text .align 64 .L_vpaes_consts: .long 218628480,235210255,168496130,67568393 .long 252381056,17041926,33884169,51187212 .long 252645135,252645135,252645135,252645135 .long 1512730624,3266504856,1377990664,3401244816 .long 830229760,1275146365,2969422977,3447763452 .long 3411033600,2979783055,338359620,2782886510 .long 4209124096,907596821,221174255,1006095553 .long 191964160,3799684038,3164090317,1589111125 .long 182528256,1777043520,2877432650,3265356744 .long 1874708224,3503451415,3305285752,363511674 .long 1606117888,3487855781,1093350906,2384367825 .long 197121,67569157,134941193,202313229 .long 67569157,134941193,202313229,197121 .long 134941193,202313229,197121,67569157 .long 202313229,197121,67569157,134941193 .long 33619971,100992007,168364043,235736079 .long 235736079,33619971,100992007,168364043 .long 168364043,235736079,33619971,100992007 .long 100992007,168364043,235736079,33619971 .long 50462976,117835012,185207048,252579084 .long 252314880,51251460,117574920,184942860 .long 184682752,252054788,50987272,118359308 .long 118099200,185467140,251790600,50727180 .long 2946363062,528716217,1300004225,1881839624 .long 1532713819,1532713819,1532713819,1532713819 .long 3602276352,4288629033,3737020424,4153884961 .long 1354558464,32357713,2958822624,3775749553 .long 1201988352,132424512,1572796698,503232858 .long 2213177600,1597421020,4103937655,675398315 .long 2749646592,4273543773,1511898873,121693092 .long 3040248576,1103263732,2871565598,1608280554 .long 2236667136,2588920351,482954393,64377734 .long 3069987328,291237287,2117370568,3650299247 .long 533321216,3573750986,2572112006,1401264716 .long 1339849704,2721158661,548607111,3445553514 .long 2128193280,3054596040,2183486460,1257083700 .long 655635200,1165381986,3923443150,2344132524 .long 190078720,256924420,290342170,357187870 .long 1610966272,2263057382,4103205268,309794674 .long 2592527872,2233205587,1335446729,3402964816 .long 3973531904,3225098121,3002836325,1918774430 .long 3870401024,2102906079,2284471353,4117666579 .long 617007872,1021508343,366931923,691083277 .long 2528395776,3491914898,2968704004,1613121270 .long 3445188352,3247741094,844474987,4093578302 .long 651481088,1190302358,1689581232,574775300 .long 4289380608,206939853,2555985458,2489840491 .long 2130264064,327674451,3566485037,3349835193 .long 2470714624,316102159,3636825756,3393945945 .byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 .byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 .byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 .byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 .byte 118,101,114,115,105,116,121,41,0 .align 64 .type _vpaes_preheat,@function .align 16 _vpaes_preheat: addl (%esp),%ebp movdqa -48(%ebp),%xmm7 movdqa -16(%ebp),%xmm6 ret .size _vpaes_preheat,.-_vpaes_preheat .type _vpaes_encrypt_core,@function .align 16 _vpaes_encrypt_core: movl $16,%ecx movl 240(%edx),%eax movdqa %xmm6,%xmm1 movdqa (%ebp),%xmm2 pandn %xmm0,%xmm1 pand %xmm6,%xmm0 movdqu (%edx),%xmm5 .byte 
102,15,56,0,208 movdqa 16(%ebp),%xmm0 pxor %xmm5,%xmm2 psrld $4,%xmm1 addl $16,%edx .byte 102,15,56,0,193 leal 192(%ebp),%ebx pxor %xmm2,%xmm0 jmp .L000enc_entry .align 16 .L001enc_loop: movdqa 32(%ebp),%xmm4 movdqa 48(%ebp),%xmm0 .byte 102,15,56,0,226 .byte 102,15,56,0,195 pxor %xmm5,%xmm4 movdqa 64(%ebp),%xmm5 pxor %xmm4,%xmm0 movdqa -64(%ebx,%ecx,1),%xmm1 .byte 102,15,56,0,234 movdqa 80(%ebp),%xmm2 movdqa (%ebx,%ecx,1),%xmm4 .byte 102,15,56,0,211 movdqa %xmm0,%xmm3 pxor %xmm5,%xmm2 .byte 102,15,56,0,193 addl $16,%edx pxor %xmm2,%xmm0 .byte 102,15,56,0,220 addl $16,%ecx pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andl $48,%ecx subl $1,%eax pxor %xmm3,%xmm0 .L000enc_entry: movdqa %xmm6,%xmm1 movdqa -32(%ebp),%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm6,%xmm0 .byte 102,15,56,0,232 movdqa %xmm7,%xmm3 pxor %xmm1,%xmm0 .byte 102,15,56,0,217 movdqa %xmm7,%xmm4 pxor %xmm5,%xmm3 .byte 102,15,56,0,224 movdqa %xmm7,%xmm2 pxor %xmm5,%xmm4 .byte 102,15,56,0,211 movdqa %xmm7,%xmm3 pxor %xmm0,%xmm2 .byte 102,15,56,0,220 movdqu (%edx),%xmm5 pxor %xmm1,%xmm3 jnz .L001enc_loop movdqa 96(%ebp),%xmm4 movdqa 112(%ebp),%xmm0 .byte 102,15,56,0,226 pxor %xmm5,%xmm4 .byte 102,15,56,0,195 movdqa 64(%ebx,%ecx,1),%xmm1 pxor %xmm4,%xmm0 .byte 102,15,56,0,193 ret .size _vpaes_encrypt_core,.-_vpaes_encrypt_core .type _vpaes_decrypt_core,@function .align 16 _vpaes_decrypt_core: leal 608(%ebp),%ebx movl 240(%edx),%eax movdqa %xmm6,%xmm1 movdqa -64(%ebx),%xmm2 pandn %xmm0,%xmm1 movl %eax,%ecx psrld $4,%xmm1 movdqu (%edx),%xmm5 shll $4,%ecx pand %xmm6,%xmm0 .byte 102,15,56,0,208 movdqa -48(%ebx),%xmm0 xorl $48,%ecx .byte 102,15,56,0,193 andl $48,%ecx pxor %xmm5,%xmm2 movdqa 176(%ebp),%xmm5 pxor %xmm2,%xmm0 addl $16,%edx leal -352(%ebx,%ecx,1),%ecx jmp .L002dec_entry .align 16 .L003dec_loop: movdqa -32(%ebx),%xmm4 movdqa -16(%ebx),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 movdqa (%ebx),%xmm4 pxor %xmm1,%xmm0 movdqa 16(%ebx),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,197 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 movdqa 32(%ebx),%xmm4 pxor %xmm1,%xmm0 movdqa 48(%ebx),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,197 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 movdqa 64(%ebx),%xmm4 pxor %xmm1,%xmm0 movdqa 80(%ebx),%xmm1 .byte 102,15,56,0,226 .byte 102,15,56,0,197 .byte 102,15,56,0,203 pxor %xmm4,%xmm0 addl $16,%edx .byte 102,15,58,15,237,12 pxor %xmm1,%xmm0 subl $1,%eax .L002dec_entry: movdqa %xmm6,%xmm1 movdqa -32(%ebp),%xmm2 pandn %xmm0,%xmm1 pand %xmm6,%xmm0 psrld $4,%xmm1 .byte 102,15,56,0,208 movdqa %xmm7,%xmm3 pxor %xmm1,%xmm0 .byte 102,15,56,0,217 movdqa %xmm7,%xmm4 pxor %xmm2,%xmm3 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 movdqa %xmm7,%xmm3 pxor %xmm0,%xmm2 .byte 102,15,56,0,220 movdqu (%edx),%xmm0 pxor %xmm1,%xmm3 jnz .L003dec_loop movdqa 96(%ebx),%xmm4 .byte 102,15,56,0,226 pxor %xmm0,%xmm4 movdqa 112(%ebx),%xmm0 movdqa (%ecx),%xmm2 .byte 102,15,56,0,195 pxor %xmm4,%xmm0 .byte 102,15,56,0,194 ret .size _vpaes_decrypt_core,.-_vpaes_decrypt_core .type _vpaes_schedule_core,@function .align 16 _vpaes_schedule_core: addl (%esp),%ebp movdqu (%esi),%xmm0 movdqa 320(%ebp),%xmm2 movdqa %xmm0,%xmm3 leal (%ebp),%ebx movdqa %xmm2,4(%esp) call _vpaes_schedule_transform movdqa %xmm0,%xmm7 testl %edi,%edi jnz .L004schedule_am_decrypting movdqu %xmm0,(%edx) jmp .L005schedule_go .L004schedule_am_decrypting: movdqa 256(%ebp,%ecx,1),%xmm1 .byte 102,15,56,0,217 movdqu %xmm3,(%edx) xorl $48,%ecx .L005schedule_go: cmpl $192,%eax ja .L006schedule_256 je .L007schedule_192 
.L008schedule_128: movl $10,%eax .L009loop_schedule_128: call _vpaes_schedule_round decl %eax jz .L010schedule_mangle_last call _vpaes_schedule_mangle jmp .L009loop_schedule_128 .align 16 .L007schedule_192: movdqu 8(%esi),%xmm0 call _vpaes_schedule_transform movdqa %xmm0,%xmm6 pxor %xmm4,%xmm4 movhlps %xmm4,%xmm6 movl $4,%eax .L011loop_schedule_192: call _vpaes_schedule_round .byte 102,15,58,15,198,8 call _vpaes_schedule_mangle call _vpaes_schedule_192_smear call _vpaes_schedule_mangle call _vpaes_schedule_round decl %eax jz .L010schedule_mangle_last call _vpaes_schedule_mangle call _vpaes_schedule_192_smear jmp .L011loop_schedule_192 .align 16 .L006schedule_256: movdqu 16(%esi),%xmm0 call _vpaes_schedule_transform movl $7,%eax .L012loop_schedule_256: call _vpaes_schedule_mangle movdqa %xmm0,%xmm6 call _vpaes_schedule_round decl %eax jz .L010schedule_mangle_last call _vpaes_schedule_mangle pshufd $255,%xmm0,%xmm0 movdqa %xmm7,20(%esp) movdqa %xmm6,%xmm7 call .L_vpaes_schedule_low_round movdqa 20(%esp),%xmm7 jmp .L012loop_schedule_256 .align 16 .L010schedule_mangle_last: leal 384(%ebp),%ebx testl %edi,%edi jnz .L013schedule_mangle_last_dec movdqa 256(%ebp,%ecx,1),%xmm1 .byte 102,15,56,0,193 leal 352(%ebp),%ebx addl $32,%edx .L013schedule_mangle_last_dec: addl $-16,%edx pxor 336(%ebp),%xmm0 call _vpaes_schedule_transform movdqu %xmm0,(%edx) pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 ret .size _vpaes_schedule_core,.-_vpaes_schedule_core .type _vpaes_schedule_192_smear,@function .align 16 _vpaes_schedule_192_smear: pshufd $128,%xmm6,%xmm1 pshufd $254,%xmm7,%xmm0 pxor %xmm1,%xmm6 pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 movhlps %xmm1,%xmm6 ret .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear .type _vpaes_schedule_round,@function .align 16 _vpaes_schedule_round: movdqa 8(%esp),%xmm2 pxor %xmm1,%xmm1 .byte 102,15,58,15,202,15 .byte 102,15,58,15,210,15 pxor %xmm1,%xmm7 pshufd $255,%xmm0,%xmm0 .byte 102,15,58,15,192,1 movdqa %xmm2,8(%esp) .L_vpaes_schedule_low_round: movdqa %xmm7,%xmm1 pslldq $4,%xmm7 pxor %xmm1,%xmm7 movdqa %xmm7,%xmm1 pslldq $8,%xmm7 pxor %xmm1,%xmm7 pxor 336(%ebp),%xmm7 movdqa -16(%ebp),%xmm4 movdqa -48(%ebp),%xmm5 movdqa %xmm4,%xmm1 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm4,%xmm0 movdqa -32(%ebp),%xmm2 .byte 102,15,56,0,208 pxor %xmm1,%xmm0 movdqa %xmm5,%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 movdqa %xmm5,%xmm4 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm5,%xmm2 .byte 102,15,56,0,211 pxor %xmm0,%xmm2 movdqa %xmm5,%xmm3 .byte 102,15,56,0,220 pxor %xmm1,%xmm3 movdqa 32(%ebp),%xmm4 .byte 102,15,56,0,226 movdqa 48(%ebp),%xmm0 .byte 102,15,56,0,195 pxor %xmm4,%xmm0 pxor %xmm7,%xmm0 movdqa %xmm0,%xmm7 ret .size _vpaes_schedule_round,.-_vpaes_schedule_round .type _vpaes_schedule_transform,@function .align 16 _vpaes_schedule_transform: movdqa -16(%ebp),%xmm2 movdqa %xmm2,%xmm1 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm2,%xmm0 movdqa (%ebx),%xmm2 .byte 102,15,56,0,208 movdqa 16(%ebx),%xmm0 .byte 102,15,56,0,193 pxor %xmm2,%xmm0 ret .size _vpaes_schedule_transform,.-_vpaes_schedule_transform .type _vpaes_schedule_mangle,@function .align 16 _vpaes_schedule_mangle: movdqa %xmm0,%xmm4 movdqa 128(%ebp),%xmm5 testl %edi,%edi jnz .L014schedule_mangle_dec addl $16,%edx pxor 336(%ebp),%xmm4 .byte 102,15,56,0,229 movdqa %xmm4,%xmm3 .byte 102,15,56,0,229 pxor %xmm4,%xmm3 .byte 102,15,56,0,229 pxor %xmm4,%xmm3 jmp .L015schedule_mangle_both .align 16 
.L014schedule_mangle_dec: movdqa -16(%ebp),%xmm2 leal 416(%ebp),%esi movdqa %xmm2,%xmm1 pandn %xmm4,%xmm1 psrld $4,%xmm1 pand %xmm2,%xmm4 movdqa (%esi),%xmm2 .byte 102,15,56,0,212 movdqa 16(%esi),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 .byte 102,15,56,0,221 movdqa 32(%esi),%xmm2 .byte 102,15,56,0,212 pxor %xmm3,%xmm2 movdqa 48(%esi),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 .byte 102,15,56,0,221 movdqa 64(%esi),%xmm2 .byte 102,15,56,0,212 pxor %xmm3,%xmm2 movdqa 80(%esi),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 .byte 102,15,56,0,221 movdqa 96(%esi),%xmm2 .byte 102,15,56,0,212 pxor %xmm3,%xmm2 movdqa 112(%esi),%xmm3 .byte 102,15,56,0,217 pxor %xmm2,%xmm3 addl $-16,%edx .L015schedule_mangle_both: movdqa 256(%ebp,%ecx,1),%xmm1 .byte 102,15,56,0,217 addl $-16,%ecx andl $48,%ecx movdqu %xmm3,(%edx) ret .size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle .globl vpaes_set_encrypt_key .type vpaes_set_encrypt_key,@function .align 16 vpaes_set_encrypt_key: .L_vpaes_set_encrypt_key_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi leal -56(%esp),%ebx movl 24(%esp),%eax andl $-16,%ebx movl 28(%esp),%edx xchgl %esp,%ebx movl %ebx,48(%esp) movl %eax,%ebx shrl $5,%ebx addl $5,%ebx movl %ebx,240(%edx) movl $48,%ecx movl $0,%edi leal .L_vpaes_consts+0x30-.L016pic_point,%ebp call _vpaes_schedule_core .L016pic_point: movl 48(%esp),%esp xorl %eax,%eax popl %edi popl %esi popl %ebx popl %ebp ret .size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin .globl vpaes_set_decrypt_key .type vpaes_set_decrypt_key,@function .align 16 vpaes_set_decrypt_key: .L_vpaes_set_decrypt_key_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi leal -56(%esp),%ebx movl 24(%esp),%eax andl $-16,%ebx movl 28(%esp),%edx xchgl %esp,%ebx movl %ebx,48(%esp) movl %eax,%ebx shrl $5,%ebx addl $5,%ebx movl %ebx,240(%edx) shll $4,%ebx leal 16(%edx,%ebx,1),%edx movl $1,%edi movl %eax,%ecx shrl $1,%ecx andl $32,%ecx xorl $32,%ecx leal .L_vpaes_consts+0x30-.L017pic_point,%ebp call _vpaes_schedule_core .L017pic_point: movl 48(%esp),%esp xorl %eax,%eax popl %edi popl %esi popl %ebx popl %ebp ret .size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin .globl vpaes_encrypt .type vpaes_encrypt,@function .align 16 vpaes_encrypt: .L_vpaes_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi leal .L_vpaes_consts+0x30-.L018pic_point,%ebp call _vpaes_preheat .L018pic_point: movl 20(%esp),%esi leal -56(%esp),%ebx movl 24(%esp),%edi andl $-16,%ebx movl 28(%esp),%edx xchgl %esp,%ebx movl %ebx,48(%esp) movdqu (%esi),%xmm0 call _vpaes_encrypt_core movdqu %xmm0,(%edi) movl 48(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .size vpaes_encrypt,.-.L_vpaes_encrypt_begin .globl vpaes_decrypt .type vpaes_decrypt,@function .align 16 vpaes_decrypt: .L_vpaes_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi leal .L_vpaes_consts+0x30-.L019pic_point,%ebp call _vpaes_preheat .L019pic_point: movl 20(%esp),%esi leal -56(%esp),%ebx movl 24(%esp),%edi andl $-16,%ebx movl 28(%esp),%edx xchgl %esp,%ebx movl %ebx,48(%esp) movdqu (%esi),%xmm0 call _vpaes_decrypt_core movdqu %xmm0,(%edi) movl 48(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret .size vpaes_decrypt,.-.L_vpaes_decrypt_begin .globl vpaes_cbc_encrypt .type vpaes_cbc_encrypt,@function .align 16 vpaes_cbc_encrypt: .L_vpaes_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx subl $16,%eax jc .L020cbc_abort leal -56(%esp),%ebx movl 36(%esp),%ebp 
andl $-16,%ebx movl 40(%esp),%ecx xchgl %esp,%ebx movdqu (%ebp),%xmm1 subl %esi,%edi movl %ebx,48(%esp) movl %edi,(%esp) movl %edx,4(%esp) movl %ebp,8(%esp) movl %eax,%edi leal .L_vpaes_consts+0x30-.L021pic_point,%ebp call _vpaes_preheat .L021pic_point: cmpl $0,%ecx je .L022cbc_dec_loop jmp .L023cbc_enc_loop .align 16 .L023cbc_enc_loop: movdqu (%esi),%xmm0 pxor %xmm1,%xmm0 call _vpaes_encrypt_core movl (%esp),%ebx movl 4(%esp),%edx movdqa %xmm0,%xmm1 movdqu %xmm0,(%ebx,%esi,1) leal 16(%esi),%esi subl $16,%edi jnc .L023cbc_enc_loop jmp .L024cbc_done .align 16 .L022cbc_dec_loop: movdqu (%esi),%xmm0 movdqa %xmm1,16(%esp) movdqa %xmm0,32(%esp) call _vpaes_decrypt_core movl (%esp),%ebx movl 4(%esp),%edx pxor 16(%esp),%xmm0 movdqa 32(%esp),%xmm1 movdqu %xmm0,(%ebx,%esi,1) leal 16(%esi),%esi subl $16,%edi jnc .L022cbc_dec_loop .L024cbc_done: movl 8(%esp),%ebx movl 48(%esp),%esp movdqu %xmm1,(%ebx) .L020cbc_abort: popl %edi popl %esi popl %ebx popl %ebp ret .size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin #endif Index: head/secure/lib/libcrypto/i386/wp-mmx.S =================================================================== --- head/secure/lib/libcrypto/i386/wp-mmx.S (revision 299480) +++ head/secure/lib/libcrypto/i386/wp-mmx.S (revision 299481) @@ -1,2218 +1,2219 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from wp-mmx.pl. #ifdef PIC .file "wp-mmx.S" .text .globl whirlpool_block_mmx .type whirlpool_block_mmx,@function .align 16 whirlpool_block_mmx: .L_whirlpool_block_mmx_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%ebp movl %esp,%eax subl $148,%esp andl $-64,%esp leal 128(%esp),%ebx movl %esi,(%ebx) movl %edi,4(%ebx) movl %ebp,8(%ebx) movl %eax,16(%ebx) call .L000pic_point .L000pic_point: popl %ebp leal .L001table-.L000pic_point(%ebp),%ebp xorl %ecx,%ecx xorl %edx,%edx movq (%esi),%mm0 movq 8(%esi),%mm1 movq 16(%esi),%mm2 movq 24(%esi),%mm3 movq 32(%esi),%mm4 movq 40(%esi),%mm5 movq 48(%esi),%mm6 movq 56(%esi),%mm7 .L002outerloop: movq %mm0,(%esp) movq %mm1,8(%esp) movq %mm2,16(%esp) movq %mm3,24(%esp) movq %mm4,32(%esp) movq %mm5,40(%esp) movq %mm6,48(%esp) movq %mm7,56(%esp) pxor (%edi),%mm0 pxor 8(%edi),%mm1 pxor 16(%edi),%mm2 pxor 24(%edi),%mm3 pxor 32(%edi),%mm4 pxor 40(%edi),%mm5 pxor 48(%edi),%mm6 pxor 56(%edi),%mm7 movq %mm0,64(%esp) movq %mm1,72(%esp) movq %mm2,80(%esp) movq %mm3,88(%esp) movq %mm4,96(%esp) movq %mm5,104(%esp) movq %mm6,112(%esp) movq %mm7,120(%esp) xorl %esi,%esi movl %esi,12(%ebx) .align 16 .L003round: movq 4096(%ebp,%esi,8),%mm0 movl (%esp),%eax movl 4(%esp),%ebx movzbl %al,%ecx movzbl %ah,%edx shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm0 movq 7(%ebp,%edi,8),%mm1 movl 8(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx movq 6(%ebp,%esi,8),%mm2 movq 5(%ebp,%edi,8),%mm3 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx movq 4(%ebp,%esi,8),%mm4 movq 3(%ebp,%edi,8),%mm5 movl 12(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx movq 2(%ebp,%esi,8),%mm6 movq 1(%ebp,%edi,8),%mm7 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm1 pxor 7(%ebp,%edi,8),%mm2 movl 16(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm3 pxor 5(%ebp,%edi,8),%mm4 shrl $16,%ebx leal 
(%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm5 pxor 3(%ebp,%edi,8),%mm6 movl 20(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm7 pxor 1(%ebp,%edi,8),%mm0 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm2 pxor 7(%ebp,%edi,8),%mm3 movl 24(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm4 pxor 5(%ebp,%edi,8),%mm5 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm6 pxor 3(%ebp,%edi,8),%mm7 movl 28(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm0 pxor 1(%ebp,%edi,8),%mm1 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm3 pxor 7(%ebp,%edi,8),%mm4 movl 32(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm5 pxor 5(%ebp,%edi,8),%mm6 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm7 pxor 3(%ebp,%edi,8),%mm0 movl 36(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm1 pxor 1(%ebp,%edi,8),%mm2 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm4 pxor 7(%ebp,%edi,8),%mm5 movl 40(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm6 pxor 5(%ebp,%edi,8),%mm7 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm0 pxor 3(%ebp,%edi,8),%mm1 movl 44(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm2 pxor 1(%ebp,%edi,8),%mm3 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm5 pxor 7(%ebp,%edi,8),%mm6 movl 48(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm7 pxor 5(%ebp,%edi,8),%mm0 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm1 pxor 3(%ebp,%edi,8),%mm2 movl 52(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm3 pxor 1(%ebp,%edi,8),%mm4 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm6 pxor 7(%ebp,%edi,8),%mm7 movl 56(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm0 pxor 5(%ebp,%edi,8),%mm1 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm2 pxor 3(%ebp,%edi,8),%mm3 movl 60(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm4 pxor 1(%ebp,%edi,8),%mm5 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm7 pxor 7(%ebp,%edi,8),%mm0 movl 64(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm1 pxor 5(%ebp,%edi,8),%mm2 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal 
(%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm3 pxor 3(%ebp,%edi,8),%mm4 movl 68(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm5 pxor 1(%ebp,%edi,8),%mm6 movq %mm0,(%esp) movq %mm1,8(%esp) movq %mm2,16(%esp) movq %mm3,24(%esp) movq %mm4,32(%esp) movq %mm5,40(%esp) movq %mm6,48(%esp) movq %mm7,56(%esp) shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm0 pxor 7(%ebp,%edi,8),%mm1 movl 72(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm2 pxor 5(%ebp,%edi,8),%mm3 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm4 pxor 3(%ebp,%edi,8),%mm5 movl 76(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm6 pxor 1(%ebp,%edi,8),%mm7 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm1 pxor 7(%ebp,%edi,8),%mm2 movl 80(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm3 pxor 5(%ebp,%edi,8),%mm4 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm5 pxor 3(%ebp,%edi,8),%mm6 movl 84(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm7 pxor 1(%ebp,%edi,8),%mm0 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm2 pxor 7(%ebp,%edi,8),%mm3 movl 88(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm4 pxor 5(%ebp,%edi,8),%mm5 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm6 pxor 3(%ebp,%edi,8),%mm7 movl 92(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm0 pxor 1(%ebp,%edi,8),%mm1 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm3 pxor 7(%ebp,%edi,8),%mm4 movl 96(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm5 pxor 5(%ebp,%edi,8),%mm6 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm7 pxor 3(%ebp,%edi,8),%mm0 movl 100(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm1 pxor 1(%ebp,%edi,8),%mm2 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm4 pxor 7(%ebp,%edi,8),%mm5 movl 104(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm6 pxor 5(%ebp,%edi,8),%mm7 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm0 pxor 3(%ebp,%edi,8),%mm1 movl 108(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm2 pxor 1(%ebp,%edi,8),%mm3 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm5 pxor 7(%ebp,%edi,8),%mm6 movl 112(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx 
pxor 6(%ebp,%esi,8),%mm7 pxor 5(%ebp,%edi,8),%mm0 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm1 pxor 3(%ebp,%edi,8),%mm2 movl 116(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm3 pxor 1(%ebp,%edi,8),%mm4 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm6 pxor 7(%ebp,%edi,8),%mm7 movl 120(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm0 pxor 5(%ebp,%edi,8),%mm1 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm2 pxor 3(%ebp,%edi,8),%mm3 movl 124(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm4 pxor 1(%ebp,%edi,8),%mm5 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm7 pxor 7(%ebp,%edi,8),%mm0 leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm1 pxor 5(%ebp,%edi,8),%mm2 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm3 pxor 3(%ebp,%edi,8),%mm4 leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm5 pxor 1(%ebp,%edi,8),%mm6 leal 128(%esp),%ebx movl 12(%ebx),%esi addl $1,%esi cmpl $10,%esi je .L004roundsdone movl %esi,12(%ebx) movq %mm0,64(%esp) movq %mm1,72(%esp) movq %mm2,80(%esp) movq %mm3,88(%esp) movq %mm4,96(%esp) movq %mm5,104(%esp) movq %mm6,112(%esp) movq %mm7,120(%esp) jmp .L003round .align 16 .L004roundsdone: movl (%ebx),%esi movl 4(%ebx),%edi movl 8(%ebx),%eax pxor (%edi),%mm0 pxor 8(%edi),%mm1 pxor 16(%edi),%mm2 pxor 24(%edi),%mm3 pxor 32(%edi),%mm4 pxor 40(%edi),%mm5 pxor 48(%edi),%mm6 pxor 56(%edi),%mm7 pxor (%esi),%mm0 pxor 8(%esi),%mm1 pxor 16(%esi),%mm2 pxor 24(%esi),%mm3 pxor 32(%esi),%mm4 pxor 40(%esi),%mm5 pxor 48(%esi),%mm6 pxor 56(%esi),%mm7 movq %mm0,(%esi) movq %mm1,8(%esi) movq %mm2,16(%esi) movq %mm3,24(%esi) movq %mm4,32(%esi) movq %mm5,40(%esi) movq %mm6,48(%esi) movq %mm7,56(%esi) leal 64(%edi),%edi subl $1,%eax jz .L005alldone movl %edi,4(%ebx) movl %eax,8(%ebx) jmp .L002outerloop .L005alldone: emms movl 16(%ebx),%esp popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L001table: .byte 24,24,96,24,192,120,48,216 .byte 24,24,96,24,192,120,48,216 .byte 35,35,140,35,5,175,70,38 .byte 35,35,140,35,5,175,70,38 .byte 198,198,63,198,126,249,145,184 .byte 198,198,63,198,126,249,145,184 .byte 232,232,135,232,19,111,205,251 .byte 232,232,135,232,19,111,205,251 .byte 135,135,38,135,76,161,19,203 .byte 135,135,38,135,76,161,19,203 .byte 184,184,218,184,169,98,109,17 .byte 184,184,218,184,169,98,109,17 .byte 1,1,4,1,8,5,2,9 .byte 1,1,4,1,8,5,2,9 .byte 79,79,33,79,66,110,158,13 .byte 79,79,33,79,66,110,158,13 .byte 54,54,216,54,173,238,108,155 .byte 54,54,216,54,173,238,108,155 .byte 166,166,162,166,89,4,81,255 .byte 166,166,162,166,89,4,81,255 .byte 210,210,111,210,222,189,185,12 .byte 210,210,111,210,222,189,185,12 .byte 245,245,243,245,251,6,247,14 .byte 245,245,243,245,251,6,247,14 .byte 121,121,249,121,239,128,242,150 .byte 121,121,249,121,239,128,242,150 .byte 111,111,161,111,95,206,222,48 .byte 111,111,161,111,95,206,222,48 .byte 145,145,126,145,252,239,63,109 .byte 145,145,126,145,252,239,63,109 .byte 82,82,85,82,170,7,164,248 .byte 
82,82,85,82,170,7,164,248 .byte 96,96,157,96,39,253,192,71 .byte 96,96,157,96,39,253,192,71 .byte 188,188,202,188,137,118,101,53 .byte 188,188,202,188,137,118,101,53 .byte 155,155,86,155,172,205,43,55 .byte 155,155,86,155,172,205,43,55 .byte 142,142,2,142,4,140,1,138 .byte 142,142,2,142,4,140,1,138 .byte 163,163,182,163,113,21,91,210 .byte 163,163,182,163,113,21,91,210 .byte 12,12,48,12,96,60,24,108 .byte 12,12,48,12,96,60,24,108 .byte 123,123,241,123,255,138,246,132 .byte 123,123,241,123,255,138,246,132 .byte 53,53,212,53,181,225,106,128 .byte 53,53,212,53,181,225,106,128 .byte 29,29,116,29,232,105,58,245 .byte 29,29,116,29,232,105,58,245 .byte 224,224,167,224,83,71,221,179 .byte 224,224,167,224,83,71,221,179 .byte 215,215,123,215,246,172,179,33 .byte 215,215,123,215,246,172,179,33 .byte 194,194,47,194,94,237,153,156 .byte 194,194,47,194,94,237,153,156 .byte 46,46,184,46,109,150,92,67 .byte 46,46,184,46,109,150,92,67 .byte 75,75,49,75,98,122,150,41 .byte 75,75,49,75,98,122,150,41 .byte 254,254,223,254,163,33,225,93 .byte 254,254,223,254,163,33,225,93 .byte 87,87,65,87,130,22,174,213 .byte 87,87,65,87,130,22,174,213 .byte 21,21,84,21,168,65,42,189 .byte 21,21,84,21,168,65,42,189 .byte 119,119,193,119,159,182,238,232 .byte 119,119,193,119,159,182,238,232 .byte 55,55,220,55,165,235,110,146 .byte 55,55,220,55,165,235,110,146 .byte 229,229,179,229,123,86,215,158 .byte 229,229,179,229,123,86,215,158 .byte 159,159,70,159,140,217,35,19 .byte 159,159,70,159,140,217,35,19 .byte 240,240,231,240,211,23,253,35 .byte 240,240,231,240,211,23,253,35 .byte 74,74,53,74,106,127,148,32 .byte 74,74,53,74,106,127,148,32 .byte 218,218,79,218,158,149,169,68 .byte 218,218,79,218,158,149,169,68 .byte 88,88,125,88,250,37,176,162 .byte 88,88,125,88,250,37,176,162 .byte 201,201,3,201,6,202,143,207 .byte 201,201,3,201,6,202,143,207 .byte 41,41,164,41,85,141,82,124 .byte 41,41,164,41,85,141,82,124 .byte 10,10,40,10,80,34,20,90 .byte 10,10,40,10,80,34,20,90 .byte 177,177,254,177,225,79,127,80 .byte 177,177,254,177,225,79,127,80 .byte 160,160,186,160,105,26,93,201 .byte 160,160,186,160,105,26,93,201 .byte 107,107,177,107,127,218,214,20 .byte 107,107,177,107,127,218,214,20 .byte 133,133,46,133,92,171,23,217 .byte 133,133,46,133,92,171,23,217 .byte 189,189,206,189,129,115,103,60 .byte 189,189,206,189,129,115,103,60 .byte 93,93,105,93,210,52,186,143 .byte 93,93,105,93,210,52,186,143 .byte 16,16,64,16,128,80,32,144 .byte 16,16,64,16,128,80,32,144 .byte 244,244,247,244,243,3,245,7 .byte 244,244,247,244,243,3,245,7 .byte 203,203,11,203,22,192,139,221 .byte 203,203,11,203,22,192,139,221 .byte 62,62,248,62,237,198,124,211 .byte 62,62,248,62,237,198,124,211 .byte 5,5,20,5,40,17,10,45 .byte 5,5,20,5,40,17,10,45 .byte 103,103,129,103,31,230,206,120 .byte 103,103,129,103,31,230,206,120 .byte 228,228,183,228,115,83,213,151 .byte 228,228,183,228,115,83,213,151 .byte 39,39,156,39,37,187,78,2 .byte 39,39,156,39,37,187,78,2 .byte 65,65,25,65,50,88,130,115 .byte 65,65,25,65,50,88,130,115 .byte 139,139,22,139,44,157,11,167 .byte 139,139,22,139,44,157,11,167 .byte 167,167,166,167,81,1,83,246 .byte 167,167,166,167,81,1,83,246 .byte 125,125,233,125,207,148,250,178 .byte 125,125,233,125,207,148,250,178 .byte 149,149,110,149,220,251,55,73 .byte 149,149,110,149,220,251,55,73 .byte 216,216,71,216,142,159,173,86 .byte 216,216,71,216,142,159,173,86 .byte 251,251,203,251,139,48,235,112 .byte 251,251,203,251,139,48,235,112 .byte 238,238,159,238,35,113,193,205 .byte 238,238,159,238,35,113,193,205 .byte 124,124,237,124,199,145,248,187 .byte 
124,124,237,124,199,145,248,187 .byte 102,102,133,102,23,227,204,113 .byte 102,102,133,102,23,227,204,113 .byte 221,221,83,221,166,142,167,123 .byte 221,221,83,221,166,142,167,123 .byte 23,23,92,23,184,75,46,175 .byte 23,23,92,23,184,75,46,175 .byte 71,71,1,71,2,70,142,69 .byte 71,71,1,71,2,70,142,69 .byte 158,158,66,158,132,220,33,26 .byte 158,158,66,158,132,220,33,26 .byte 202,202,15,202,30,197,137,212 .byte 202,202,15,202,30,197,137,212 .byte 45,45,180,45,117,153,90,88 .byte 45,45,180,45,117,153,90,88 .byte 191,191,198,191,145,121,99,46 .byte 191,191,198,191,145,121,99,46 .byte 7,7,28,7,56,27,14,63 .byte 7,7,28,7,56,27,14,63 .byte 173,173,142,173,1,35,71,172 .byte 173,173,142,173,1,35,71,172 .byte 90,90,117,90,234,47,180,176 .byte 90,90,117,90,234,47,180,176 .byte 131,131,54,131,108,181,27,239 .byte 131,131,54,131,108,181,27,239 .byte 51,51,204,51,133,255,102,182 .byte 51,51,204,51,133,255,102,182 .byte 99,99,145,99,63,242,198,92 .byte 99,99,145,99,63,242,198,92 .byte 2,2,8,2,16,10,4,18 .byte 2,2,8,2,16,10,4,18 .byte 170,170,146,170,57,56,73,147 .byte 170,170,146,170,57,56,73,147 .byte 113,113,217,113,175,168,226,222 .byte 113,113,217,113,175,168,226,222 .byte 200,200,7,200,14,207,141,198 .byte 200,200,7,200,14,207,141,198 .byte 25,25,100,25,200,125,50,209 .byte 25,25,100,25,200,125,50,209 .byte 73,73,57,73,114,112,146,59 .byte 73,73,57,73,114,112,146,59 .byte 217,217,67,217,134,154,175,95 .byte 217,217,67,217,134,154,175,95 .byte 242,242,239,242,195,29,249,49 .byte 242,242,239,242,195,29,249,49 .byte 227,227,171,227,75,72,219,168 .byte 227,227,171,227,75,72,219,168 .byte 91,91,113,91,226,42,182,185 .byte 91,91,113,91,226,42,182,185 .byte 136,136,26,136,52,146,13,188 .byte 136,136,26,136,52,146,13,188 .byte 154,154,82,154,164,200,41,62 .byte 154,154,82,154,164,200,41,62 .byte 38,38,152,38,45,190,76,11 .byte 38,38,152,38,45,190,76,11 .byte 50,50,200,50,141,250,100,191 .byte 50,50,200,50,141,250,100,191 .byte 176,176,250,176,233,74,125,89 .byte 176,176,250,176,233,74,125,89 .byte 233,233,131,233,27,106,207,242 .byte 233,233,131,233,27,106,207,242 .byte 15,15,60,15,120,51,30,119 .byte 15,15,60,15,120,51,30,119 .byte 213,213,115,213,230,166,183,51 .byte 213,213,115,213,230,166,183,51 .byte 128,128,58,128,116,186,29,244 .byte 128,128,58,128,116,186,29,244 .byte 190,190,194,190,153,124,97,39 .byte 190,190,194,190,153,124,97,39 .byte 205,205,19,205,38,222,135,235 .byte 205,205,19,205,38,222,135,235 .byte 52,52,208,52,189,228,104,137 .byte 52,52,208,52,189,228,104,137 .byte 72,72,61,72,122,117,144,50 .byte 72,72,61,72,122,117,144,50 .byte 255,255,219,255,171,36,227,84 .byte 255,255,219,255,171,36,227,84 .byte 122,122,245,122,247,143,244,141 .byte 122,122,245,122,247,143,244,141 .byte 144,144,122,144,244,234,61,100 .byte 144,144,122,144,244,234,61,100 .byte 95,95,97,95,194,62,190,157 .byte 95,95,97,95,194,62,190,157 .byte 32,32,128,32,29,160,64,61 .byte 32,32,128,32,29,160,64,61 .byte 104,104,189,104,103,213,208,15 .byte 104,104,189,104,103,213,208,15 .byte 26,26,104,26,208,114,52,202 .byte 26,26,104,26,208,114,52,202 .byte 174,174,130,174,25,44,65,183 .byte 174,174,130,174,25,44,65,183 .byte 180,180,234,180,201,94,117,125 .byte 180,180,234,180,201,94,117,125 .byte 84,84,77,84,154,25,168,206 .byte 84,84,77,84,154,25,168,206 .byte 147,147,118,147,236,229,59,127 .byte 147,147,118,147,236,229,59,127 .byte 34,34,136,34,13,170,68,47 .byte 34,34,136,34,13,170,68,47 .byte 100,100,141,100,7,233,200,99 .byte 100,100,141,100,7,233,200,99 .byte 241,241,227,241,219,18,255,42 .byte 
241,241,227,241,219,18,255,42 .byte 115,115,209,115,191,162,230,204 .byte 115,115,209,115,191,162,230,204 .byte 18,18,72,18,144,90,36,130 .byte 18,18,72,18,144,90,36,130 .byte 64,64,29,64,58,93,128,122 .byte 64,64,29,64,58,93,128,122 .byte 8,8,32,8,64,40,16,72 .byte 8,8,32,8,64,40,16,72 .byte 195,195,43,195,86,232,155,149 .byte 195,195,43,195,86,232,155,149 .byte 236,236,151,236,51,123,197,223 .byte 236,236,151,236,51,123,197,223 .byte 219,219,75,219,150,144,171,77 .byte 219,219,75,219,150,144,171,77 .byte 161,161,190,161,97,31,95,192 .byte 161,161,190,161,97,31,95,192 .byte 141,141,14,141,28,131,7,145 .byte 141,141,14,141,28,131,7,145 .byte 61,61,244,61,245,201,122,200 .byte 61,61,244,61,245,201,122,200 .byte 151,151,102,151,204,241,51,91 .byte 151,151,102,151,204,241,51,91 .byte 0,0,0,0,0,0,0,0 .byte 0,0,0,0,0,0,0,0 .byte 207,207,27,207,54,212,131,249 .byte 207,207,27,207,54,212,131,249 .byte 43,43,172,43,69,135,86,110 .byte 43,43,172,43,69,135,86,110 .byte 118,118,197,118,151,179,236,225 .byte 118,118,197,118,151,179,236,225 .byte 130,130,50,130,100,176,25,230 .byte 130,130,50,130,100,176,25,230 .byte 214,214,127,214,254,169,177,40 .byte 214,214,127,214,254,169,177,40 .byte 27,27,108,27,216,119,54,195 .byte 27,27,108,27,216,119,54,195 .byte 181,181,238,181,193,91,119,116 .byte 181,181,238,181,193,91,119,116 .byte 175,175,134,175,17,41,67,190 .byte 175,175,134,175,17,41,67,190 .byte 106,106,181,106,119,223,212,29 .byte 106,106,181,106,119,223,212,29 .byte 80,80,93,80,186,13,160,234 .byte 80,80,93,80,186,13,160,234 .byte 69,69,9,69,18,76,138,87 .byte 69,69,9,69,18,76,138,87 .byte 243,243,235,243,203,24,251,56 .byte 243,243,235,243,203,24,251,56 .byte 48,48,192,48,157,240,96,173 .byte 48,48,192,48,157,240,96,173 .byte 239,239,155,239,43,116,195,196 .byte 239,239,155,239,43,116,195,196 .byte 63,63,252,63,229,195,126,218 .byte 63,63,252,63,229,195,126,218 .byte 85,85,73,85,146,28,170,199 .byte 85,85,73,85,146,28,170,199 .byte 162,162,178,162,121,16,89,219 .byte 162,162,178,162,121,16,89,219 .byte 234,234,143,234,3,101,201,233 .byte 234,234,143,234,3,101,201,233 .byte 101,101,137,101,15,236,202,106 .byte 101,101,137,101,15,236,202,106 .byte 186,186,210,186,185,104,105,3 .byte 186,186,210,186,185,104,105,3 .byte 47,47,188,47,101,147,94,74 .byte 47,47,188,47,101,147,94,74 .byte 192,192,39,192,78,231,157,142 .byte 192,192,39,192,78,231,157,142 .byte 222,222,95,222,190,129,161,96 .byte 222,222,95,222,190,129,161,96 .byte 28,28,112,28,224,108,56,252 .byte 28,28,112,28,224,108,56,252 .byte 253,253,211,253,187,46,231,70 .byte 253,253,211,253,187,46,231,70 .byte 77,77,41,77,82,100,154,31 .byte 77,77,41,77,82,100,154,31 .byte 146,146,114,146,228,224,57,118 .byte 146,146,114,146,228,224,57,118 .byte 117,117,201,117,143,188,234,250 .byte 117,117,201,117,143,188,234,250 .byte 6,6,24,6,48,30,12,54 .byte 6,6,24,6,48,30,12,54 .byte 138,138,18,138,36,152,9,174 .byte 138,138,18,138,36,152,9,174 .byte 178,178,242,178,249,64,121,75 .byte 178,178,242,178,249,64,121,75 .byte 230,230,191,230,99,89,209,133 .byte 230,230,191,230,99,89,209,133 .byte 14,14,56,14,112,54,28,126 .byte 14,14,56,14,112,54,28,126 .byte 31,31,124,31,248,99,62,231 .byte 31,31,124,31,248,99,62,231 .byte 98,98,149,98,55,247,196,85 .byte 98,98,149,98,55,247,196,85 .byte 212,212,119,212,238,163,181,58 .byte 212,212,119,212,238,163,181,58 .byte 168,168,154,168,41,50,77,129 .byte 168,168,154,168,41,50,77,129 .byte 150,150,98,150,196,244,49,82 .byte 150,150,98,150,196,244,49,82 .byte 249,249,195,249,155,58,239,98 .byte 249,249,195,249,155,58,239,98 
.byte 197,197,51,197,102,246,151,163 .byte 197,197,51,197,102,246,151,163 .byte 37,37,148,37,53,177,74,16 .byte 37,37,148,37,53,177,74,16 .byte 89,89,121,89,242,32,178,171 .byte 89,89,121,89,242,32,178,171 .byte 132,132,42,132,84,174,21,208 .byte 132,132,42,132,84,174,21,208 .byte 114,114,213,114,183,167,228,197 .byte 114,114,213,114,183,167,228,197 .byte 57,57,228,57,213,221,114,236 .byte 57,57,228,57,213,221,114,236 .byte 76,76,45,76,90,97,152,22 .byte 76,76,45,76,90,97,152,22 .byte 94,94,101,94,202,59,188,148 .byte 94,94,101,94,202,59,188,148 .byte 120,120,253,120,231,133,240,159 .byte 120,120,253,120,231,133,240,159 .byte 56,56,224,56,221,216,112,229 .byte 56,56,224,56,221,216,112,229 .byte 140,140,10,140,20,134,5,152 .byte 140,140,10,140,20,134,5,152 .byte 209,209,99,209,198,178,191,23 .byte 209,209,99,209,198,178,191,23 .byte 165,165,174,165,65,11,87,228 .byte 165,165,174,165,65,11,87,228 .byte 226,226,175,226,67,77,217,161 .byte 226,226,175,226,67,77,217,161 .byte 97,97,153,97,47,248,194,78 .byte 97,97,153,97,47,248,194,78 .byte 179,179,246,179,241,69,123,66 .byte 179,179,246,179,241,69,123,66 .byte 33,33,132,33,21,165,66,52 .byte 33,33,132,33,21,165,66,52 .byte 156,156,74,156,148,214,37,8 .byte 156,156,74,156,148,214,37,8 .byte 30,30,120,30,240,102,60,238 .byte 30,30,120,30,240,102,60,238 .byte 67,67,17,67,34,82,134,97 .byte 67,67,17,67,34,82,134,97 .byte 199,199,59,199,118,252,147,177 .byte 199,199,59,199,118,252,147,177 .byte 252,252,215,252,179,43,229,79 .byte 252,252,215,252,179,43,229,79 .byte 4,4,16,4,32,20,8,36 .byte 4,4,16,4,32,20,8,36 .byte 81,81,89,81,178,8,162,227 .byte 81,81,89,81,178,8,162,227 .byte 153,153,94,153,188,199,47,37 .byte 153,153,94,153,188,199,47,37 .byte 109,109,169,109,79,196,218,34 .byte 109,109,169,109,79,196,218,34 .byte 13,13,52,13,104,57,26,101 .byte 13,13,52,13,104,57,26,101 .byte 250,250,207,250,131,53,233,121 .byte 250,250,207,250,131,53,233,121 .byte 223,223,91,223,182,132,163,105 .byte 223,223,91,223,182,132,163,105 .byte 126,126,229,126,215,155,252,169 .byte 126,126,229,126,215,155,252,169 .byte 36,36,144,36,61,180,72,25 .byte 36,36,144,36,61,180,72,25 .byte 59,59,236,59,197,215,118,254 .byte 59,59,236,59,197,215,118,254 .byte 171,171,150,171,49,61,75,154 .byte 171,171,150,171,49,61,75,154 .byte 206,206,31,206,62,209,129,240 .byte 206,206,31,206,62,209,129,240 .byte 17,17,68,17,136,85,34,153 .byte 17,17,68,17,136,85,34,153 .byte 143,143,6,143,12,137,3,131 .byte 143,143,6,143,12,137,3,131 .byte 78,78,37,78,74,107,156,4 .byte 78,78,37,78,74,107,156,4 .byte 183,183,230,183,209,81,115,102 .byte 183,183,230,183,209,81,115,102 .byte 235,235,139,235,11,96,203,224 .byte 235,235,139,235,11,96,203,224 .byte 60,60,240,60,253,204,120,193 .byte 60,60,240,60,253,204,120,193 .byte 129,129,62,129,124,191,31,253 .byte 129,129,62,129,124,191,31,253 .byte 148,148,106,148,212,254,53,64 .byte 148,148,106,148,212,254,53,64 .byte 247,247,251,247,235,12,243,28 .byte 247,247,251,247,235,12,243,28 .byte 185,185,222,185,161,103,111,24 .byte 185,185,222,185,161,103,111,24 .byte 19,19,76,19,152,95,38,139 .byte 19,19,76,19,152,95,38,139 .byte 44,44,176,44,125,156,88,81 .byte 44,44,176,44,125,156,88,81 .byte 211,211,107,211,214,184,187,5 .byte 211,211,107,211,214,184,187,5 .byte 231,231,187,231,107,92,211,140 .byte 231,231,187,231,107,92,211,140 .byte 110,110,165,110,87,203,220,57 .byte 110,110,165,110,87,203,220,57 .byte 196,196,55,196,110,243,149,170 .byte 196,196,55,196,110,243,149,170 .byte 3,3,12,3,24,15,6,27 .byte 3,3,12,3,24,15,6,27 .byte 86,86,69,86,138,19,172,220 
.byte 86,86,69,86,138,19,172,220 .byte 68,68,13,68,26,73,136,94 .byte 68,68,13,68,26,73,136,94 .byte 127,127,225,127,223,158,254,160 .byte 127,127,225,127,223,158,254,160 .byte 169,169,158,169,33,55,79,136 .byte 169,169,158,169,33,55,79,136 .byte 42,42,168,42,77,130,84,103 .byte 42,42,168,42,77,130,84,103 .byte 187,187,214,187,177,109,107,10 .byte 187,187,214,187,177,109,107,10 .byte 193,193,35,193,70,226,159,135 .byte 193,193,35,193,70,226,159,135 .byte 83,83,81,83,162,2,166,241 .byte 83,83,81,83,162,2,166,241 .byte 220,220,87,220,174,139,165,114 .byte 220,220,87,220,174,139,165,114 .byte 11,11,44,11,88,39,22,83 .byte 11,11,44,11,88,39,22,83 .byte 157,157,78,157,156,211,39,1 .byte 157,157,78,157,156,211,39,1 .byte 108,108,173,108,71,193,216,43 .byte 108,108,173,108,71,193,216,43 .byte 49,49,196,49,149,245,98,164 .byte 49,49,196,49,149,245,98,164 .byte 116,116,205,116,135,185,232,243 .byte 116,116,205,116,135,185,232,243 .byte 246,246,255,246,227,9,241,21 .byte 246,246,255,246,227,9,241,21 .byte 70,70,5,70,10,67,140,76 .byte 70,70,5,70,10,67,140,76 .byte 172,172,138,172,9,38,69,165 .byte 172,172,138,172,9,38,69,165 .byte 137,137,30,137,60,151,15,181 .byte 137,137,30,137,60,151,15,181 .byte 20,20,80,20,160,68,40,180 .byte 20,20,80,20,160,68,40,180 .byte 225,225,163,225,91,66,223,186 .byte 225,225,163,225,91,66,223,186 .byte 22,22,88,22,176,78,44,166 .byte 22,22,88,22,176,78,44,166 .byte 58,58,232,58,205,210,116,247 .byte 58,58,232,58,205,210,116,247 .byte 105,105,185,105,111,208,210,6 .byte 105,105,185,105,111,208,210,6 .byte 9,9,36,9,72,45,18,65 .byte 9,9,36,9,72,45,18,65 .byte 112,112,221,112,167,173,224,215 .byte 112,112,221,112,167,173,224,215 .byte 182,182,226,182,217,84,113,111 .byte 182,182,226,182,217,84,113,111 .byte 208,208,103,208,206,183,189,30 .byte 208,208,103,208,206,183,189,30 .byte 237,237,147,237,59,126,199,214 .byte 237,237,147,237,59,126,199,214 .byte 204,204,23,204,46,219,133,226 .byte 204,204,23,204,46,219,133,226 .byte 66,66,21,66,42,87,132,104 .byte 66,66,21,66,42,87,132,104 .byte 152,152,90,152,180,194,45,44 .byte 152,152,90,152,180,194,45,44 .byte 164,164,170,164,73,14,85,237 .byte 164,164,170,164,73,14,85,237 .byte 40,40,160,40,93,136,80,117 .byte 40,40,160,40,93,136,80,117 .byte 92,92,109,92,218,49,184,134 .byte 92,92,109,92,218,49,184,134 .byte 248,248,199,248,147,63,237,107 .byte 248,248,199,248,147,63,237,107 .byte 134,134,34,134,68,164,17,194 .byte 134,134,34,134,68,164,17,194 .byte 24,35,198,232,135,184,1,79 .byte 54,166,210,245,121,111,145,82 .byte 96,188,155,142,163,12,123,53 .byte 29,224,215,194,46,75,254,87 .byte 21,119,55,229,159,240,74,218 .byte 88,201,41,10,177,160,107,133 .byte 189,93,16,244,203,62,5,103 .byte 228,39,65,139,167,125,149,216 .byte 251,238,124,102,221,23,71,158 .byte 202,45,191,7,173,90,131,51 .size whirlpool_block_mmx,.-.L_whirlpool_block_mmx_begin #else .file "wp-mmx.S" .text .globl whirlpool_block_mmx .type whirlpool_block_mmx,@function .align 16 whirlpool_block_mmx: .L_whirlpool_block_mmx_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 20(%esp),%esi movl 24(%esp),%edi movl 28(%esp),%ebp movl %esp,%eax subl $148,%esp andl $-64,%esp leal 128(%esp),%ebx movl %esi,(%ebx) movl %edi,4(%ebx) movl %ebp,8(%ebx) movl %eax,16(%ebx) call .L000pic_point .L000pic_point: popl %ebp leal .L001table-.L000pic_point(%ebp),%ebp xorl %ecx,%ecx xorl %edx,%edx movq (%esi),%mm0 movq 8(%esi),%mm1 movq 16(%esi),%mm2 movq 24(%esi),%mm3 movq 32(%esi),%mm4 movq 40(%esi),%mm5 movq 48(%esi),%mm6 movq 56(%esi),%mm7 .L002outerloop: movq %mm0,(%esp) movq 
%mm1,8(%esp) movq %mm2,16(%esp) movq %mm3,24(%esp) movq %mm4,32(%esp) movq %mm5,40(%esp) movq %mm6,48(%esp) movq %mm7,56(%esp) pxor (%edi),%mm0 pxor 8(%edi),%mm1 pxor 16(%edi),%mm2 pxor 24(%edi),%mm3 pxor 32(%edi),%mm4 pxor 40(%edi),%mm5 pxor 48(%edi),%mm6 pxor 56(%edi),%mm7 movq %mm0,64(%esp) movq %mm1,72(%esp) movq %mm2,80(%esp) movq %mm3,88(%esp) movq %mm4,96(%esp) movq %mm5,104(%esp) movq %mm6,112(%esp) movq %mm7,120(%esp) xorl %esi,%esi movl %esi,12(%ebx) .align 16 .L003round: movq 4096(%ebp,%esi,8),%mm0 movl (%esp),%eax movl 4(%esp),%ebx movzbl %al,%ecx movzbl %ah,%edx shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm0 movq 7(%ebp,%edi,8),%mm1 movl 8(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx movq 6(%ebp,%esi,8),%mm2 movq 5(%ebp,%edi,8),%mm3 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx movq 4(%ebp,%esi,8),%mm4 movq 3(%ebp,%edi,8),%mm5 movl 12(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx movq 2(%ebp,%esi,8),%mm6 movq 1(%ebp,%edi,8),%mm7 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm1 pxor 7(%ebp,%edi,8),%mm2 movl 16(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm3 pxor 5(%ebp,%edi,8),%mm4 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm5 pxor 3(%ebp,%edi,8),%mm6 movl 20(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm7 pxor 1(%ebp,%edi,8),%mm0 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm2 pxor 7(%ebp,%edi,8),%mm3 movl 24(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm4 pxor 5(%ebp,%edi,8),%mm5 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm6 pxor 3(%ebp,%edi,8),%mm7 movl 28(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm0 pxor 1(%ebp,%edi,8),%mm1 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm3 pxor 7(%ebp,%edi,8),%mm4 movl 32(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm5 pxor 5(%ebp,%edi,8),%mm6 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm7 pxor 3(%ebp,%edi,8),%mm0 movl 36(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm1 pxor 1(%ebp,%edi,8),%mm2 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm4 pxor 7(%ebp,%edi,8),%mm5 movl 40(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm6 pxor 5(%ebp,%edi,8),%mm7 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm0 pxor 3(%ebp,%edi,8),%mm1 movl 44(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm2 pxor 1(%ebp,%edi,8),%mm3 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl 
%al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm5 pxor 7(%ebp,%edi,8),%mm6 movl 48(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm7 pxor 5(%ebp,%edi,8),%mm0 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm1 pxor 3(%ebp,%edi,8),%mm2 movl 52(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm3 pxor 1(%ebp,%edi,8),%mm4 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm6 pxor 7(%ebp,%edi,8),%mm7 movl 56(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm0 pxor 5(%ebp,%edi,8),%mm1 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm2 pxor 3(%ebp,%edi,8),%mm3 movl 60(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm4 pxor 1(%ebp,%edi,8),%mm5 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm7 pxor 7(%ebp,%edi,8),%mm0 movl 64(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm1 pxor 5(%ebp,%edi,8),%mm2 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm3 pxor 3(%ebp,%edi,8),%mm4 movl 68(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm5 pxor 1(%ebp,%edi,8),%mm6 movq %mm0,(%esp) movq %mm1,8(%esp) movq %mm2,16(%esp) movq %mm3,24(%esp) movq %mm4,32(%esp) movq %mm5,40(%esp) movq %mm6,48(%esp) movq %mm7,56(%esp) shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm0 pxor 7(%ebp,%edi,8),%mm1 movl 72(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm2 pxor 5(%ebp,%edi,8),%mm3 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm4 pxor 3(%ebp,%edi,8),%mm5 movl 76(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm6 pxor 1(%ebp,%edi,8),%mm7 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm1 pxor 7(%ebp,%edi,8),%mm2 movl 80(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm3 pxor 5(%ebp,%edi,8),%mm4 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm5 pxor 3(%ebp,%edi,8),%mm6 movl 84(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm7 pxor 1(%ebp,%edi,8),%mm0 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm2 pxor 7(%ebp,%edi,8),%mm3 movl 88(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm4 pxor 5(%ebp,%edi,8),%mm5 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm6 pxor 3(%ebp,%edi,8),%mm7 movl 92(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl 
%ah,%edx pxor 2(%ebp,%esi,8),%mm0 pxor 1(%ebp,%edi,8),%mm1 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm3 pxor 7(%ebp,%edi,8),%mm4 movl 96(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm5 pxor 5(%ebp,%edi,8),%mm6 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm7 pxor 3(%ebp,%edi,8),%mm0 movl 100(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm1 pxor 1(%ebp,%edi,8),%mm2 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm4 pxor 7(%ebp,%edi,8),%mm5 movl 104(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm6 pxor 5(%ebp,%edi,8),%mm7 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm0 pxor 3(%ebp,%edi,8),%mm1 movl 108(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm2 pxor 1(%ebp,%edi,8),%mm3 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm5 pxor 7(%ebp,%edi,8),%mm6 movl 112(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm7 pxor 5(%ebp,%edi,8),%mm0 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm1 pxor 3(%ebp,%edi,8),%mm2 movl 116(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm3 pxor 1(%ebp,%edi,8),%mm4 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm6 pxor 7(%ebp,%edi,8),%mm7 movl 120(%esp),%eax leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm0 pxor 5(%ebp,%edi,8),%mm1 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm2 pxor 3(%ebp,%edi,8),%mm3 movl 124(%esp),%ebx leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm4 pxor 1(%ebp,%edi,8),%mm5 shrl $16,%eax leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor (%ebp,%esi,8),%mm7 pxor 7(%ebp,%edi,8),%mm0 leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm1 pxor 5(%ebp,%edi,8),%mm2 shrl $16,%ebx leal (%ecx,%ecx,1),%esi movzbl %bl,%ecx leal (%edx,%edx,1),%edi movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm3 pxor 3(%ebp,%edi,8),%mm4 leal (%ecx,%ecx,1),%esi movzbl %al,%ecx leal (%edx,%edx,1),%edi movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm5 pxor 1(%ebp,%edi,8),%mm6 leal 128(%esp),%ebx movl 12(%ebx),%esi addl $1,%esi cmpl $10,%esi je .L004roundsdone movl %esi,12(%ebx) movq %mm0,64(%esp) movq %mm1,72(%esp) movq %mm2,80(%esp) movq %mm3,88(%esp) movq %mm4,96(%esp) movq %mm5,104(%esp) movq %mm6,112(%esp) movq %mm7,120(%esp) jmp .L003round .align 16 .L004roundsdone: movl (%ebx),%esi movl 4(%ebx),%edi movl 8(%ebx),%eax pxor (%edi),%mm0 pxor 8(%edi),%mm1 pxor 16(%edi),%mm2 pxor 24(%edi),%mm3 pxor 32(%edi),%mm4 pxor 40(%edi),%mm5 pxor 48(%edi),%mm6 pxor 56(%edi),%mm7 pxor (%esi),%mm0 pxor 8(%esi),%mm1 pxor 16(%esi),%mm2 pxor 24(%esi),%mm3 pxor 32(%esi),%mm4 pxor 
40(%esi),%mm5 pxor 48(%esi),%mm6 pxor 56(%esi),%mm7 movq %mm0,(%esi) movq %mm1,8(%esi) movq %mm2,16(%esi) movq %mm3,24(%esi) movq %mm4,32(%esi) movq %mm5,40(%esi) movq %mm6,48(%esi) movq %mm7,56(%esi) leal 64(%edi),%edi subl $1,%eax jz .L005alldone movl %edi,4(%ebx) movl %eax,8(%ebx) jmp .L002outerloop .L005alldone: emms movl 16(%ebx),%esp popl %edi popl %esi popl %ebx popl %ebp ret .align 64 .L001table: .byte 24,24,96,24,192,120,48,216 .byte 24,24,96,24,192,120,48,216 .byte 35,35,140,35,5,175,70,38 .byte 35,35,140,35,5,175,70,38 .byte 198,198,63,198,126,249,145,184 .byte 198,198,63,198,126,249,145,184 .byte 232,232,135,232,19,111,205,251 .byte 232,232,135,232,19,111,205,251 .byte 135,135,38,135,76,161,19,203 .byte 135,135,38,135,76,161,19,203 .byte 184,184,218,184,169,98,109,17 .byte 184,184,218,184,169,98,109,17 .byte 1,1,4,1,8,5,2,9 .byte 1,1,4,1,8,5,2,9 .byte 79,79,33,79,66,110,158,13 .byte 79,79,33,79,66,110,158,13 .byte 54,54,216,54,173,238,108,155 .byte 54,54,216,54,173,238,108,155 .byte 166,166,162,166,89,4,81,255 .byte 166,166,162,166,89,4,81,255 .byte 210,210,111,210,222,189,185,12 .byte 210,210,111,210,222,189,185,12 .byte 245,245,243,245,251,6,247,14 .byte 245,245,243,245,251,6,247,14 .byte 121,121,249,121,239,128,242,150 .byte 121,121,249,121,239,128,242,150 .byte 111,111,161,111,95,206,222,48 .byte 111,111,161,111,95,206,222,48 .byte 145,145,126,145,252,239,63,109 .byte 145,145,126,145,252,239,63,109 .byte 82,82,85,82,170,7,164,248 .byte 82,82,85,82,170,7,164,248 .byte 96,96,157,96,39,253,192,71 .byte 96,96,157,96,39,253,192,71 .byte 188,188,202,188,137,118,101,53 .byte 188,188,202,188,137,118,101,53 .byte 155,155,86,155,172,205,43,55 .byte 155,155,86,155,172,205,43,55 .byte 142,142,2,142,4,140,1,138 .byte 142,142,2,142,4,140,1,138 .byte 163,163,182,163,113,21,91,210 .byte 163,163,182,163,113,21,91,210 .byte 12,12,48,12,96,60,24,108 .byte 12,12,48,12,96,60,24,108 .byte 123,123,241,123,255,138,246,132 .byte 123,123,241,123,255,138,246,132 .byte 53,53,212,53,181,225,106,128 .byte 53,53,212,53,181,225,106,128 .byte 29,29,116,29,232,105,58,245 .byte 29,29,116,29,232,105,58,245 .byte 224,224,167,224,83,71,221,179 .byte 224,224,167,224,83,71,221,179 .byte 215,215,123,215,246,172,179,33 .byte 215,215,123,215,246,172,179,33 .byte 194,194,47,194,94,237,153,156 .byte 194,194,47,194,94,237,153,156 .byte 46,46,184,46,109,150,92,67 .byte 46,46,184,46,109,150,92,67 .byte 75,75,49,75,98,122,150,41 .byte 75,75,49,75,98,122,150,41 .byte 254,254,223,254,163,33,225,93 .byte 254,254,223,254,163,33,225,93 .byte 87,87,65,87,130,22,174,213 .byte 87,87,65,87,130,22,174,213 .byte 21,21,84,21,168,65,42,189 .byte 21,21,84,21,168,65,42,189 .byte 119,119,193,119,159,182,238,232 .byte 119,119,193,119,159,182,238,232 .byte 55,55,220,55,165,235,110,146 .byte 55,55,220,55,165,235,110,146 .byte 229,229,179,229,123,86,215,158 .byte 229,229,179,229,123,86,215,158 .byte 159,159,70,159,140,217,35,19 .byte 159,159,70,159,140,217,35,19 .byte 240,240,231,240,211,23,253,35 .byte 240,240,231,240,211,23,253,35 .byte 74,74,53,74,106,127,148,32 .byte 74,74,53,74,106,127,148,32 .byte 218,218,79,218,158,149,169,68 .byte 218,218,79,218,158,149,169,68 .byte 88,88,125,88,250,37,176,162 .byte 88,88,125,88,250,37,176,162 .byte 201,201,3,201,6,202,143,207 .byte 201,201,3,201,6,202,143,207 .byte 41,41,164,41,85,141,82,124 .byte 41,41,164,41,85,141,82,124 .byte 10,10,40,10,80,34,20,90 .byte 10,10,40,10,80,34,20,90 .byte 177,177,254,177,225,79,127,80 .byte 177,177,254,177,225,79,127,80 .byte 160,160,186,160,105,26,93,201 .byte 
160,160,186,160,105,26,93,201 .byte 107,107,177,107,127,218,214,20 .byte 107,107,177,107,127,218,214,20 .byte 133,133,46,133,92,171,23,217 .byte 133,133,46,133,92,171,23,217 .byte 189,189,206,189,129,115,103,60 .byte 189,189,206,189,129,115,103,60 .byte 93,93,105,93,210,52,186,143 .byte 93,93,105,93,210,52,186,143 .byte 16,16,64,16,128,80,32,144 .byte 16,16,64,16,128,80,32,144 .byte 244,244,247,244,243,3,245,7 .byte 244,244,247,244,243,3,245,7 .byte 203,203,11,203,22,192,139,221 .byte 203,203,11,203,22,192,139,221 .byte 62,62,248,62,237,198,124,211 .byte 62,62,248,62,237,198,124,211 .byte 5,5,20,5,40,17,10,45 .byte 5,5,20,5,40,17,10,45 .byte 103,103,129,103,31,230,206,120 .byte 103,103,129,103,31,230,206,120 .byte 228,228,183,228,115,83,213,151 .byte 228,228,183,228,115,83,213,151 .byte 39,39,156,39,37,187,78,2 .byte 39,39,156,39,37,187,78,2 .byte 65,65,25,65,50,88,130,115 .byte 65,65,25,65,50,88,130,115 .byte 139,139,22,139,44,157,11,167 .byte 139,139,22,139,44,157,11,167 .byte 167,167,166,167,81,1,83,246 .byte 167,167,166,167,81,1,83,246 .byte 125,125,233,125,207,148,250,178 .byte 125,125,233,125,207,148,250,178 .byte 149,149,110,149,220,251,55,73 .byte 149,149,110,149,220,251,55,73 .byte 216,216,71,216,142,159,173,86 .byte 216,216,71,216,142,159,173,86 .byte 251,251,203,251,139,48,235,112 .byte 251,251,203,251,139,48,235,112 .byte 238,238,159,238,35,113,193,205 .byte 238,238,159,238,35,113,193,205 .byte 124,124,237,124,199,145,248,187 .byte 124,124,237,124,199,145,248,187 .byte 102,102,133,102,23,227,204,113 .byte 102,102,133,102,23,227,204,113 .byte 221,221,83,221,166,142,167,123 .byte 221,221,83,221,166,142,167,123 .byte 23,23,92,23,184,75,46,175 .byte 23,23,92,23,184,75,46,175 .byte 71,71,1,71,2,70,142,69 .byte 71,71,1,71,2,70,142,69 .byte 158,158,66,158,132,220,33,26 .byte 158,158,66,158,132,220,33,26 .byte 202,202,15,202,30,197,137,212 .byte 202,202,15,202,30,197,137,212 .byte 45,45,180,45,117,153,90,88 .byte 45,45,180,45,117,153,90,88 .byte 191,191,198,191,145,121,99,46 .byte 191,191,198,191,145,121,99,46 .byte 7,7,28,7,56,27,14,63 .byte 7,7,28,7,56,27,14,63 .byte 173,173,142,173,1,35,71,172 .byte 173,173,142,173,1,35,71,172 .byte 90,90,117,90,234,47,180,176 .byte 90,90,117,90,234,47,180,176 .byte 131,131,54,131,108,181,27,239 .byte 131,131,54,131,108,181,27,239 .byte 51,51,204,51,133,255,102,182 .byte 51,51,204,51,133,255,102,182 .byte 99,99,145,99,63,242,198,92 .byte 99,99,145,99,63,242,198,92 .byte 2,2,8,2,16,10,4,18 .byte 2,2,8,2,16,10,4,18 .byte 170,170,146,170,57,56,73,147 .byte 170,170,146,170,57,56,73,147 .byte 113,113,217,113,175,168,226,222 .byte 113,113,217,113,175,168,226,222 .byte 200,200,7,200,14,207,141,198 .byte 200,200,7,200,14,207,141,198 .byte 25,25,100,25,200,125,50,209 .byte 25,25,100,25,200,125,50,209 .byte 73,73,57,73,114,112,146,59 .byte 73,73,57,73,114,112,146,59 .byte 217,217,67,217,134,154,175,95 .byte 217,217,67,217,134,154,175,95 .byte 242,242,239,242,195,29,249,49 .byte 242,242,239,242,195,29,249,49 .byte 227,227,171,227,75,72,219,168 .byte 227,227,171,227,75,72,219,168 .byte 91,91,113,91,226,42,182,185 .byte 91,91,113,91,226,42,182,185 .byte 136,136,26,136,52,146,13,188 .byte 136,136,26,136,52,146,13,188 .byte 154,154,82,154,164,200,41,62 .byte 154,154,82,154,164,200,41,62 .byte 38,38,152,38,45,190,76,11 .byte 38,38,152,38,45,190,76,11 .byte 50,50,200,50,141,250,100,191 .byte 50,50,200,50,141,250,100,191 .byte 176,176,250,176,233,74,125,89 .byte 176,176,250,176,233,74,125,89 .byte 233,233,131,233,27,106,207,242 .byte 233,233,131,233,27,106,207,242 
.byte 15,15,60,15,120,51,30,119 .byte 15,15,60,15,120,51,30,119 .byte 213,213,115,213,230,166,183,51 .byte 213,213,115,213,230,166,183,51 .byte 128,128,58,128,116,186,29,244 .byte 128,128,58,128,116,186,29,244 .byte 190,190,194,190,153,124,97,39 .byte 190,190,194,190,153,124,97,39 .byte 205,205,19,205,38,222,135,235 .byte 205,205,19,205,38,222,135,235 .byte 52,52,208,52,189,228,104,137 .byte 52,52,208,52,189,228,104,137 .byte 72,72,61,72,122,117,144,50 .byte 72,72,61,72,122,117,144,50 .byte 255,255,219,255,171,36,227,84 .byte 255,255,219,255,171,36,227,84 .byte 122,122,245,122,247,143,244,141 .byte 122,122,245,122,247,143,244,141 .byte 144,144,122,144,244,234,61,100 .byte 144,144,122,144,244,234,61,100 .byte 95,95,97,95,194,62,190,157 .byte 95,95,97,95,194,62,190,157 .byte 32,32,128,32,29,160,64,61 .byte 32,32,128,32,29,160,64,61 .byte 104,104,189,104,103,213,208,15 .byte 104,104,189,104,103,213,208,15 .byte 26,26,104,26,208,114,52,202 .byte 26,26,104,26,208,114,52,202 .byte 174,174,130,174,25,44,65,183 .byte 174,174,130,174,25,44,65,183 .byte 180,180,234,180,201,94,117,125 .byte 180,180,234,180,201,94,117,125 .byte 84,84,77,84,154,25,168,206 .byte 84,84,77,84,154,25,168,206 .byte 147,147,118,147,236,229,59,127 .byte 147,147,118,147,236,229,59,127 .byte 34,34,136,34,13,170,68,47 .byte 34,34,136,34,13,170,68,47 .byte 100,100,141,100,7,233,200,99 .byte 100,100,141,100,7,233,200,99 .byte 241,241,227,241,219,18,255,42 .byte 241,241,227,241,219,18,255,42 .byte 115,115,209,115,191,162,230,204 .byte 115,115,209,115,191,162,230,204 .byte 18,18,72,18,144,90,36,130 .byte 18,18,72,18,144,90,36,130 .byte 64,64,29,64,58,93,128,122 .byte 64,64,29,64,58,93,128,122 .byte 8,8,32,8,64,40,16,72 .byte 8,8,32,8,64,40,16,72 .byte 195,195,43,195,86,232,155,149 .byte 195,195,43,195,86,232,155,149 .byte 236,236,151,236,51,123,197,223 .byte 236,236,151,236,51,123,197,223 .byte 219,219,75,219,150,144,171,77 .byte 219,219,75,219,150,144,171,77 .byte 161,161,190,161,97,31,95,192 .byte 161,161,190,161,97,31,95,192 .byte 141,141,14,141,28,131,7,145 .byte 141,141,14,141,28,131,7,145 .byte 61,61,244,61,245,201,122,200 .byte 61,61,244,61,245,201,122,200 .byte 151,151,102,151,204,241,51,91 .byte 151,151,102,151,204,241,51,91 .byte 0,0,0,0,0,0,0,0 .byte 0,0,0,0,0,0,0,0 .byte 207,207,27,207,54,212,131,249 .byte 207,207,27,207,54,212,131,249 .byte 43,43,172,43,69,135,86,110 .byte 43,43,172,43,69,135,86,110 .byte 118,118,197,118,151,179,236,225 .byte 118,118,197,118,151,179,236,225 .byte 130,130,50,130,100,176,25,230 .byte 130,130,50,130,100,176,25,230 .byte 214,214,127,214,254,169,177,40 .byte 214,214,127,214,254,169,177,40 .byte 27,27,108,27,216,119,54,195 .byte 27,27,108,27,216,119,54,195 .byte 181,181,238,181,193,91,119,116 .byte 181,181,238,181,193,91,119,116 .byte 175,175,134,175,17,41,67,190 .byte 175,175,134,175,17,41,67,190 .byte 106,106,181,106,119,223,212,29 .byte 106,106,181,106,119,223,212,29 .byte 80,80,93,80,186,13,160,234 .byte 80,80,93,80,186,13,160,234 .byte 69,69,9,69,18,76,138,87 .byte 69,69,9,69,18,76,138,87 .byte 243,243,235,243,203,24,251,56 .byte 243,243,235,243,203,24,251,56 .byte 48,48,192,48,157,240,96,173 .byte 48,48,192,48,157,240,96,173 .byte 239,239,155,239,43,116,195,196 .byte 239,239,155,239,43,116,195,196 .byte 63,63,252,63,229,195,126,218 .byte 63,63,252,63,229,195,126,218 .byte 85,85,73,85,146,28,170,199 .byte 85,85,73,85,146,28,170,199 .byte 162,162,178,162,121,16,89,219 .byte 162,162,178,162,121,16,89,219 .byte 234,234,143,234,3,101,201,233 .byte 234,234,143,234,3,101,201,233 .byte 
101,101,137,101,15,236,202,106 .byte 101,101,137,101,15,236,202,106 .byte 186,186,210,186,185,104,105,3 .byte 186,186,210,186,185,104,105,3 .byte 47,47,188,47,101,147,94,74 .byte 47,47,188,47,101,147,94,74 .byte 192,192,39,192,78,231,157,142 .byte 192,192,39,192,78,231,157,142 .byte 222,222,95,222,190,129,161,96 .byte 222,222,95,222,190,129,161,96 .byte 28,28,112,28,224,108,56,252 .byte 28,28,112,28,224,108,56,252 .byte 253,253,211,253,187,46,231,70 .byte 253,253,211,253,187,46,231,70 .byte 77,77,41,77,82,100,154,31 .byte 77,77,41,77,82,100,154,31 .byte 146,146,114,146,228,224,57,118 .byte 146,146,114,146,228,224,57,118 .byte 117,117,201,117,143,188,234,250 .byte 117,117,201,117,143,188,234,250 .byte 6,6,24,6,48,30,12,54 .byte 6,6,24,6,48,30,12,54 .byte 138,138,18,138,36,152,9,174 .byte 138,138,18,138,36,152,9,174 .byte 178,178,242,178,249,64,121,75 .byte 178,178,242,178,249,64,121,75 .byte 230,230,191,230,99,89,209,133 .byte 230,230,191,230,99,89,209,133 .byte 14,14,56,14,112,54,28,126 .byte 14,14,56,14,112,54,28,126 .byte 31,31,124,31,248,99,62,231 .byte 31,31,124,31,248,99,62,231 .byte 98,98,149,98,55,247,196,85 .byte 98,98,149,98,55,247,196,85 .byte 212,212,119,212,238,163,181,58 .byte 212,212,119,212,238,163,181,58 .byte 168,168,154,168,41,50,77,129 .byte 168,168,154,168,41,50,77,129 .byte 150,150,98,150,196,244,49,82 .byte 150,150,98,150,196,244,49,82 .byte 249,249,195,249,155,58,239,98 .byte 249,249,195,249,155,58,239,98 .byte 197,197,51,197,102,246,151,163 .byte 197,197,51,197,102,246,151,163 .byte 37,37,148,37,53,177,74,16 .byte 37,37,148,37,53,177,74,16 .byte 89,89,121,89,242,32,178,171 .byte 89,89,121,89,242,32,178,171 .byte 132,132,42,132,84,174,21,208 .byte 132,132,42,132,84,174,21,208 .byte 114,114,213,114,183,167,228,197 .byte 114,114,213,114,183,167,228,197 .byte 57,57,228,57,213,221,114,236 .byte 57,57,228,57,213,221,114,236 .byte 76,76,45,76,90,97,152,22 .byte 76,76,45,76,90,97,152,22 .byte 94,94,101,94,202,59,188,148 .byte 94,94,101,94,202,59,188,148 .byte 120,120,253,120,231,133,240,159 .byte 120,120,253,120,231,133,240,159 .byte 56,56,224,56,221,216,112,229 .byte 56,56,224,56,221,216,112,229 .byte 140,140,10,140,20,134,5,152 .byte 140,140,10,140,20,134,5,152 .byte 209,209,99,209,198,178,191,23 .byte 209,209,99,209,198,178,191,23 .byte 165,165,174,165,65,11,87,228 .byte 165,165,174,165,65,11,87,228 .byte 226,226,175,226,67,77,217,161 .byte 226,226,175,226,67,77,217,161 .byte 97,97,153,97,47,248,194,78 .byte 97,97,153,97,47,248,194,78 .byte 179,179,246,179,241,69,123,66 .byte 179,179,246,179,241,69,123,66 .byte 33,33,132,33,21,165,66,52 .byte 33,33,132,33,21,165,66,52 .byte 156,156,74,156,148,214,37,8 .byte 156,156,74,156,148,214,37,8 .byte 30,30,120,30,240,102,60,238 .byte 30,30,120,30,240,102,60,238 .byte 67,67,17,67,34,82,134,97 .byte 67,67,17,67,34,82,134,97 .byte 199,199,59,199,118,252,147,177 .byte 199,199,59,199,118,252,147,177 .byte 252,252,215,252,179,43,229,79 .byte 252,252,215,252,179,43,229,79 .byte 4,4,16,4,32,20,8,36 .byte 4,4,16,4,32,20,8,36 .byte 81,81,89,81,178,8,162,227 .byte 81,81,89,81,178,8,162,227 .byte 153,153,94,153,188,199,47,37 .byte 153,153,94,153,188,199,47,37 .byte 109,109,169,109,79,196,218,34 .byte 109,109,169,109,79,196,218,34 .byte 13,13,52,13,104,57,26,101 .byte 13,13,52,13,104,57,26,101 .byte 250,250,207,250,131,53,233,121 .byte 250,250,207,250,131,53,233,121 .byte 223,223,91,223,182,132,163,105 .byte 223,223,91,223,182,132,163,105 .byte 126,126,229,126,215,155,252,169 .byte 126,126,229,126,215,155,252,169 .byte 
36,36,144,36,61,180,72,25 .byte 36,36,144,36,61,180,72,25 .byte 59,59,236,59,197,215,118,254 .byte 59,59,236,59,197,215,118,254 .byte 171,171,150,171,49,61,75,154 .byte 171,171,150,171,49,61,75,154 .byte 206,206,31,206,62,209,129,240 .byte 206,206,31,206,62,209,129,240 .byte 17,17,68,17,136,85,34,153 .byte 17,17,68,17,136,85,34,153 .byte 143,143,6,143,12,137,3,131 .byte 143,143,6,143,12,137,3,131 .byte 78,78,37,78,74,107,156,4 .byte 78,78,37,78,74,107,156,4 .byte 183,183,230,183,209,81,115,102 .byte 183,183,230,183,209,81,115,102 .byte 235,235,139,235,11,96,203,224 .byte 235,235,139,235,11,96,203,224 .byte 60,60,240,60,253,204,120,193 .byte 60,60,240,60,253,204,120,193 .byte 129,129,62,129,124,191,31,253 .byte 129,129,62,129,124,191,31,253 .byte 148,148,106,148,212,254,53,64 .byte 148,148,106,148,212,254,53,64 .byte 247,247,251,247,235,12,243,28 .byte 247,247,251,247,235,12,243,28 .byte 185,185,222,185,161,103,111,24 .byte 185,185,222,185,161,103,111,24 .byte 19,19,76,19,152,95,38,139 .byte 19,19,76,19,152,95,38,139 .byte 44,44,176,44,125,156,88,81 .byte 44,44,176,44,125,156,88,81 .byte 211,211,107,211,214,184,187,5 .byte 211,211,107,211,214,184,187,5 .byte 231,231,187,231,107,92,211,140 .byte 231,231,187,231,107,92,211,140 .byte 110,110,165,110,87,203,220,57 .byte 110,110,165,110,87,203,220,57 .byte 196,196,55,196,110,243,149,170 .byte 196,196,55,196,110,243,149,170 .byte 3,3,12,3,24,15,6,27 .byte 3,3,12,3,24,15,6,27 .byte 86,86,69,86,138,19,172,220 .byte 86,86,69,86,138,19,172,220 .byte 68,68,13,68,26,73,136,94 .byte 68,68,13,68,26,73,136,94 .byte 127,127,225,127,223,158,254,160 .byte 127,127,225,127,223,158,254,160 .byte 169,169,158,169,33,55,79,136 .byte 169,169,158,169,33,55,79,136 .byte 42,42,168,42,77,130,84,103 .byte 42,42,168,42,77,130,84,103 .byte 187,187,214,187,177,109,107,10 .byte 187,187,214,187,177,109,107,10 .byte 193,193,35,193,70,226,159,135 .byte 193,193,35,193,70,226,159,135 .byte 83,83,81,83,162,2,166,241 .byte 83,83,81,83,162,2,166,241 .byte 220,220,87,220,174,139,165,114 .byte 220,220,87,220,174,139,165,114 .byte 11,11,44,11,88,39,22,83 .byte 11,11,44,11,88,39,22,83 .byte 157,157,78,157,156,211,39,1 .byte 157,157,78,157,156,211,39,1 .byte 108,108,173,108,71,193,216,43 .byte 108,108,173,108,71,193,216,43 .byte 49,49,196,49,149,245,98,164 .byte 49,49,196,49,149,245,98,164 .byte 116,116,205,116,135,185,232,243 .byte 116,116,205,116,135,185,232,243 .byte 246,246,255,246,227,9,241,21 .byte 246,246,255,246,227,9,241,21 .byte 70,70,5,70,10,67,140,76 .byte 70,70,5,70,10,67,140,76 .byte 172,172,138,172,9,38,69,165 .byte 172,172,138,172,9,38,69,165 .byte 137,137,30,137,60,151,15,181 .byte 137,137,30,137,60,151,15,181 .byte 20,20,80,20,160,68,40,180 .byte 20,20,80,20,160,68,40,180 .byte 225,225,163,225,91,66,223,186 .byte 225,225,163,225,91,66,223,186 .byte 22,22,88,22,176,78,44,166 .byte 22,22,88,22,176,78,44,166 .byte 58,58,232,58,205,210,116,247 .byte 58,58,232,58,205,210,116,247 .byte 105,105,185,105,111,208,210,6 .byte 105,105,185,105,111,208,210,6 .byte 9,9,36,9,72,45,18,65 .byte 9,9,36,9,72,45,18,65 .byte 112,112,221,112,167,173,224,215 .byte 112,112,221,112,167,173,224,215 .byte 182,182,226,182,217,84,113,111 .byte 182,182,226,182,217,84,113,111 .byte 208,208,103,208,206,183,189,30 .byte 208,208,103,208,206,183,189,30 .byte 237,237,147,237,59,126,199,214 .byte 237,237,147,237,59,126,199,214 .byte 204,204,23,204,46,219,133,226 .byte 204,204,23,204,46,219,133,226 .byte 66,66,21,66,42,87,132,104 .byte 66,66,21,66,42,87,132,104 .byte 152,152,90,152,180,194,45,44 .byte 
152,152,90,152,180,194,45,44 .byte 164,164,170,164,73,14,85,237 .byte 164,164,170,164,73,14,85,237 .byte 40,40,160,40,93,136,80,117 .byte 40,40,160,40,93,136,80,117 .byte 92,92,109,92,218,49,184,134 .byte 92,92,109,92,218,49,184,134 .byte 248,248,199,248,147,63,237,107 .byte 248,248,199,248,147,63,237,107 .byte 134,134,34,134,68,164,17,194 .byte 134,134,34,134,68,164,17,194 .byte 24,35,198,232,135,184,1,79 .byte 54,166,210,245,121,111,145,82 .byte 96,188,155,142,163,12,123,53 .byte 29,224,215,194,46,75,254,87 .byte 21,119,55,229,159,240,74,218 .byte 88,201,41,10,177,160,107,133 .byte 189,93,16,244,203,62,5,103 .byte 228,39,65,139,167,125,149,216 .byte 251,238,124,102,221,23,71,158 .byte 202,45,191,7,173,90,131,51 .size whirlpool_block_mmx,.-.L_whirlpool_block_mmx_begin #endif Index: head/secure/lib/libcrypto/i386/x86-gf2m.S =================================================================== --- head/secure/lib/libcrypto/i386/x86-gf2m.S (revision 299480) +++ head/secure/lib/libcrypto/i386/x86-gf2m.S (revision 299481) @@ -1,693 +1,694 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from x86-gf2m.pl. #ifdef PIC .file "x86-gf2m.S" .text .type _mul_1x1_mmx,@function .align 16 _mul_1x1_mmx: subl $36,%esp movl %eax,%ecx leal (%eax,%eax,1),%edx andl $1073741823,%ecx leal (%edx,%edx,1),%ebp movl $0,(%esp) andl $2147483647,%edx movd %eax,%mm2 movd %ebx,%mm3 movl %ecx,4(%esp) xorl %edx,%ecx pxor %mm5,%mm5 pxor %mm4,%mm4 movl %edx,8(%esp) xorl %ebp,%edx movl %ecx,12(%esp) pcmpgtd %mm2,%mm5 paddd %mm2,%mm2 xorl %edx,%ecx movl %ebp,16(%esp) xorl %edx,%ebp pand %mm3,%mm5 pcmpgtd %mm2,%mm4 movl %ecx,20(%esp) xorl %ecx,%ebp psllq $31,%mm5 pand %mm3,%mm4 movl %edx,24(%esp) movl $7,%esi movl %ebp,28(%esp) movl %esi,%ebp andl %ebx,%esi shrl $3,%ebx movl %ebp,%edi psllq $30,%mm4 andl %ebx,%edi shrl $3,%ebx movd (%esp,%esi,4),%mm0 movl %ebp,%esi andl %ebx,%esi shrl $3,%ebx movd (%esp,%edi,4),%mm2 movl %ebp,%edi psllq $3,%mm2 andl %ebx,%edi shrl $3,%ebx pxor %mm2,%mm0 movd (%esp,%esi,4),%mm1 movl %ebp,%esi psllq $6,%mm1 andl %ebx,%esi shrl $3,%ebx pxor %mm1,%mm0 movd (%esp,%edi,4),%mm2 movl %ebp,%edi psllq $9,%mm2 andl %ebx,%edi shrl $3,%ebx pxor %mm2,%mm0 movd (%esp,%esi,4),%mm1 movl %ebp,%esi psllq $12,%mm1 andl %ebx,%esi shrl $3,%ebx pxor %mm1,%mm0 movd (%esp,%edi,4),%mm2 movl %ebp,%edi psllq $15,%mm2 andl %ebx,%edi shrl $3,%ebx pxor %mm2,%mm0 movd (%esp,%esi,4),%mm1 movl %ebp,%esi psllq $18,%mm1 andl %ebx,%esi shrl $3,%ebx pxor %mm1,%mm0 movd (%esp,%edi,4),%mm2 movl %ebp,%edi psllq $21,%mm2 andl %ebx,%edi shrl $3,%ebx pxor %mm2,%mm0 movd (%esp,%esi,4),%mm1 movl %ebp,%esi psllq $24,%mm1 andl %ebx,%esi shrl $3,%ebx pxor %mm1,%mm0 movd (%esp,%edi,4),%mm2 pxor %mm4,%mm0 psllq $27,%mm2 pxor %mm2,%mm0 movd (%esp,%esi,4),%mm1 pxor %mm5,%mm0 psllq $30,%mm1 addl $36,%esp pxor %mm1,%mm0 ret .size _mul_1x1_mmx,.-_mul_1x1_mmx .type _mul_1x1_ialu,@function .align 16 _mul_1x1_ialu: subl $36,%esp movl %eax,%ecx leal (%eax,%eax,1),%edx leal (,%eax,4),%ebp andl $1073741823,%ecx leal (%eax,%eax,1),%edi sarl $31,%eax movl $0,(%esp) andl $2147483647,%edx movl %ecx,4(%esp) xorl %edx,%ecx movl %edx,8(%esp) xorl %ebp,%edx movl %ecx,12(%esp) xorl %edx,%ecx movl %ebp,16(%esp) xorl %edx,%ebp movl %ecx,20(%esp) xorl %ecx,%ebp sarl $31,%edi andl %ebx,%eax movl %edx,24(%esp) andl %ebx,%edi movl %ebp,28(%esp) movl %eax,%edx shll $31,%eax movl %edi,%ecx shrl $1,%edx movl $7,%esi shll $30,%edi andl %ebx,%esi shrl $2,%ecx xorl %edi,%eax shrl $3,%ebx movl $7,%edi andl %ebx,%edi shrl $3,%ebx xorl %ecx,%edx xorl 
(%esp,%esi,4),%eax movl $7,%esi andl %ebx,%esi shrl $3,%ebx movl (%esp,%edi,4),%ebp movl $7,%edi movl %ebp,%ecx shll $3,%ebp andl %ebx,%edi shrl $29,%ecx xorl %ebp,%eax shrl $3,%ebx xorl %ecx,%edx movl (%esp,%esi,4),%ecx movl $7,%esi movl %ecx,%ebp shll $6,%ecx andl %ebx,%esi shrl $26,%ebp xorl %ecx,%eax shrl $3,%ebx xorl %ebp,%edx movl (%esp,%edi,4),%ebp movl $7,%edi movl %ebp,%ecx shll $9,%ebp andl %ebx,%edi shrl $23,%ecx xorl %ebp,%eax shrl $3,%ebx xorl %ecx,%edx movl (%esp,%esi,4),%ecx movl $7,%esi movl %ecx,%ebp shll $12,%ecx andl %ebx,%esi shrl $20,%ebp xorl %ecx,%eax shrl $3,%ebx xorl %ebp,%edx movl (%esp,%edi,4),%ebp movl $7,%edi movl %ebp,%ecx shll $15,%ebp andl %ebx,%edi shrl $17,%ecx xorl %ebp,%eax shrl $3,%ebx xorl %ecx,%edx movl (%esp,%esi,4),%ecx movl $7,%esi movl %ecx,%ebp shll $18,%ecx andl %ebx,%esi shrl $14,%ebp xorl %ecx,%eax shrl $3,%ebx xorl %ebp,%edx movl (%esp,%edi,4),%ebp movl $7,%edi movl %ebp,%ecx shll $21,%ebp andl %ebx,%edi shrl $11,%ecx xorl %ebp,%eax shrl $3,%ebx xorl %ecx,%edx movl (%esp,%esi,4),%ecx movl $7,%esi movl %ecx,%ebp shll $24,%ecx andl %ebx,%esi shrl $8,%ebp xorl %ecx,%eax shrl $3,%ebx xorl %ebp,%edx movl (%esp,%edi,4),%ebp movl %ebp,%ecx shll $27,%ebp movl (%esp,%esi,4),%edi shrl $5,%ecx movl %edi,%esi xorl %ebp,%eax shll $30,%edi xorl %ecx,%edx shrl $2,%esi xorl %edi,%eax xorl %esi,%edx addl $36,%esp ret .size _mul_1x1_ialu,.-_mul_1x1_ialu .globl bn_GF2m_mul_2x2 .type bn_GF2m_mul_2x2,@function .align 16 bn_GF2m_mul_2x2: .L_bn_GF2m_mul_2x2_begin: call .L000PIC_me_up .L000PIC_me_up: popl %edx leal OPENSSL_ia32cap_P-.L000PIC_me_up(%edx),%edx movl (%edx),%eax movl 4(%edx),%edx testl $8388608,%eax jz .L001ialu testl $16777216,%eax jz .L002mmx testl $2,%edx jz .L002mmx movups 8(%esp),%xmm0 shufps $177,%xmm0,%xmm0 .byte 102,15,58,68,192,1 movl 4(%esp),%eax movups %xmm0,(%eax) ret .align 16 .L002mmx: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 24(%esp),%eax movl 32(%esp),%ebx call _mul_1x1_mmx movq %mm0,%mm7 movl 28(%esp),%eax movl 36(%esp),%ebx call _mul_1x1_mmx movq %mm0,%mm6 movl 24(%esp),%eax movl 32(%esp),%ebx xorl 28(%esp),%eax xorl 36(%esp),%ebx call _mul_1x1_mmx pxor %mm7,%mm0 movl 20(%esp),%eax pxor %mm6,%mm0 movq %mm0,%mm2 psllq $32,%mm0 popl %edi psrlq $32,%mm2 popl %esi pxor %mm6,%mm0 popl %ebx pxor %mm7,%mm2 movq %mm0,(%eax) popl %ebp movq %mm2,8(%eax) emms ret .align 16 .L001ialu: pushl %ebp pushl %ebx pushl %esi pushl %edi subl $20,%esp movl 44(%esp),%eax movl 52(%esp),%ebx call _mul_1x1_ialu movl %eax,8(%esp) movl %edx,12(%esp) movl 48(%esp),%eax movl 56(%esp),%ebx call _mul_1x1_ialu movl %eax,(%esp) movl %edx,4(%esp) movl 44(%esp),%eax movl 52(%esp),%ebx xorl 48(%esp),%eax xorl 56(%esp),%ebx call _mul_1x1_ialu movl 40(%esp),%ebp movl (%esp),%ebx movl 4(%esp),%ecx movl 8(%esp),%edi movl 12(%esp),%esi xorl %edx,%eax xorl %ecx,%edx xorl %ebx,%eax movl %ebx,(%ebp) xorl %edi,%edx movl %esi,12(%ebp) xorl %esi,%eax addl $20,%esp xorl %esi,%edx popl %edi xorl %edx,%eax popl %esi movl %edx,8(%ebp) popl %ebx movl %eax,4(%ebp) popl %ebp ret .size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin .byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 .byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .comm OPENSSL_ia32cap_P,16,4 #else .file "x86-gf2m.S" .text .type _mul_1x1_mmx,@function .align 16 _mul_1x1_mmx: subl $36,%esp movl %eax,%ecx leal (%eax,%eax,1),%edx andl $1073741823,%ecx leal 
(%edx,%edx,1),%ebp movl $0,(%esp) andl $2147483647,%edx movd %eax,%mm2 movd %ebx,%mm3 movl %ecx,4(%esp) xorl %edx,%ecx pxor %mm5,%mm5 pxor %mm4,%mm4 movl %edx,8(%esp) xorl %ebp,%edx movl %ecx,12(%esp) pcmpgtd %mm2,%mm5 paddd %mm2,%mm2 xorl %edx,%ecx movl %ebp,16(%esp) xorl %edx,%ebp pand %mm3,%mm5 pcmpgtd %mm2,%mm4 movl %ecx,20(%esp) xorl %ecx,%ebp psllq $31,%mm5 pand %mm3,%mm4 movl %edx,24(%esp) movl $7,%esi movl %ebp,28(%esp) movl %esi,%ebp andl %ebx,%esi shrl $3,%ebx movl %ebp,%edi psllq $30,%mm4 andl %ebx,%edi shrl $3,%ebx movd (%esp,%esi,4),%mm0 movl %ebp,%esi andl %ebx,%esi shrl $3,%ebx movd (%esp,%edi,4),%mm2 movl %ebp,%edi psllq $3,%mm2 andl %ebx,%edi shrl $3,%ebx pxor %mm2,%mm0 movd (%esp,%esi,4),%mm1 movl %ebp,%esi psllq $6,%mm1 andl %ebx,%esi shrl $3,%ebx pxor %mm1,%mm0 movd (%esp,%edi,4),%mm2 movl %ebp,%edi psllq $9,%mm2 andl %ebx,%edi shrl $3,%ebx pxor %mm2,%mm0 movd (%esp,%esi,4),%mm1 movl %ebp,%esi psllq $12,%mm1 andl %ebx,%esi shrl $3,%ebx pxor %mm1,%mm0 movd (%esp,%edi,4),%mm2 movl %ebp,%edi psllq $15,%mm2 andl %ebx,%edi shrl $3,%ebx pxor %mm2,%mm0 movd (%esp,%esi,4),%mm1 movl %ebp,%esi psllq $18,%mm1 andl %ebx,%esi shrl $3,%ebx pxor %mm1,%mm0 movd (%esp,%edi,4),%mm2 movl %ebp,%edi psllq $21,%mm2 andl %ebx,%edi shrl $3,%ebx pxor %mm2,%mm0 movd (%esp,%esi,4),%mm1 movl %ebp,%esi psllq $24,%mm1 andl %ebx,%esi shrl $3,%ebx pxor %mm1,%mm0 movd (%esp,%edi,4),%mm2 pxor %mm4,%mm0 psllq $27,%mm2 pxor %mm2,%mm0 movd (%esp,%esi,4),%mm1 pxor %mm5,%mm0 psllq $30,%mm1 addl $36,%esp pxor %mm1,%mm0 ret .size _mul_1x1_mmx,.-_mul_1x1_mmx .type _mul_1x1_ialu,@function .align 16 _mul_1x1_ialu: subl $36,%esp movl %eax,%ecx leal (%eax,%eax,1),%edx leal (,%eax,4),%ebp andl $1073741823,%ecx leal (%eax,%eax,1),%edi sarl $31,%eax movl $0,(%esp) andl $2147483647,%edx movl %ecx,4(%esp) xorl %edx,%ecx movl %edx,8(%esp) xorl %ebp,%edx movl %ecx,12(%esp) xorl %edx,%ecx movl %ebp,16(%esp) xorl %edx,%ebp movl %ecx,20(%esp) xorl %ecx,%ebp sarl $31,%edi andl %ebx,%eax movl %edx,24(%esp) andl %ebx,%edi movl %ebp,28(%esp) movl %eax,%edx shll $31,%eax movl %edi,%ecx shrl $1,%edx movl $7,%esi shll $30,%edi andl %ebx,%esi shrl $2,%ecx xorl %edi,%eax shrl $3,%ebx movl $7,%edi andl %ebx,%edi shrl $3,%ebx xorl %ecx,%edx xorl (%esp,%esi,4),%eax movl $7,%esi andl %ebx,%esi shrl $3,%ebx movl (%esp,%edi,4),%ebp movl $7,%edi movl %ebp,%ecx shll $3,%ebp andl %ebx,%edi shrl $29,%ecx xorl %ebp,%eax shrl $3,%ebx xorl %ecx,%edx movl (%esp,%esi,4),%ecx movl $7,%esi movl %ecx,%ebp shll $6,%ecx andl %ebx,%esi shrl $26,%ebp xorl %ecx,%eax shrl $3,%ebx xorl %ebp,%edx movl (%esp,%edi,4),%ebp movl $7,%edi movl %ebp,%ecx shll $9,%ebp andl %ebx,%edi shrl $23,%ecx xorl %ebp,%eax shrl $3,%ebx xorl %ecx,%edx movl (%esp,%esi,4),%ecx movl $7,%esi movl %ecx,%ebp shll $12,%ecx andl %ebx,%esi shrl $20,%ebp xorl %ecx,%eax shrl $3,%ebx xorl %ebp,%edx movl (%esp,%edi,4),%ebp movl $7,%edi movl %ebp,%ecx shll $15,%ebp andl %ebx,%edi shrl $17,%ecx xorl %ebp,%eax shrl $3,%ebx xorl %ecx,%edx movl (%esp,%esi,4),%ecx movl $7,%esi movl %ecx,%ebp shll $18,%ecx andl %ebx,%esi shrl $14,%ebp xorl %ecx,%eax shrl $3,%ebx xorl %ebp,%edx movl (%esp,%edi,4),%ebp movl $7,%edi movl %ebp,%ecx shll $21,%ebp andl %ebx,%edi shrl $11,%ecx xorl %ebp,%eax shrl $3,%ebx xorl %ecx,%edx movl (%esp,%esi,4),%ecx movl $7,%esi movl %ecx,%ebp shll $24,%ecx andl %ebx,%esi shrl $8,%ebp xorl %ecx,%eax shrl $3,%ebx xorl %ebp,%edx movl (%esp,%edi,4),%ebp movl %ebp,%ecx shll $27,%ebp movl (%esp,%esi,4),%edi shrl $5,%ecx movl %edi,%esi xorl %ebp,%eax shll $30,%edi xorl %ecx,%edx shrl 
$2,%esi xorl %edi,%eax xorl %esi,%edx addl $36,%esp ret .size _mul_1x1_ialu,.-_mul_1x1_ialu .globl bn_GF2m_mul_2x2 .type bn_GF2m_mul_2x2,@function .align 16 bn_GF2m_mul_2x2: .L_bn_GF2m_mul_2x2_begin: leal OPENSSL_ia32cap_P,%edx movl (%edx),%eax movl 4(%edx),%edx testl $8388608,%eax jz .L000ialu testl $16777216,%eax jz .L001mmx testl $2,%edx jz .L001mmx movups 8(%esp),%xmm0 shufps $177,%xmm0,%xmm0 .byte 102,15,58,68,192,1 movl 4(%esp),%eax movups %xmm0,(%eax) ret .align 16 .L001mmx: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 24(%esp),%eax movl 32(%esp),%ebx call _mul_1x1_mmx movq %mm0,%mm7 movl 28(%esp),%eax movl 36(%esp),%ebx call _mul_1x1_mmx movq %mm0,%mm6 movl 24(%esp),%eax movl 32(%esp),%ebx xorl 28(%esp),%eax xorl 36(%esp),%ebx call _mul_1x1_mmx pxor %mm7,%mm0 movl 20(%esp),%eax pxor %mm6,%mm0 movq %mm0,%mm2 psllq $32,%mm0 popl %edi psrlq $32,%mm2 popl %esi pxor %mm6,%mm0 popl %ebx pxor %mm7,%mm2 movq %mm0,(%eax) popl %ebp movq %mm2,8(%eax) emms ret .align 16 .L000ialu: pushl %ebp pushl %ebx pushl %esi pushl %edi subl $20,%esp movl 44(%esp),%eax movl 52(%esp),%ebx call _mul_1x1_ialu movl %eax,8(%esp) movl %edx,12(%esp) movl 48(%esp),%eax movl 56(%esp),%ebx call _mul_1x1_ialu movl %eax,(%esp) movl %edx,4(%esp) movl 44(%esp),%eax movl 52(%esp),%ebx xorl 48(%esp),%eax xorl 56(%esp),%ebx call _mul_1x1_ialu movl 40(%esp),%ebp movl (%esp),%ebx movl 4(%esp),%ecx movl 8(%esp),%edi movl 12(%esp),%esi xorl %edx,%eax xorl %ecx,%edx xorl %ebx,%eax movl %ebx,(%ebp) xorl %edi,%edx movl %esi,12(%ebp) xorl %esi,%eax addl $20,%esp xorl %esi,%edx popl %edi xorl %edx,%eax popl %esi movl %edx,8(%ebp) popl %ebx movl %eax,4(%ebp) popl %ebp ret .size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin .byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 .byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .comm OPENSSL_ia32cap_P,16,4 #endif Index: head/secure/lib/libcrypto/i386/x86-mont.S =================================================================== --- head/secure/lib/libcrypto/i386/x86-mont.S (revision 299480) +++ head/secure/lib/libcrypto/i386/x86-mont.S (revision 299481) @@ -1,935 +1,936 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from x86-mont.pl. 
#ifdef PIC .file "x86-mont.S" .text .globl bn_mul_mont .type bn_mul_mont,@function .align 16 bn_mul_mont: .L_bn_mul_mont_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %eax,%eax movl 40(%esp),%edi cmpl $4,%edi jl .L000just_leave leal 20(%esp),%esi leal 24(%esp),%edx movl %esp,%ebp addl $2,%edi negl %edi leal -32(%esp,%edi,4),%esp negl %edi movl %esp,%eax subl %edx,%eax andl $2047,%eax subl %eax,%esp xorl %esp,%edx andl $2048,%edx xorl $2048,%edx subl %edx,%esp andl $-64,%esp movl %ebp,%eax subl %esp,%eax andl $-4096,%eax .L001page_walk: movl (%esp,%eax,1),%edx subl $4096,%eax .byte 46 jnc .L001page_walk movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl 16(%esi),%esi movl (%esi),%esi movl %eax,4(%esp) movl %ebx,8(%esp) movl %ecx,12(%esp) movl %edx,16(%esp) movl %esi,20(%esp) leal -3(%edi),%ebx movl %ebp,24(%esp) call .L002PIC_me_up .L002PIC_me_up: popl %eax leal OPENSSL_ia32cap_P-.L002PIC_me_up(%eax),%eax btl $26,(%eax) jnc .L003non_sse2 movl $-1,%eax movd %eax,%mm7 movl 8(%esp),%esi movl 12(%esp),%edi movl 16(%esp),%ebp xorl %edx,%edx xorl %ecx,%ecx movd (%edi),%mm4 movd (%esi),%mm5 movd (%ebp),%mm3 pmuludq %mm4,%mm5 movq %mm5,%mm2 movq %mm5,%mm0 pand %mm7,%mm0 pmuludq 20(%esp),%mm5 pmuludq %mm5,%mm3 paddq %mm0,%mm3 movd 4(%ebp),%mm1 movd 4(%esi),%mm0 psrlq $32,%mm2 psrlq $32,%mm3 incl %ecx .align 16 .L0041st: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 paddq %mm1,%mm3 movq %mm2,%mm0 pand %mm7,%mm0 movd 4(%ebp,%ecx,4),%mm1 paddq %mm0,%mm3 movd 4(%esi,%ecx,4),%mm0 psrlq $32,%mm2 movd %mm3,28(%esp,%ecx,4) psrlq $32,%mm3 leal 1(%ecx),%ecx cmpl %ebx,%ecx jl .L0041st pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 paddq %mm1,%mm3 movq %mm2,%mm0 pand %mm7,%mm0 paddq %mm0,%mm3 movd %mm3,28(%esp,%ecx,4) psrlq $32,%mm2 psrlq $32,%mm3 paddq %mm2,%mm3 movq %mm3,32(%esp,%ebx,4) incl %edx .L005outer: xorl %ecx,%ecx movd (%edi,%edx,4),%mm4 movd (%esi),%mm5 movd 32(%esp),%mm6 movd (%ebp),%mm3 pmuludq %mm4,%mm5 paddq %mm6,%mm5 movq %mm5,%mm0 movq %mm5,%mm2 pand %mm7,%mm0 pmuludq 20(%esp),%mm5 pmuludq %mm5,%mm3 paddq %mm0,%mm3 movd 36(%esp),%mm6 movd 4(%ebp),%mm1 movd 4(%esi),%mm0 psrlq $32,%mm2 psrlq $32,%mm3 paddq %mm6,%mm2 incl %ecx decl %ebx .L006inner: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 paddq %mm1,%mm3 movq %mm2,%mm0 movd 36(%esp,%ecx,4),%mm6 pand %mm7,%mm0 movd 4(%ebp,%ecx,4),%mm1 paddq %mm0,%mm3 movd 4(%esi,%ecx,4),%mm0 psrlq $32,%mm2 movd %mm3,28(%esp,%ecx,4) psrlq $32,%mm3 paddq %mm6,%mm2 decl %ebx leal 1(%ecx),%ecx jnz .L006inner movl %ecx,%ebx pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 paddq %mm1,%mm3 movq %mm2,%mm0 pand %mm7,%mm0 paddq %mm0,%mm3 movd %mm3,28(%esp,%ecx,4) psrlq $32,%mm2 psrlq $32,%mm3 movd 36(%esp,%ebx,4),%mm6 paddq %mm2,%mm3 paddq %mm6,%mm3 movq %mm3,32(%esp,%ebx,4) leal 1(%edx),%edx cmpl %ebx,%edx jle .L005outer emms jmp .L007common_tail .align 16 .L003non_sse2: movl 8(%esp),%esi leal 1(%ebx),%ebp movl 12(%esp),%edi xorl %ecx,%ecx movl %esi,%edx andl $1,%ebp subl %edi,%edx leal 4(%edi,%ebx,4),%eax orl %edx,%ebp movl (%edi),%edi jz .L008bn_sqr_mont movl %eax,28(%esp) movl (%esi),%eax xorl %edx,%edx .align 16 .L009mull: movl %edx,%ebp mull %edi addl %eax,%ebp leal 1(%ecx),%ecx adcl $0,%edx movl (%esi,%ecx,4),%eax cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) jl .L009mull movl %edx,%ebp mull %edi movl 20(%esp),%edi addl %ebp,%eax movl 16(%esp),%esi adcl $0,%edx imull 32(%esp),%edi movl %eax,32(%esp,%ebx,4) xorl %ecx,%ecx movl %edx,36(%esp,%ebx,4) movl %ecx,40(%esp,%ebx,4) movl (%esi),%eax mull %edi addl 
32(%esp),%eax movl 4(%esi),%eax adcl $0,%edx incl %ecx jmp .L0102ndmadd .align 16 .L0111stmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp leal 1(%ecx),%ecx adcl $0,%edx addl %eax,%ebp movl (%esi,%ecx,4),%eax adcl $0,%edx cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) jl .L0111stmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%eax movl 20(%esp),%edi adcl $0,%edx movl 16(%esp),%esi addl %eax,%ebp adcl $0,%edx imull 32(%esp),%edi xorl %ecx,%ecx addl 36(%esp,%ebx,4),%edx movl %ebp,32(%esp,%ebx,4) adcl $0,%ecx movl (%esi),%eax movl %edx,36(%esp,%ebx,4) movl %ecx,40(%esp,%ebx,4) mull %edi addl 32(%esp),%eax movl 4(%esi),%eax adcl $0,%edx movl $1,%ecx .align 16 .L0102ndmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp leal 1(%ecx),%ecx adcl $0,%edx addl %eax,%ebp movl (%esi,%ecx,4),%eax adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) jl .L0102ndmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp adcl $0,%edx addl %eax,%ebp adcl $0,%edx movl %ebp,28(%esp,%ebx,4) xorl %eax,%eax movl 12(%esp),%ecx addl 36(%esp,%ebx,4),%edx adcl 40(%esp,%ebx,4),%eax leal 4(%ecx),%ecx movl %edx,32(%esp,%ebx,4) cmpl 28(%esp),%ecx movl %eax,36(%esp,%ebx,4) je .L007common_tail movl (%ecx),%edi movl 8(%esp),%esi movl %ecx,12(%esp) xorl %ecx,%ecx xorl %edx,%edx movl (%esi),%eax jmp .L0111stmadd .align 16 .L008bn_sqr_mont: movl %ebx,(%esp) movl %ecx,12(%esp) movl %edi,%eax mull %edi movl %eax,32(%esp) movl %edx,%ebx shrl $1,%edx andl $1,%ebx incl %ecx .align 16 .L012sqr: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi addl %ebp,%eax leal 1(%ecx),%ecx adcl $0,%edx leal (%ebx,%eax,2),%ebp shrl $31,%eax cmpl (%esp),%ecx movl %eax,%ebx movl %ebp,28(%esp,%ecx,4) jl .L012sqr movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi addl %ebp,%eax movl 20(%esp),%edi adcl $0,%edx movl 16(%esp),%esi leal (%ebx,%eax,2),%ebp imull 32(%esp),%edi shrl $31,%eax movl %ebp,32(%esp,%ecx,4) leal (%eax,%edx,2),%ebp movl (%esi),%eax shrl $31,%edx movl %ebp,36(%esp,%ecx,4) movl %edx,40(%esp,%ecx,4) mull %edi addl 32(%esp),%eax movl %ecx,%ebx adcl $0,%edx movl 4(%esi),%eax movl $1,%ecx .align 16 .L0133rdmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp adcl $0,%edx addl %eax,%ebp movl 4(%esi,%ecx,4),%eax adcl $0,%edx movl %ebp,28(%esp,%ecx,4) movl %edx,%ebp mull %edi addl 36(%esp,%ecx,4),%ebp leal 2(%ecx),%ecx adcl $0,%edx addl %eax,%ebp movl (%esi,%ecx,4),%eax adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) jl .L0133rdmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp adcl $0,%edx addl %eax,%ebp adcl $0,%edx movl %ebp,28(%esp,%ebx,4) movl 12(%esp),%ecx xorl %eax,%eax movl 8(%esp),%esi addl 36(%esp,%ebx,4),%edx adcl 40(%esp,%ebx,4),%eax movl %edx,32(%esp,%ebx,4) cmpl %ebx,%ecx movl %eax,36(%esp,%ebx,4) je .L007common_tail movl 4(%esi,%ecx,4),%edi leal 1(%ecx),%ecx movl %edi,%eax movl %ecx,12(%esp) mull %edi addl 32(%esp,%ecx,4),%eax adcl $0,%edx movl %eax,32(%esp,%ecx,4) xorl %ebp,%ebp cmpl %ebx,%ecx leal 1(%ecx),%ecx je .L014sqrlast movl %edx,%ebx shrl $1,%edx andl $1,%ebx .align 16 .L015sqradd: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi addl %ebp,%eax leal (%eax,%eax,1),%ebp adcl $0,%edx shrl $31,%eax addl 32(%esp,%ecx,4),%ebp leal 1(%ecx),%ecx adcl $0,%eax addl %ebx,%ebp adcl $0,%eax cmpl (%esp),%ecx movl %ebp,28(%esp,%ecx,4) movl %eax,%ebx jle .L015sqradd movl %edx,%ebp addl %edx,%edx shrl $31,%ebp addl %ebx,%edx adcl $0,%ebp .L014sqrlast: movl 20(%esp),%edi movl 16(%esp),%esi imull 32(%esp),%edi addl 32(%esp,%ecx,4),%edx movl (%esi),%eax adcl $0,%ebp movl %edx,32(%esp,%ecx,4) movl %ebp,36(%esp,%ecx,4) 
mull %edi addl 32(%esp),%eax leal -1(%ecx),%ebx adcl $0,%edx movl $1,%ecx movl 4(%esi),%eax jmp .L0133rdmadd .align 16 .L007common_tail: movl 16(%esp),%ebp movl 4(%esp),%edi leal 32(%esp),%esi movl (%esi),%eax movl %ebx,%ecx xorl %edx,%edx .align 16 .L016sub: sbbl (%ebp,%edx,4),%eax movl %eax,(%edi,%edx,4) decl %ecx movl 4(%esi,%edx,4),%eax leal 1(%edx),%edx jge .L016sub sbbl $0,%eax andl %eax,%esi notl %eax movl %edi,%ebp andl %eax,%ebp orl %ebp,%esi .align 16 .L017copy: movl (%esi,%ebx,4),%eax movl %eax,(%edi,%ebx,4) movl %ecx,32(%esp,%ebx,4) decl %ebx jge .L017copy movl 24(%esp),%esp movl $1,%eax .L000just_leave: popl %edi popl %esi popl %ebx popl %ebp ret .size bn_mul_mont,.-.L_bn_mul_mont_begin .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 .byte 111,114,103,62,0 .comm OPENSSL_ia32cap_P,16,4 #else .file "x86-mont.S" .text .globl bn_mul_mont .type bn_mul_mont,@function .align 16 bn_mul_mont: .L_bn_mul_mont_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %eax,%eax movl 40(%esp),%edi cmpl $4,%edi jl .L000just_leave leal 20(%esp),%esi leal 24(%esp),%edx movl %esp,%ebp addl $2,%edi negl %edi leal -32(%esp,%edi,4),%esp negl %edi movl %esp,%eax subl %edx,%eax andl $2047,%eax subl %eax,%esp xorl %esp,%edx andl $2048,%edx xorl $2048,%edx subl %edx,%esp andl $-64,%esp movl %ebp,%eax subl %esp,%eax andl $-4096,%eax .L001page_walk: movl (%esp,%eax,1),%edx subl $4096,%eax .byte 46 jnc .L001page_walk movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edx movl 16(%esi),%esi movl (%esi),%esi movl %eax,4(%esp) movl %ebx,8(%esp) movl %ecx,12(%esp) movl %edx,16(%esp) movl %esi,20(%esp) leal -3(%edi),%ebx movl %ebp,24(%esp) leal OPENSSL_ia32cap_P,%eax btl $26,(%eax) jnc .L002non_sse2 movl $-1,%eax movd %eax,%mm7 movl 8(%esp),%esi movl 12(%esp),%edi movl 16(%esp),%ebp xorl %edx,%edx xorl %ecx,%ecx movd (%edi),%mm4 movd (%esi),%mm5 movd (%ebp),%mm3 pmuludq %mm4,%mm5 movq %mm5,%mm2 movq %mm5,%mm0 pand %mm7,%mm0 pmuludq 20(%esp),%mm5 pmuludq %mm5,%mm3 paddq %mm0,%mm3 movd 4(%ebp),%mm1 movd 4(%esi),%mm0 psrlq $32,%mm2 psrlq $32,%mm3 incl %ecx .align 16 .L0031st: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 paddq %mm1,%mm3 movq %mm2,%mm0 pand %mm7,%mm0 movd 4(%ebp,%ecx,4),%mm1 paddq %mm0,%mm3 movd 4(%esi,%ecx,4),%mm0 psrlq $32,%mm2 movd %mm3,28(%esp,%ecx,4) psrlq $32,%mm3 leal 1(%ecx),%ecx cmpl %ebx,%ecx jl .L0031st pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 paddq %mm1,%mm3 movq %mm2,%mm0 pand %mm7,%mm0 paddq %mm0,%mm3 movd %mm3,28(%esp,%ecx,4) psrlq $32,%mm2 psrlq $32,%mm3 paddq %mm2,%mm3 movq %mm3,32(%esp,%ebx,4) incl %edx .L004outer: xorl %ecx,%ecx movd (%edi,%edx,4),%mm4 movd (%esi),%mm5 movd 32(%esp),%mm6 movd (%ebp),%mm3 pmuludq %mm4,%mm5 paddq %mm6,%mm5 movq %mm5,%mm0 movq %mm5,%mm2 pand %mm7,%mm0 pmuludq 20(%esp),%mm5 pmuludq %mm5,%mm3 paddq %mm0,%mm3 movd 36(%esp),%mm6 movd 4(%ebp),%mm1 movd 4(%esi),%mm0 psrlq $32,%mm2 psrlq $32,%mm3 paddq %mm6,%mm2 incl %ecx decl %ebx .L005inner: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 paddq %mm1,%mm3 movq %mm2,%mm0 movd 36(%esp,%ecx,4),%mm6 pand %mm7,%mm0 movd 4(%ebp,%ecx,4),%mm1 paddq %mm0,%mm3 movd 4(%esi,%ecx,4),%mm0 psrlq $32,%mm2 movd %mm3,28(%esp,%ecx,4) psrlq $32,%mm3 paddq %mm6,%mm2 decl %ebx leal 1(%ecx),%ecx jnz .L005inner movl %ecx,%ebx pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 paddq 
%mm1,%mm3 movq %mm2,%mm0 pand %mm7,%mm0 paddq %mm0,%mm3 movd %mm3,28(%esp,%ecx,4) psrlq $32,%mm2 psrlq $32,%mm3 movd 36(%esp,%ebx,4),%mm6 paddq %mm2,%mm3 paddq %mm6,%mm3 movq %mm3,32(%esp,%ebx,4) leal 1(%edx),%edx cmpl %ebx,%edx jle .L004outer emms jmp .L006common_tail .align 16 .L002non_sse2: movl 8(%esp),%esi leal 1(%ebx),%ebp movl 12(%esp),%edi xorl %ecx,%ecx movl %esi,%edx andl $1,%ebp subl %edi,%edx leal 4(%edi,%ebx,4),%eax orl %edx,%ebp movl (%edi),%edi jz .L007bn_sqr_mont movl %eax,28(%esp) movl (%esi),%eax xorl %edx,%edx .align 16 .L008mull: movl %edx,%ebp mull %edi addl %eax,%ebp leal 1(%ecx),%ecx adcl $0,%edx movl (%esi,%ecx,4),%eax cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) jl .L008mull movl %edx,%ebp mull %edi movl 20(%esp),%edi addl %ebp,%eax movl 16(%esp),%esi adcl $0,%edx imull 32(%esp),%edi movl %eax,32(%esp,%ebx,4) xorl %ecx,%ecx movl %edx,36(%esp,%ebx,4) movl %ecx,40(%esp,%ebx,4) movl (%esi),%eax mull %edi addl 32(%esp),%eax movl 4(%esi),%eax adcl $0,%edx incl %ecx jmp .L0092ndmadd .align 16 .L0101stmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp leal 1(%ecx),%ecx adcl $0,%edx addl %eax,%ebp movl (%esi,%ecx,4),%eax adcl $0,%edx cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) jl .L0101stmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%eax movl 20(%esp),%edi adcl $0,%edx movl 16(%esp),%esi addl %eax,%ebp adcl $0,%edx imull 32(%esp),%edi xorl %ecx,%ecx addl 36(%esp,%ebx,4),%edx movl %ebp,32(%esp,%ebx,4) adcl $0,%ecx movl (%esi),%eax movl %edx,36(%esp,%ebx,4) movl %ecx,40(%esp,%ebx,4) mull %edi addl 32(%esp),%eax movl 4(%esi),%eax adcl $0,%edx movl $1,%ecx .align 16 .L0092ndmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp leal 1(%ecx),%ecx adcl $0,%edx addl %eax,%ebp movl (%esi,%ecx,4),%eax adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) jl .L0092ndmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp adcl $0,%edx addl %eax,%ebp adcl $0,%edx movl %ebp,28(%esp,%ebx,4) xorl %eax,%eax movl 12(%esp),%ecx addl 36(%esp,%ebx,4),%edx adcl 40(%esp,%ebx,4),%eax leal 4(%ecx),%ecx movl %edx,32(%esp,%ebx,4) cmpl 28(%esp),%ecx movl %eax,36(%esp,%ebx,4) je .L006common_tail movl (%ecx),%edi movl 8(%esp),%esi movl %ecx,12(%esp) xorl %ecx,%ecx xorl %edx,%edx movl (%esi),%eax jmp .L0101stmadd .align 16 .L007bn_sqr_mont: movl %ebx,(%esp) movl %ecx,12(%esp) movl %edi,%eax mull %edi movl %eax,32(%esp) movl %edx,%ebx shrl $1,%edx andl $1,%ebx incl %ecx .align 16 .L011sqr: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi addl %ebp,%eax leal 1(%ecx),%ecx adcl $0,%edx leal (%ebx,%eax,2),%ebp shrl $31,%eax cmpl (%esp),%ecx movl %eax,%ebx movl %ebp,28(%esp,%ecx,4) jl .L011sqr movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi addl %ebp,%eax movl 20(%esp),%edi adcl $0,%edx movl 16(%esp),%esi leal (%ebx,%eax,2),%ebp imull 32(%esp),%edi shrl $31,%eax movl %ebp,32(%esp,%ecx,4) leal (%eax,%edx,2),%ebp movl (%esi),%eax shrl $31,%edx movl %ebp,36(%esp,%ecx,4) movl %edx,40(%esp,%ecx,4) mull %edi addl 32(%esp),%eax movl %ecx,%ebx adcl $0,%edx movl 4(%esi),%eax movl $1,%ecx .align 16 .L0123rdmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp adcl $0,%edx addl %eax,%ebp movl 4(%esi,%ecx,4),%eax adcl $0,%edx movl %ebp,28(%esp,%ecx,4) movl %edx,%ebp mull %edi addl 36(%esp,%ecx,4),%ebp leal 2(%ecx),%ecx adcl $0,%edx addl %eax,%ebp movl (%esi,%ecx,4),%eax adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) jl .L0123rdmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp adcl $0,%edx addl %eax,%ebp adcl $0,%edx movl %ebp,28(%esp,%ebx,4) movl 12(%esp),%ecx xorl %eax,%eax movl 8(%esp),%esi addl 
36(%esp,%ebx,4),%edx adcl 40(%esp,%ebx,4),%eax movl %edx,32(%esp,%ebx,4) cmpl %ebx,%ecx movl %eax,36(%esp,%ebx,4) je .L006common_tail movl 4(%esi,%ecx,4),%edi leal 1(%ecx),%ecx movl %edi,%eax movl %ecx,12(%esp) mull %edi addl 32(%esp,%ecx,4),%eax adcl $0,%edx movl %eax,32(%esp,%ecx,4) xorl %ebp,%ebp cmpl %ebx,%ecx leal 1(%ecx),%ecx je .L013sqrlast movl %edx,%ebx shrl $1,%edx andl $1,%ebx .align 16 .L014sqradd: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi addl %ebp,%eax leal (%eax,%eax,1),%ebp adcl $0,%edx shrl $31,%eax addl 32(%esp,%ecx,4),%ebp leal 1(%ecx),%ecx adcl $0,%eax addl %ebx,%ebp adcl $0,%eax cmpl (%esp),%ecx movl %ebp,28(%esp,%ecx,4) movl %eax,%ebx jle .L014sqradd movl %edx,%ebp addl %edx,%edx shrl $31,%ebp addl %ebx,%edx adcl $0,%ebp .L013sqrlast: movl 20(%esp),%edi movl 16(%esp),%esi imull 32(%esp),%edi addl 32(%esp,%ecx,4),%edx movl (%esi),%eax adcl $0,%ebp movl %edx,32(%esp,%ecx,4) movl %ebp,36(%esp,%ecx,4) mull %edi addl 32(%esp),%eax leal -1(%ecx),%ebx adcl $0,%edx movl $1,%ecx movl 4(%esi),%eax jmp .L0123rdmadd .align 16 .L006common_tail: movl 16(%esp),%ebp movl 4(%esp),%edi leal 32(%esp),%esi movl (%esi),%eax movl %ebx,%ecx xorl %edx,%edx .align 16 .L015sub: sbbl (%ebp,%edx,4),%eax movl %eax,(%edi,%edx,4) decl %ecx movl 4(%esi,%edx,4),%eax leal 1(%edx),%edx jge .L015sub sbbl $0,%eax andl %eax,%esi notl %eax movl %edi,%ebp andl %eax,%ebp orl %ebp,%esi .align 16 .L016copy: movl (%esi,%ebx,4),%eax movl %eax,(%edi,%ebx,4) movl %ecx,32(%esp,%ebx,4) decl %ebx jge .L016copy movl 24(%esp),%esp movl $1,%eax .L000just_leave: popl %edi popl %esi popl %ebx popl %ebp ret .size bn_mul_mont,.-.L_bn_mul_mont_begin .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 .byte 111,114,103,62,0 .comm OPENSSL_ia32cap_P,16,4 #endif Index: head/secure/lib/libcrypto/i386/x86cpuid.S =================================================================== --- head/secure/lib/libcrypto/i386/x86cpuid.S (revision 299480) +++ head/secure/lib/libcrypto/i386/x86cpuid.S (revision 299481) @@ -1,739 +1,740 @@ - # $FreeBSD$ +# $FreeBSD$ +# Do not modify. This file is auto-generated from x86cpuid.pl. 
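Both #ifdef branches of bn_mul_mont above implement word-serial Montgomery multiplication, rp = ap*bp*R^-1 mod np with R = 2^(32*num); they differ only in how OPENSSL_ia32cap_P is located (PC-relative versus absolute), and each selects at run time between an SSE2 (pmuludq) path, a generic mul path, and a dedicated squaring path when the two input pointers coincide. The C below is a sketch of the underlying algorithm only: the signature is simplified (the real routine takes its n0 argument by pointer), the names mont_mul_sketch and MONT_MAX_WORDS are hypothetical, and n0 is assumed to be the usual precomputed value -np[0]^-1 mod 2^32.

    #include <stdint.h>
    #include <string.h>

    #define MONT_MAX_WORDS 64                  /* arbitrary bound for this sketch */

    /* rp[] = ap[] * bp[] * R^-1 mod np[], all num words, R = 2^(32*num). */
    static int mont_mul_sketch(uint32_t *rp, const uint32_t *ap, const uint32_t *bp,
                               const uint32_t *np, uint32_t n0, int num)
    {
        uint32_t t[MONT_MAX_WORDS + 2] = {0}, sub[MONT_MAX_WORDS];
        uint64_t acc;

        if (num < 4 || num > MONT_MAX_WORDS)
            return 0;                          /* mirrors the "just leave" early exit */

        for (int i = 0; i < num; i++) {
            /* t += ap[] * bp[i] */
            acc = 0;
            for (int j = 0; j < num; j++) {
                acc += (uint64_t)ap[j] * bp[i] + t[j];
                t[j] = (uint32_t)acc;
                acc >>= 32;
            }
            acc += t[num];
            t[num] = (uint32_t)acc;
            t[num + 1] = (uint32_t)(acc >> 32);

            /* t += np[] * m, which zeroes t[0]; then drop that low word */
            uint32_t m = t[0] * n0;
            acc = ((uint64_t)np[0] * m + t[0]) >> 32;
            for (int j = 1; j < num; j++) {
                acc += (uint64_t)np[j] * m + t[j];
                t[j - 1] = (uint32_t)acc;
                acc >>= 32;
            }
            acc += t[num];
            t[num - 1] = (uint32_t)acc;
            t[num] = t[num + 1] + (uint32_t)(acc >> 32);
        }

        /* Final conditional subtraction: the result is t or t - np. */
        uint64_t borrow = 0;
        for (int j = 0; j < num; j++) {
            uint64_t d = (uint64_t)t[j] - np[j] - borrow;
            sub[j] = (uint32_t)d;
            borrow = (d >> 32) & 1;
        }
        memcpy(rp, (t[num] || !borrow) ? sub : t, (size_t)num * sizeof(uint32_t));
        return 1;
    }

The closing subtract-and-select corresponds roughly to the common_tail/sub/copy labels in the assembly, which subtract the modulus once and then pick between the subtracted and unsubtracted copies with a mask rather than a branch.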
#ifdef PIC .file "x86cpuid.S" .text .globl OPENSSL_ia32_cpuid .type OPENSSL_ia32_cpuid,@function .align 16 OPENSSL_ia32_cpuid: .L_OPENSSL_ia32_cpuid_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %edx,%edx pushfl popl %eax movl %eax,%ecx xorl $2097152,%eax pushl %eax popfl pushfl popl %eax xorl %eax,%ecx xorl %eax,%eax btl $21,%ecx jnc .L000nocpuid movl 20(%esp),%esi movl %eax,8(%esi) .byte 0x0f,0xa2 movl %eax,%edi xorl %eax,%eax cmpl $1970169159,%ebx setne %al movl %eax,%ebp cmpl $1231384169,%edx setne %al orl %eax,%ebp cmpl $1818588270,%ecx setne %al orl %eax,%ebp jz .L001intel cmpl $1752462657,%ebx setne %al movl %eax,%esi cmpl $1769238117,%edx setne %al orl %eax,%esi cmpl $1145913699,%ecx setne %al orl %eax,%esi jnz .L001intel movl $2147483648,%eax .byte 0x0f,0xa2 cmpl $2147483649,%eax jb .L001intel movl %eax,%esi movl $2147483649,%eax .byte 0x0f,0xa2 orl %ecx,%ebp andl $2049,%ebp cmpl $2147483656,%esi jb .L001intel movl $2147483656,%eax .byte 0x0f,0xa2 movzbl %cl,%esi incl %esi movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 btl $28,%edx jnc .L002generic shrl $16,%ebx andl $255,%ebx cmpl %esi,%ebx ja .L002generic andl $4026531839,%edx jmp .L002generic .L001intel: cmpl $7,%edi jb .L003cacheinfo movl 20(%esp),%esi movl $7,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 movl %ebx,8(%esi) .L003cacheinfo: cmpl $4,%edi movl $-1,%edi jb .L004nocacheinfo movl $4,%eax movl $0,%ecx .byte 0x0f,0xa2 movl %eax,%edi shrl $14,%edi andl $4095,%edi .L004nocacheinfo: movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 andl $3220176895,%edx cmpl $0,%ebp jne .L005notintel orl $1073741824,%edx andb $15,%ah cmpb $15,%ah jne .L005notintel orl $1048576,%edx .L005notintel: btl $28,%edx jnc .L002generic andl $4026531839,%edx cmpl $0,%edi je .L002generic orl $268435456,%edx shrl $16,%ebx cmpb $1,%bl ja .L002generic andl $4026531839,%edx .L002generic: andl $2048,%ebp andl $4294965247,%ecx movl %edx,%esi orl %ecx,%ebp btl $27,%ecx jnc .L006clear_avx xorl %ecx,%ecx .byte 15,1,208 andl $6,%eax cmpl $6,%eax je .L007done cmpl $2,%eax je .L006clear_avx .L008clear_xmm: andl $4261412861,%ebp andl $4278190079,%esi .L006clear_avx: andl $4026525695,%ebp movl 20(%esp),%edi andl $4294967263,8(%edi) .L007done: movl %esi,%eax movl %ebp,%edx .L000nocpuid: popl %edi popl %esi popl %ebx popl %ebp ret .size OPENSSL_ia32_cpuid,.-.L_OPENSSL_ia32_cpuid_begin .globl OPENSSL_rdtsc .type OPENSSL_rdtsc,@function .align 16 OPENSSL_rdtsc: .L_OPENSSL_rdtsc_begin: xorl %eax,%eax xorl %edx,%edx call .L009PIC_me_up .L009PIC_me_up: popl %ecx leal OPENSSL_ia32cap_P-.L009PIC_me_up(%ecx),%ecx btl $4,(%ecx) jnc .L010notsc .byte 0x0f,0x31 .L010notsc: ret .size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin .globl OPENSSL_instrument_halt .type OPENSSL_instrument_halt,@function .align 16 OPENSSL_instrument_halt: .L_OPENSSL_instrument_halt_begin: call .L011PIC_me_up .L011PIC_me_up: popl %ecx leal OPENSSL_ia32cap_P-.L011PIC_me_up(%ecx),%ecx btl $4,(%ecx) jnc .L012nohalt .long 2421723150 andl $3,%eax jnz .L012nohalt pushfl popl %eax btl $9,%eax jnc .L012nohalt .byte 0x0f,0x31 pushl %edx pushl %eax hlt .byte 0x0f,0x31 subl (%esp),%eax sbbl 4(%esp),%edx addl $8,%esp ret .L012nohalt: xorl %eax,%eax xorl %edx,%edx ret .size OPENSSL_instrument_halt,.-.L_OPENSSL_instrument_halt_begin .globl OPENSSL_far_spin .type OPENSSL_far_spin,@function .align 16 OPENSSL_far_spin: .L_OPENSSL_far_spin_begin: pushfl popl %eax btl $9,%eax jnc .L013nospin movl 4(%esp),%eax movl 8(%esp),%ecx .long 2430111262 xorl %eax,%eax movl (%ecx),%edx jmp .L014spin .align 16 .L014spin: incl %eax cmpl (%ecx),%edx 
je .L014spin .long 529567888 ret .L013nospin: xorl %eax,%eax xorl %edx,%edx ret .size OPENSSL_far_spin,.-.L_OPENSSL_far_spin_begin .globl OPENSSL_wipe_cpu .type OPENSSL_wipe_cpu,@function .align 16 OPENSSL_wipe_cpu: .L_OPENSSL_wipe_cpu_begin: xorl %eax,%eax xorl %edx,%edx call .L015PIC_me_up .L015PIC_me_up: popl %ecx leal OPENSSL_ia32cap_P-.L015PIC_me_up(%ecx),%ecx movl (%ecx),%ecx btl $1,(%ecx) jnc .L016no_x87 andl $83886080,%ecx cmpl $83886080,%ecx jne .L017no_sse2 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 .L017no_sse2: .long 4007259865,4007259865,4007259865,4007259865,2430851995 .L016no_x87: leal 4(%esp),%eax ret .size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin .globl OPENSSL_atomic_add .type OPENSSL_atomic_add,@function .align 16 OPENSSL_atomic_add: .L_OPENSSL_atomic_add_begin: movl 4(%esp),%edx movl 8(%esp),%ecx pushl %ebx nop movl (%edx),%eax .L018spin: leal (%eax,%ecx,1),%ebx nop .long 447811568 jne .L018spin movl %ebx,%eax popl %ebx ret .size OPENSSL_atomic_add,.-.L_OPENSSL_atomic_add_begin .globl OPENSSL_indirect_call .type OPENSSL_indirect_call,@function .align 16 OPENSSL_indirect_call: .L_OPENSSL_indirect_call_begin: pushl %ebp movl %esp,%ebp subl $28,%esp movl 12(%ebp),%ecx movl %ecx,(%esp) movl 16(%ebp),%edx movl %edx,4(%esp) movl 20(%ebp),%eax movl %eax,8(%esp) movl 24(%ebp),%eax movl %eax,12(%esp) movl 28(%ebp),%eax movl %eax,16(%esp) movl 32(%ebp),%eax movl %eax,20(%esp) movl 36(%ebp),%eax movl %eax,24(%esp) call *8(%ebp) movl %ebp,%esp popl %ebp ret .size OPENSSL_indirect_call,.-.L_OPENSSL_indirect_call_begin .globl OPENSSL_cleanse .type OPENSSL_cleanse,@function .align 16 OPENSSL_cleanse: .L_OPENSSL_cleanse_begin: movl 4(%esp),%edx movl 8(%esp),%ecx xorl %eax,%eax cmpl $7,%ecx jae .L019lot cmpl $0,%ecx je .L020ret .L021little: movb %al,(%edx) subl $1,%ecx leal 1(%edx),%edx jnz .L021little .L020ret: ret .align 16 .L019lot: testl $3,%edx jz .L022aligned movb %al,(%edx) leal -1(%ecx),%ecx leal 1(%edx),%edx jmp .L019lot .L022aligned: movl %eax,(%edx) leal -4(%ecx),%ecx testl $-4,%ecx leal 4(%edx),%edx jnz .L022aligned cmpl $0,%ecx jne .L021little ret .size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin .globl OPENSSL_ia32_rdrand .type OPENSSL_ia32_rdrand,@function .align 16 OPENSSL_ia32_rdrand: .L_OPENSSL_ia32_rdrand_begin: movl $8,%ecx .L023loop: .byte 15,199,240 jc .L024break loop .L023loop .L024break: cmpl $0,%eax cmovel %ecx,%eax ret .size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin .globl OPENSSL_ia32_rdseed .type OPENSSL_ia32_rdseed,@function .align 16 OPENSSL_ia32_rdseed: .L_OPENSSL_ia32_rdseed_begin: movl $8,%ecx .L025loop: .byte 15,199,248 jc .L026break loop .L025loop .L026break: cmpl $0,%eax cmovel %ecx,%eax ret .size OPENSSL_ia32_rdseed,.-.L_OPENSSL_ia32_rdseed_begin .hidden OPENSSL_cpuid_setup .hidden OPENSSL_ia32cap_P .comm OPENSSL_ia32cap_P,16,4 .section .init call OPENSSL_cpuid_setup #else .file "x86cpuid.S" .text .globl OPENSSL_ia32_cpuid .type OPENSSL_ia32_cpuid,@function .align 16 OPENSSL_ia32_cpuid: .L_OPENSSL_ia32_cpuid_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %edx,%edx pushfl popl %eax movl %eax,%ecx xorl $2097152,%eax pushl %eax popfl pushfl popl %eax xorl %eax,%ecx xorl %eax,%eax btl $21,%ecx jnc .L000nocpuid movl 20(%esp),%esi movl %eax,8(%esi) .byte 0x0f,0xa2 movl %eax,%edi xorl %eax,%eax cmpl $1970169159,%ebx setne %al movl %eax,%ebp cmpl $1231384169,%edx setne %al orl %eax,%ebp cmpl $1818588270,%ecx setne %al orl %eax,%ebp jz .L001intel 
cmpl $1752462657,%ebx setne %al movl %eax,%esi cmpl $1769238117,%edx setne %al orl %eax,%esi cmpl $1145913699,%ecx setne %al orl %eax,%esi jnz .L001intel movl $2147483648,%eax .byte 0x0f,0xa2 cmpl $2147483649,%eax jb .L001intel movl %eax,%esi movl $2147483649,%eax .byte 0x0f,0xa2 orl %ecx,%ebp andl $2049,%ebp cmpl $2147483656,%esi jb .L001intel movl $2147483656,%eax .byte 0x0f,0xa2 movzbl %cl,%esi incl %esi movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 btl $28,%edx jnc .L002generic shrl $16,%ebx andl $255,%ebx cmpl %esi,%ebx ja .L002generic andl $4026531839,%edx jmp .L002generic .L001intel: cmpl $7,%edi jb .L003cacheinfo movl 20(%esp),%esi movl $7,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 movl %ebx,8(%esi) .L003cacheinfo: cmpl $4,%edi movl $-1,%edi jb .L004nocacheinfo movl $4,%eax movl $0,%ecx .byte 0x0f,0xa2 movl %eax,%edi shrl $14,%edi andl $4095,%edi .L004nocacheinfo: movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 andl $3220176895,%edx cmpl $0,%ebp jne .L005notintel orl $1073741824,%edx andb $15,%ah cmpb $15,%ah jne .L005notintel orl $1048576,%edx .L005notintel: btl $28,%edx jnc .L002generic andl $4026531839,%edx cmpl $0,%edi je .L002generic orl $268435456,%edx shrl $16,%ebx cmpb $1,%bl ja .L002generic andl $4026531839,%edx .L002generic: andl $2048,%ebp andl $4294965247,%ecx movl %edx,%esi orl %ecx,%ebp btl $27,%ecx jnc .L006clear_avx xorl %ecx,%ecx .byte 15,1,208 andl $6,%eax cmpl $6,%eax je .L007done cmpl $2,%eax je .L006clear_avx .L008clear_xmm: andl $4261412861,%ebp andl $4278190079,%esi .L006clear_avx: andl $4026525695,%ebp movl 20(%esp),%edi andl $4294967263,8(%edi) .L007done: movl %esi,%eax movl %ebp,%edx .L000nocpuid: popl %edi popl %esi popl %ebx popl %ebp ret .size OPENSSL_ia32_cpuid,.-.L_OPENSSL_ia32_cpuid_begin .globl OPENSSL_rdtsc .type OPENSSL_rdtsc,@function .align 16 OPENSSL_rdtsc: .L_OPENSSL_rdtsc_begin: xorl %eax,%eax xorl %edx,%edx leal OPENSSL_ia32cap_P,%ecx btl $4,(%ecx) jnc .L009notsc .byte 0x0f,0x31 .L009notsc: ret .size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin .globl OPENSSL_instrument_halt .type OPENSSL_instrument_halt,@function .align 16 OPENSSL_instrument_halt: .L_OPENSSL_instrument_halt_begin: leal OPENSSL_ia32cap_P,%ecx btl $4,(%ecx) jnc .L010nohalt .long 2421723150 andl $3,%eax jnz .L010nohalt pushfl popl %eax btl $9,%eax jnc .L010nohalt .byte 0x0f,0x31 pushl %edx pushl %eax hlt .byte 0x0f,0x31 subl (%esp),%eax sbbl 4(%esp),%edx addl $8,%esp ret .L010nohalt: xorl %eax,%eax xorl %edx,%edx ret .size OPENSSL_instrument_halt,.-.L_OPENSSL_instrument_halt_begin .globl OPENSSL_far_spin .type OPENSSL_far_spin,@function .align 16 OPENSSL_far_spin: .L_OPENSSL_far_spin_begin: pushfl popl %eax btl $9,%eax jnc .L011nospin movl 4(%esp),%eax movl 8(%esp),%ecx .long 2430111262 xorl %eax,%eax movl (%ecx),%edx jmp .L012spin .align 16 .L012spin: incl %eax cmpl (%ecx),%edx je .L012spin .long 529567888 ret .L011nospin: xorl %eax,%eax xorl %edx,%edx ret .size OPENSSL_far_spin,.-.L_OPENSSL_far_spin_begin .globl OPENSSL_wipe_cpu .type OPENSSL_wipe_cpu,@function .align 16 OPENSSL_wipe_cpu: .L_OPENSSL_wipe_cpu_begin: xorl %eax,%eax xorl %edx,%edx leal OPENSSL_ia32cap_P,%ecx movl (%ecx),%ecx btl $1,(%ecx) jnc .L013no_x87 andl $83886080,%ecx cmpl $83886080,%ecx jne .L014no_sse2 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 .L014no_sse2: .long 4007259865,4007259865,4007259865,4007259865,2430851995 .L013no_x87: leal 4(%esp),%eax ret .size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin .globl OPENSSL_atomic_add 
.type OPENSSL_atomic_add,@function .align 16 OPENSSL_atomic_add: .L_OPENSSL_atomic_add_begin: movl 4(%esp),%edx movl 8(%esp),%ecx pushl %ebx nop movl (%edx),%eax .L015spin: leal (%eax,%ecx,1),%ebx nop .long 447811568 jne .L015spin movl %ebx,%eax popl %ebx ret .size OPENSSL_atomic_add,.-.L_OPENSSL_atomic_add_begin .globl OPENSSL_indirect_call .type OPENSSL_indirect_call,@function .align 16 OPENSSL_indirect_call: .L_OPENSSL_indirect_call_begin: pushl %ebp movl %esp,%ebp subl $28,%esp movl 12(%ebp),%ecx movl %ecx,(%esp) movl 16(%ebp),%edx movl %edx,4(%esp) movl 20(%ebp),%eax movl %eax,8(%esp) movl 24(%ebp),%eax movl %eax,12(%esp) movl 28(%ebp),%eax movl %eax,16(%esp) movl 32(%ebp),%eax movl %eax,20(%esp) movl 36(%ebp),%eax movl %eax,24(%esp) call *8(%ebp) movl %ebp,%esp popl %ebp ret .size OPENSSL_indirect_call,.-.L_OPENSSL_indirect_call_begin .globl OPENSSL_cleanse .type OPENSSL_cleanse,@function .align 16 OPENSSL_cleanse: .L_OPENSSL_cleanse_begin: movl 4(%esp),%edx movl 8(%esp),%ecx xorl %eax,%eax cmpl $7,%ecx jae .L016lot cmpl $0,%ecx je .L017ret .L018little: movb %al,(%edx) subl $1,%ecx leal 1(%edx),%edx jnz .L018little .L017ret: ret .align 16 .L016lot: testl $3,%edx jz .L019aligned movb %al,(%edx) leal -1(%ecx),%ecx leal 1(%edx),%edx jmp .L016lot .L019aligned: movl %eax,(%edx) leal -4(%ecx),%ecx testl $-4,%ecx leal 4(%edx),%edx jnz .L019aligned cmpl $0,%ecx jne .L018little ret .size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin .globl OPENSSL_ia32_rdrand .type OPENSSL_ia32_rdrand,@function .align 16 OPENSSL_ia32_rdrand: .L_OPENSSL_ia32_rdrand_begin: movl $8,%ecx .L020loop: .byte 15,199,240 jc .L021break loop .L020loop .L021break: cmpl $0,%eax cmovel %ecx,%eax ret .size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin .globl OPENSSL_ia32_rdseed .type OPENSSL_ia32_rdseed,@function .align 16 OPENSSL_ia32_rdseed: .L_OPENSSL_ia32_rdseed_begin: movl $8,%ecx .L022loop: .byte 15,199,248 jc .L023break loop .L022loop .L023break: cmpl $0,%eax cmovel %ecx,%eax ret .size OPENSSL_ia32_rdseed,.-.L_OPENSSL_ia32_rdseed_begin .hidden OPENSSL_cpuid_setup .hidden OPENSSL_ia32cap_P .comm OPENSSL_ia32cap_P,16,4 .section .init call OPENSSL_cpuid_setup #endif
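The x86cpuid.S hunk above is support code rather than cipher code: OPENSSL_ia32_cpuid probes CPUID on behalf of OPENSSL_cpuid_setup (invoked from the .init section at the end of the file), which populates the OPENSSL_ia32cap_P capability vector tested by the other generated modules in this commit, and the remaining entry points are small utilities (rdtsc, rdrand/rdseed retry loops, an atomic add, an indirect-call trampoline, and OPENSSL_cleanse). As an example of the last of these, the sketch below mirrors the zeroization pattern OPENSSL_cleanse uses: byte stores until the pointer is 32-bit aligned, word stores for the bulk, then byte stores for the tail. It is illustrative only; the name cleanse_sketch and the volatile qualifier are choices made here, not properties of the generated assembly, which resists dead-store elimination simply by being assembly.

    #include <stddef.h>
    #include <stdint.h>

    static void cleanse_sketch(void *ptr, size_t len)
    {
        volatile unsigned char *p = (volatile unsigned char *)ptr;

        if (len >= 7) {
            /* byte stores until p is 4-byte aligned */
            while ((uintptr_t)p & 3) {
                *p++ = 0;
                len--;
            }
            /* aligned 32-bit stores for the bulk of the buffer */
            while (len >= 4) {
                *(volatile uint32_t *)p = 0;
                p += 4;
                len -= 4;
            }
        }
        /* remaining tail bytes */
        while (len > 0) {
            *p++ = 0;
            len--;
        }
    }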