D45444.diff

diff --git a/lib/libmd/Makefile b/lib/libmd/Makefile
--- a/lib/libmd/Makefile
+++ b/lib/libmd/Makefile
@@ -117,9 +117,12 @@
.endif
.if ${USE_ASM_SOURCES} != 0
-.if exists(${MACHINE_ARCH}/sha.S)
-SRCS+= sha.S
+.if exists(${MACHINE_ARCH}/sha1block.S)
+SRCS+= sha1block.S
CFLAGS+= -DSHA1_ASM
+.if exists(${MACHINE_ARCH}/sha1dispatch.c)
+SRCS+= sha1dispatch.c
+.endif
.endif
.if exists(${MACHINE_ARCH}/rmd160.S)
SRCS+= rmd160.S
@@ -135,7 +138,7 @@
# the assembly vs C versions, and skein_block needs to be rebuilt if it changes.
skein_block.o skein_block.pico: Makefile
.endif
-.if exists(${MACHINE_ARCH}/sha.S) || exists(${MACHINE_ARCH}/rmd160.S) || exists(${MACHINE_ARCH}/skein_block_asm.S)
+.if exists(${MACHINE_ARCH}/rmd160.S) || exists(${MACHINE_ARCH}/skein_block_asm.S)
ACFLAGS+= -DELF -Wa,--noexecstack
.endif
.if ${MACHINE_CPUARCH} == "aarch64"
diff --git a/lib/libmd/aarch64/sha1block.S b/lib/libmd/aarch64/sha1block.S
new file mode 100644
--- /dev/null
+++ b/lib/libmd/aarch64/sha1block.S
@@ -0,0 +1,490 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * sha1block_sha1 implementation based on sha1-arm.c,
+ * written and placed in public domain by Jeffrey Walton
+ * based on code from ARM, and by Johannes Schneiders, Skip
+ * Hovsmith and Barry O'Rourke for the mbedTLS project.
+ */
+
+#include <machine/asm.h>
+
+/*
+ * Scalar SHA1 implementation.
+ *
+ * Due to the ample register file available on AArch64, the w array is
+ * kept entirely in registers. The saved a-e variables are instead kept
+ * in memory as we don't have enough spare registers for them.
+ */
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_scalar)
+ctx .req x0
+buf .req x1
+len .req x2
+w .req sp
+a .req w3
+b .req w4
+c .req w5
+d .req w6
+e .req w7
+k .req w8
+f .req w9
+tmp .req w10
+w_0 .req w11
+w_1 .req w12
+w_2 .req w13
+w_3 .req w14
+w_4 .req w15
+w_5 .req w16
+w_6 .req w17
+// w18 is the platform register
+w_7 .req w19
+w_8 .req w20
+w_9 .req w21
+w_10 .req w22
+w_11 .req w23
+w_12 .req w24
+w_13 .req w25
+w_14 .req w26
+w_15 .req w27
+
+.macro shuffle w_i, w_i3, w_i8, w_i14
+ eor \w_i, \w_i, \w_i3
+ eor tmp, \w_i8, \w_i14
+ eor \w_i, \w_i, tmp // w[i-16] ^ w[i-14] ^ w[i-8] ^ w[i-3]
+ ror \w_i, \w_i, #31 // w[i] = ... ror #31
+.endm
+
+.macro func1 a, b, c, d, e
+ and f, \c, \b
+ bic tmp, \d, \b
+ orr f, f, tmp
+.endm
+
+.macro func2 a, b, c, d, e
+ eor f, \b, \c
+ eor f, f, \d
+.endm
+
+.macro func3 a, b, c, d, e
+ eor tmp, \b, \c
+ and f, \b, \c
+ and tmp, tmp, \d
+ orr f, f, tmp
+.endm
+
+.macro func4 a, b, c, d, e
+ func2 \a, \b, \c, \d, \e
+.endm
+
+.macro mix a, b, c, d, e, w_i
+ ror \b, \b, #2
+ ror tmp, \a, #27
+ add \e, \e, \w_i
+ add tmp, tmp, k
+ add \e, \e, f
+ add \e, \e, tmp // (a ror 27) + e + f + k + w[i]
+.endm
+
+.macro round1 a, b, c, d, e, w_i
+ func1 \a, \b, \c, \d, \e
+ rev \w_i, \w_i
+ mix \a, \b, \c, \d, \e, \w_i
+.endm
+
+.macro round func, a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ shuffle \w_i, \w_i3, \w_i8, \w_i14
+ \func \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, \w_i
+.endm
+
+.macro round1x a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func1, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+.macro round2 a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func2, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+.macro round3 a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func3, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+.macro round4 a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func4, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+ ands len, len, #~63 // take length in multiples of block length
+ beq 1f // bail out if input empty
+
+ sub sp, sp, #24+9*8 // allocate stack space
+ str x19, [sp, #24+0*8]
+ stp x20, x21, [sp, #24+1*8]
+ stp x22, x23, [sp, #24+3*8]
+ stp x24, x25, [sp, #24+5*8]
+ stp x26, x27, [sp, #24+7*8]
+
+ ldp a, b, [ctx, #0] // load SHA1 state from context
+ ldp c, d, [ctx, #8]
+ ldr e, [ctx, #16]
+
+0: stp a, b, [sp, #0] // save old SHA1 state
+ stp c, d, [sp, #8]
+ str e, [sp, #16]
+
+ movz k, #0x7999 // round constant 1
+ movk k, #0x5a82, lsl #16
+
+ ldp w_0, w_1, [buf, #0*4]
+ round1 a, b, c, d, e, w_0
+ round1 e, a, b, c, d, w_1
+
+ ldp w_2, w_3, [buf, #2*4]
+ round1 d, e, a, b, c, w_2
+ round1 c, d, e, a, b, w_3
+
+ ldp w_4, w_5, [buf, #4*4]
+ round1 b, c, d, e, a, w_4
+ round1 a, b, c, d, e, w_5
+
+ ldp w_6, w_7, [buf, #6*4]
+ round1 e, a, b, c, d, w_6
+ round1 d, e, a, b, c, w_7
+
+ ldp w_8, w_9, [buf, #8*4]
+ round1 c, d, e, a, b, w_8
+ round1 b, c, d, e, a, w_9
+
+ ldp w_10, w_11, [buf, #10*4]
+ round1 a, b, c, d, e, w_10
+ round1 e, a, b, c, d, w_11
+
+ ldp w_12, w_13, [buf, #12*4]
+ round1 d, e, a, b, c, w_12
+ round1 c, d, e, a, b, w_13
+
+ ldp w_14, w_15, [buf, #14*4]
+ round1 b, c, d, e, a, w_14
+ round1 a, b, c, d, e, w_15
+
+ round1x e, a, b, c, d, w_0, w_13, w_8, w_2
+ round1x d, e, a, b, c, w_1, w_14, w_9, w_3
+ round1x c, d, e, a, b, w_2, w_15, w_10, w_4
+ round1x b, c, d, e, a, w_3, w_0, w_11, w_5
+
+ movz k, #0xeba1 // round constant 2
+ movk k, #0x6ed9, lsl #16
+
+ round2 a, b, c, d, e, w_4, w_1, w_12, w_6
+ round2 e, a, b, c, d, w_5, w_2, w_13, w_7
+ round2 d, e, a, b, c, w_6, w_3, w_14, w_8
+ round2 c, d, e, a, b, w_7, w_4, w_15, w_9
+ round2 b, c, d, e, a, w_8, w_5, w_0, w_10
+
+ round2 a, b, c, d, e, w_9, w_6, w_1, w_11
+ round2 e, a, b, c, d, w_10, w_7, w_2, w_12
+ round2 d, e, a, b, c, w_11, w_8, w_3, w_13
+ round2 c, d, e, a, b, w_12, w_9, w_4, w_14
+ round2 b, c, d, e, a, w_13, w_10, w_5, w_15
+
+ round2 a, b, c, d, e, w_14, w_11, w_6, w_0
+ round2 e, a, b, c, d, w_15, w_12, w_7, w_1
+ round2 d, e, a, b, c, w_0, w_13, w_8, w_2
+ round2 c, d, e, a, b, w_1, w_14, w_9, w_3
+ round2 b, c, d, e, a, w_2, w_15, w_10, w_4
+
+ round2 a, b, c, d, e, w_3, w_0, w_11, w_5
+ round2 e, a, b, c, d, w_4, w_1, w_12, w_6
+ round2 d, e, a, b, c, w_5, w_2, w_13, w_7
+ round2 c, d, e, a, b, w_6, w_3, w_14, w_8
+ round2 b, c, d, e, a, w_7, w_4, w_15, w_9
+
+ movz k, #0xbcdc // round constant 3
+ movk k, #0x8f1b, lsl #16
+
+ round3 a, b, c, d, e, w_8, w_5, w_0, w_10
+ round3 e, a, b, c, d, w_9, w_6, w_1, w_11
+ round3 d, e, a, b, c, w_10, w_7, w_2, w_12
+ round3 c, d, e, a, b, w_11, w_8, w_3, w_13
+ round3 b, c, d, e, a, w_12, w_9, w_4, w_14
+
+ round3 a, b, c, d, e, w_13, w_10, w_5, w_15
+ round3 e, a, b, c, d, w_14, w_11, w_6, w_0
+ round3 d, e, a, b, c, w_15, w_12, w_7, w_1
+ round3 c, d, e, a, b, w_0, w_13, w_8, w_2
+ round3 b, c, d, e, a, w_1, w_14, w_9, w_3
+
+ round3 a, b, c, d, e, w_2, w_15, w_10, w_4
+ round3 e, a, b, c, d, w_3, w_0, w_11, w_5
+ round3 d, e, a, b, c, w_4, w_1, w_12, w_6
+ round3 c, d, e, a, b, w_5, w_2, w_13, w_7
+ round3 b, c, d, e, a, w_6, w_3, w_14, w_8
+
+ round3 a, b, c, d, e, w_7, w_4, w_15, w_9
+ round3 e, a, b, c, d, w_8, w_5, w_0, w_10
+ round3 d, e, a, b, c, w_9, w_6, w_1, w_11
+ round3 c, d, e, a, b, w_10, w_7, w_2, w_12
+ round3 b, c, d, e, a, w_11, w_8, w_3, w_13
+
+ movz k, #0xc1d6 // round constant 4
+ movk k, #0xca62, lsl #16
+
+ round4 a, b, c, d, e, w_12, w_9, w_4, w_14
+ round4 e, a, b, c, d, w_13, w_10, w_5, w_15
+ round4 d, e, a, b, c, w_14, w_11, w_6, w_0
+ round4 c, d, e, a, b, w_15, w_12, w_7, w_1
+ round4 b, c, d, e, a, w_0, w_13, w_8, w_2
+
+ round4 a, b, c, d, e, w_1, w_14, w_9, w_3
+ round4 e, a, b, c, d, w_2, w_15, w_10, w_4
+ round4 d, e, a, b, c, w_3, w_0, w_11, w_5
+ round4 c, d, e, a, b, w_4, w_1, w_12, w_6
+ round4 b, c, d, e, a, w_5, w_2, w_13, w_7
+
+ round4 a, b, c, d, e, w_6, w_3, w_14, w_8
+ round4 e, a, b, c, d, w_7, w_4, w_15, w_9
+ round4 d, e, a, b, c, w_8, w_5, w_0, w_10
+ round4 c, d, e, a, b, w_9, w_6, w_1, w_11
+ round4 b, c, d, e, a, w_10, w_7, w_2, w_12
+
+ round4 a, b, c, d, e, w_11, w_8, w_3, w_13
+ round4 e, a, b, c, d, w_12, w_9, w_4, w_14
+ round4 d, e, a, b, c, w_13, w_10, w_5, w_15
+ round4 c, d, e, a, b, w_14, w_11, w_6, w_0
+ round4 b, c, d, e, a, w_15, w_12, w_7, w_1
+
+ ldp w_0, w_1, [sp, #0] // reload saved SHA1 state
+ ldp w_2, w_3, [sp, #8]
+ ldr w_4, [sp, #16]
+
+ add a, a, w_0
+ add b, b, w_1
+ add c, c, w_2
+ add d, d, w_3
+ add e, e, w_4
+
+ add buf, buf, #64
+ subs len, len, #64
+ bhi 0b
+
+ stp a, b, [ctx, #0] // write updated SHA1 state
+ stp c, d, [ctx, #8]
+ str e, [ctx, #16]
+
+ ldr x19, [sp, #24+0*8]
+ ldp x20, x21, [sp, #24+1*8]
+ ldp x22, x23, [sp, #24+3*8]
+ ldp x24, x25, [sp, #24+5*8]
+ ldp x26, x27, [sp, #24+7*8]
+ add sp, sp, #24+9*8
+
+1: ret
+END(_libmd_sha1block_scalar)
+
+/*
+ * SHA1 implementation using the SHA1 instruction set extension.
+ */
+
+ .arch_extension sha2
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_sha1)
+ /* ctx, buf, len: same as for sha1block_scalar */
+kaddr .req x3
+abcd .req v0
+abcd_q .req q0 // alias for use with scalar instructions
+abcd_s .req s0
+e0 .req s1
+e0_v .req v1
+e1 .req s2
+abcd_saved .req v3
+e0_saved .req v4
+tmp0 .req v5
+tmp1 .req v6
+msg0 .req v16
+msg1 .req v17
+msg2 .req v18
+msg3 .req v19
+k0 .req v20
+k1 .req v21
+k2 .req v22
+k3 .req v23
+
+ ands len, len, #~63 // take length in multiples of block length
+ beq 1f // bail out if input empty
+
+ ldr abcd_q, [ctx, #0]
+ ldr e0, [ctx, #16]
+
+ adrp kaddr, k1234
+ add kaddr, kaddr, #:lo12:k1234
+ ld4r {k0.4s, k1.4s, k2.4s, k3.4s}, [kaddr]
+
+0: mov abcd_saved.16b, abcd.16b
+ mov e0_saved.16b, e0_v.16b
+
+ ld1 {msg0.4s, msg1.4s, msg2.4s, msg3.4s}, [buf], #64
+ rev32 msg0.16b, msg0.16b
+ rev32 msg1.16b, msg1.16b
+ rev32 msg2.16b, msg2.16b
+ rev32 msg3.16b, msg3.16b
+
+ add tmp0.4s, msg0.4s, k0.4s
+ add tmp1.4s, msg1.4s, k0.4s
+
+ /* rounds 0--3 */
+ sha1h e1, abcd_s
+ sha1c abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k0.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 4--7 */
+ sha1h e0, abcd_s
+ sha1c abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k0.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 8--11 */
+ sha1h e1, abcd_s
+ sha1c abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k0.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 12--15 */
+ sha1h e0, abcd_s
+ sha1c abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k1.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 16--19 */
+ sha1h e1, abcd_s
+ sha1c abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k1.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 20--23 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k1.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 24--27 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k1.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 28--31 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k1.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 32--35 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k2.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 36--39 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k2.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 40--43 */
+ sha1h e1, abcd_s
+ sha1m abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k2.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 44--47 */
+ sha1h e0, abcd_s
+ sha1m abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k2.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 48--51 */
+ sha1h e1, abcd_s
+ sha1m abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k2.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 52--55 */
+ sha1h e0, abcd_s
+ sha1m abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k3.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 56--59 */
+ sha1h e1, abcd_s
+ sha1m abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k3.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 60--63 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k3.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 64--67 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k3.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 68--71 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k3.4s
+ sha1su1 msg0.4s, msg3.4s
+
+ /* rounds 72--75 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+
+ /* rounds 76--79 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+
+ add e0_v.4s, e0_v.4s, e0_saved.4s
+ add abcd.4s, abcd.4s, abcd_saved.4s
+
+ subs len, len, #64
+ bhi 0b
+
+ str abcd_q, [ctx, #0]
+ str e0, [ctx, #16]
+
+1: ret
+END(_libmd_sha1block_sha1)
+
+ .section .rodata
+ .balign 16
+k1234: .4byte 0x5a827999
+ .4byte 0x6ed9eba1
+ .4byte 0x8f1bbcdc
+ .4byte 0xca62c1d6
+ .size k1234, .-k1234
+
+ .section .note.GNU-stack,"",%progbits
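
The comment blocks in this file (and in the amd64 file further down) describe the standard SHA-1 round structure that the func1-func4, mix, and shuffle macros implement; the C reference (sha1c.c) they correspond to is not part of this diff. As a point of reference only, here is a minimal C sketch of those operations; it is not part of the patch and all names are invented here.

#include <stdint.h>

#define ROL32(x, n)	((uint32_t)((x) << (n) | (x) >> (32 - (n))))

/* round functions: func1 = choice, func2/func4 = parity, func3 = majority */
static inline uint32_t
sha1_f(int i, uint32_t b, uint32_t c, uint32_t d)
{
	if (i < 20)
		return ((b & c) | (~b & d));		/* func1 */
	else if (i < 40 || i >= 60)
		return (b ^ c ^ d);			/* func2, func4 */
	else
		return ((b & c) | ((b ^ c) & d));	/* func3 */
}

/*
 * One round, as performed by a funcN + mix macro pair: k is one of the
 * four round constants loaded into the k register above and w is the
 * current message-schedule word.  The a-e rotation between rounds is
 * done in the assembly by permuting the macro arguments.
 */
static inline void
sha1_round(uint32_t st[5] /* a, b, c, d, e */, uint32_t k, uint32_t w, int i)
{
	st[4] += ROL32(st[0], 5) + sha1_f(i, st[1], st[2], st[3]) + k + w;
	st[1] = ROL32(st[1], 30);
}

/* message-schedule recurrence computed by the shuffle macro (ror #31 == rol #1) */
static inline uint32_t
sha1_shuffle(uint32_t w3, uint32_t w8, uint32_t w14, uint32_t w16)
{
	return (ROL32(w3 ^ w8 ^ w14 ^ w16, 1));
}
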
diff --git a/lib/libmd/aarch64/sha1dispatch.c b/lib/libmd/aarch64/sha1dispatch.c
new file mode 100644
--- /dev/null
+++ b/lib/libmd/aarch64/sha1dispatch.c
@@ -0,0 +1,24 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <machine/ifunc.h>
+#include <sha.h>
+#include <sys/auxv.h>
+
+extern void _libmd_sha1block_scalar(SHA1_CTX *, const void *, size_t);
+extern void _libmd_sha1block_sha1(SHA1_CTX *, const void *, size_t);
+
+DEFINE_IFUNC(, void, sha1_block, (SHA1_CTX *, const void *, size_t))
+{
+ unsigned long hwcap = 0;
+
+ elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
+
+ if (hwcap & HWCAP_SHA1)
+ return (_libmd_sha1block_sha1);
+ else
+ return (_libmd_sha1block_scalar);
+}
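
The ifunc resolver above runs once, when the dynamic linker binds sha1_block, and selects the SHA1-extension code only if AT_HWCAP advertises HWCAP_SHA1. Conceptually it amounts to the ordinary run-time dispatch sketched below; this is illustrative only (the wrapper name is invented and the lazy initialization ignores thread-safety), not how the library actually wires it up.

#include <sys/auxv.h>

#include <sha.h>
#include <stddef.h>

extern void _libmd_sha1block_scalar(SHA1_CTX *, const void *, size_t);
extern void _libmd_sha1block_sha1(SHA1_CTX *, const void *, size_t);

/* pick the implementation on first use instead of at binding time */
static void
sha1_block_runtime_dispatch(SHA1_CTX *ctx, const void *buf, size_t len)
{
	static void (*impl)(SHA1_CTX *, const void *, size_t);

	if (impl == NULL) {
		unsigned long hwcap = 0;

		elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
		impl = (hwcap & HWCAP_SHA1) ?
		    _libmd_sha1block_sha1 : _libmd_sha1block_scalar;
	}
	(*impl)(ctx, buf, len);
}
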
diff --git a/lib/libmd/amd64/sha1block.S b/lib/libmd/amd64/sha1block.S
new file mode 100644
--- /dev/null
+++ b/lib/libmd/amd64/sha1block.S
@@ -0,0 +1,1851 @@
+/*-
+ * Copyright (c) 2013 The Go Authors. All rights reserved.
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * Adapted from Go's crypto/sha1/sha1block_amd64.s.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+/*
+ * SHA-1 block routine. See sha1c.c for C equivalent.
+ *
+ * There are 80 rounds of 4 types:
+ * - rounds 0-15 are type 1 and load data (round1 macro).
+ * - rounds 16-19 are type 1 and do not load data (round1x macro).
+ * - rounds 20-39 are type 2 and do not load data (round2 macro).
+ * - rounds 40-59 are type 3 and do not load data (round3 macro).
+ * - rounds 60-79 are type 4 and do not load data (round4 macro).
+ *
+ * Each round loads or shuffles the data, then computes a per-round
+ * function of b, c, d, and then mixes the result into and rotates the
+ * five registers a, b, c, d, e holding the intermediate results.
+ *
+ * The register rotation is implemented by rotating the arguments to
+ * the round macros instead of by explicit move instructions.
+ */
+.macro load index
+ mov (\index)*4(%rsi), %r10d
+ bswap %r10d
+ mov %r10d, (\index)*4(%rsp)
+.endm
+
+.macro shuffle index
+ mov ((\index )&0xf)*4(%rsp), %r10d
+ xor ((\index- 3)&0xf)*4(%rsp), %r10d
+ xor ((\index- 8)&0xf)*4(%rsp), %r10d
+ xor ((\index-14)&0xf)*4(%rsp), %r10d
+ rol $1, %r10d
+ mov %r10d, ((\index)&0xf)*4(%rsp)
+.endm
+
+.macro func1 a, b, c, d, e
+ mov \d, %r9d
+ xor \c, %r9d
+ and \b, %r9d
+ xor \d, %r9d
+.endm
+
+.macro func2 a, b, c, d, e
+ mov \b, %r9d
+ xor \c, %r9d
+ xor \d, %r9d
+.endm
+
+.macro func3 a, b, c, d, e
+ mov \b, %r8d
+ or \c, %r8d
+ and \d, %r8d
+ mov \b, %r9d
+ and \c, %r9d
+ or %r8d, %r9d
+.endm
+
+.macro func4 a, b, c, d, e
+ func2 \a, \b, \c, \d, \e
+.endm
+
+.macro mix a, b, c, d, e, const
+ rol $30, \b
+ add %r9d, \e
+ mov \a, %r8d
+ rol $5, %r8d
+ lea \const(\e, %r10d, 1), \e
+ add %r8d, \e
+.endm
+
+.macro round1 a, b, c, d, e, index
+ load \index
+ func1 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x5a827999
+.endm
+
+.macro round1x a, b, c, d, e, index
+ shuffle \index
+ func1 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x5a827999
+.endm
+
+.macro round2 a, b, c, d, e, index
+ shuffle \index
+ func2 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x6ed9eba1
+.endm
+
+.macro round3 a, b, c, d, e, index
+ shuffle \index
+ func3 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x8f1bbcdc
+.endm
+
+.macro round4 a, b, c, d, e, index
+ shuffle \index
+ func4 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0xca62c1d6
+.endm
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_scalar)
+ push %rbp
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rdi // rdi: SHA1_CTX
+ sub $64+8, %rsp // 64 bytes for round keys
+ // plus alignment
+
+ mov %rdi, %rbp
+ // rsi: buf
+ and $~63, %rdx // rdx: length in blocks
+ lea (%rsi, %rdx, 1), %rdi // rdi: end pointer
+ mov (%rbp), %eax // c->h0
+ mov 4(%rbp), %ebx // c->h1
+ mov 8(%rbp), %ecx // c->h2
+ mov 12(%rbp), %edx // c->h3
+ mov 16(%rbp), %ebp // c->h4
+
+ cmp %rsi, %rdi // any data to process?
+ je .Lend
+
+.Lloop: mov %eax, %r11d
+ mov %ebx, %r12d
+ mov %ecx, %r13d
+ mov %edx, %r14d
+ mov %ebp, %r15d
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 0
+ round1 %ebp, %eax, %ebx, %ecx, %edx, 1
+ round1 %edx, %ebp, %eax, %ebx, %ecx, 2
+ round1 %ecx, %edx, %ebp, %eax, %ebx, 3
+ round1 %ebx, %ecx, %edx, %ebp, %eax, 4
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 5
+ round1 %ebp, %eax, %ebx, %ecx, %edx, 6
+ round1 %edx, %ebp, %eax, %ebx, %ecx, 7
+ round1 %ecx, %edx, %ebp, %eax, %ebx, 8
+ round1 %ebx, %ecx, %edx, %ebp, %eax, 9
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 10
+ round1 %ebp, %eax, %ebx, %ecx, %edx, 11
+ round1 %edx, %ebp, %eax, %ebx, %ecx, 12
+ round1 %ecx, %edx, %ebp, %eax, %ebx, 13
+ round1 %ebx, %ecx, %edx, %ebp, %eax, 14
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 15
+ round1x %ebp, %eax, %ebx, %ecx, %edx, 16
+ round1x %edx, %ebp, %eax, %ebx, %ecx, 17
+ round1x %ecx, %edx, %ebp, %eax, %ebx, 18
+ round1x %ebx, %ecx, %edx, %ebp, %eax, 19
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 20
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 21
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 22
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 23
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 24
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 25
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 26
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 27
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 28
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 29
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 30
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 31
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 32
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 33
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 34
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 35
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 36
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 37
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 38
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 39
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 40
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 41
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 42
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 43
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 44
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 45
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 46
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 47
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 48
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 49
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 50
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 51
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 52
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 53
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 54
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 55
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 56
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 57
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 58
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 59
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 60
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 61
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 62
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 63
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 64
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 65
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 66
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 67
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 68
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 69
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 70
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 71
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 72
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 73
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 74
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 75
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 76
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 77
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 78
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 79
+
+ add %r11d, %eax
+ add %r12d, %ebx
+ add %r13d, %ecx
+ add %r14d, %edx
+ add %r15d, %ebp
+
+ add $64, %rsi
+ cmp %rdi, %rsi
+ jb .Lloop
+
+.Lend: add $64+8, %rsp
+ pop %rdi // SHA1_CTX
+ mov %eax, (%rdi)
+ mov %ebx, 4(%rdi)
+ mov %ecx, 8(%rdi)
+ mov %edx, 12(%rdi)
+ mov %ebp, 16(%rdi)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
+ pop %rbp
+ ret
+END(_libmd_sha1block_scalar)
+
+/*
+ * This is the implementation using AVX2, BMI1 and BMI2. It is based on:
+ * "SHA-1 implementation with Intel(R) AVX2 instruction set extensions"
+ * From http://software.intel.com/en-us/articles
+ * (look for improving-the-performance-of-the-secure-hash-algorithm-1)
+ * This implementation is 2x unrolled and interleaves the vector
+ * instructions used to precompute W with the scalar computation of
+ * the current round, for optimal scheduling.
+ */
+
+ /* trivial helper macros */
+.macro update_hash a, tb, c, d, e
+ add (%r9), \a
+ mov \a, (%r9)
+ add 4(%r9), \tb
+ mov \tb, 4(%r9)
+ add 8(%r9), \c
+ mov \c, 8(%r9)
+ add 12(%r9), \d
+ mov \d, 12(%r9)
+ add 16(%r9), \e
+ mov \e, 16(%r9)
+.endm
+
+ /* help macros for recalc, which does precomputations */
+.macro precalc0 offset
+ vmovdqu \offset(%r10), %xmm0
+.endm
+
+.macro precalc1 offset
+ vinserti128 $1, \offset(%r13), %ymm0, %ymm0
+.endm
+
+.macro precalc2 yreg
+ vpshufb %ymm10, %ymm0, \yreg
+.endm
+
+.macro precalc4 yreg, k_offset
+ vpaddd \k_offset(%r8), \yreg, %ymm0
+.endm
+
+.macro precalc7 offset
+ vmovdqu %ymm0, (\offset)*2(%r14)
+.endm
+
+/*
+ * Message scheduling pre-compute for rounds 0-15
+ * r13 is a pointer to the even 64-byte block
+ * r10 is a pointer to the odd 64-byte block
+ * r14 is a pointer to the temp buffer
+ * xmm0 is used as a temp register
+ * yreg is clobbered as part of the computation
+ * offset chooses a 16 byte chunk within a block
+ * r8 is a pointer to the constants block
+ * k_offset chooses K constants relevant to this round
+ * xmm10 holds the swap mask
+ */
+.macro precalc00_15 offset, yreg
+ precalc0 \offset
+ precalc1 \offset
+ precalc2 \yreg
+ precalc4 \yreg, 0
+ precalc7 \offset
+.endm
+
+ /* helper macros for precalc16_31 */
+.macro precalc16 reg_sub16, reg_sub12, reg_sub4, reg
+ vpalignr $8, \reg_sub16, \reg_sub12, \reg // w[i - 14]
+ vpsrldq $4, \reg_sub4, %ymm0 // w[i - 3]
+.endm
+
+.macro precalc17 reg_sub16, reg_sub8, reg
+ vpxor \reg_sub8, \reg, \reg
+ vpxor \reg_sub16, %ymm0, %ymm0
+.endm
+
+.macro precalc18 reg
+ vpxor %ymm0, \reg, \reg
+ vpslldq $12, \reg, %ymm9
+.endm
+
+.macro precalc19 reg
+ vpslld $1, \reg, %ymm0
+ vpsrld $31, \reg, \reg
+ .endm
+
+.macro precalc20 reg
+ vpor \reg, %ymm0, %ymm0
+ vpslld $2, %ymm9, \reg
+.endm
+
+.macro precalc21 reg
+ vpsrld $30, %ymm9, %ymm9
+ vpxor \reg, %ymm0, %ymm0
+.endm
+
+.macro precalc23 reg, k_offset, offset
+ vpxor %ymm9, %ymm0, \reg
+ vpaddd \k_offset(%r8), \reg, %ymm0
+ vmovdqu %ymm0, (\offset)(%r14)
+.endm
+
+/*
+ * Message scheduling pre-compute for rounds 16-31
+ * calculating last 32 w[i] values in 8 XMM registers
+ * pre-calculate K+w[i] values and store to mem
+ * for later load by ALU add instruction.
+ * "brute force" vectorization for rounds 16-31 only
+ * due to w[i]->w[i-3] dependency.
+ * clobbers 5 input ymm registers REG_SUB*
+ * uses xmm0 and xmm9 as temp registers
+ * As always, r8 is a pointer to constants block
+ * and r14 is a pointer to temp buffer
+ */
+.macro precalc16_31 reg, reg_sub4, reg_sub8, reg_sub12, reg_sub16, k_offset, offset
+ precalc16 \reg_sub16, \reg_sub12, \reg_sub4, \reg
+ precalc17 \reg_sub16, \reg_sub8, \reg
+ precalc18 \reg
+ precalc19 \reg
+ precalc20 \reg
+ precalc21 \reg
+ precalc23 \reg, \k_offset, \offset
+.endm
+
+ /* helper macros for precalc_32_79 */
+.macro precalc32 reg_sub8, reg_sub4
+ vpalignr $8, \reg_sub8, \reg_sub4, %ymm0
+.endm
+
+.macro precalc33 reg_sub28, reg
+ vpxor \reg_sub28, \reg, \reg
+.endm
+
+.macro precalc34 reg_sub16
+ vpxor \reg_sub16, %ymm0, %ymm0
+.endm
+
+.macro precalc35 reg
+ vpxor %ymm0, \reg, \reg
+.endm
+
+.macro precalc36 reg
+ vpslld $2, \reg, %ymm0
+.endm
+
+.macro precalc37 reg
+ vpsrld $30, \reg, \reg
+ vpor \reg, %ymm0, \reg
+.endm
+
+.macro precalc39 reg, k_offset, offset
+ vpaddd \k_offset(%r8), \reg, %ymm0
+ vmovdqu %ymm0, \offset(%r14)
+.endm
+
+.macro precalc32_79 reg, reg_sub4, reg_sub8, reg_sub16, reg_sub28, k_offset, offset
+ precalc32 \reg_sub8, \reg_sub4
+ precalc33 \reg_sub28, \reg
+ precalc34 \reg_sub16
+ precalc35 \reg
+ precalc36 \reg
+ precalc37 \reg
+ precalc39 \reg, \k_offset, \offset
+.endm
+
+.macro precalc
+ precalc00_15 0x00, %ymm15
+ precalc00_15 0x10, %ymm14
+ precalc00_15 0x20, %ymm13
+ precalc00_15 0x30, %ymm12
+ precalc16_31 %ymm8, %ymm12, %ymm13, %ymm14, %ymm15, 0x00, 0x080
+ precalc16_31 %ymm7, %ymm8, %ymm12, %ymm13, %ymm14, 0x20, 0x0a0
+ precalc16_31 %ymm5, %ymm7, %ymm8, %ymm12, %ymm13, 0x20, 0x0c0
+ precalc16_31 %ymm3, %ymm5, %ymm7, %ymm8, %ymm12, 0x20, 0x0e0
+ precalc32_79 %ymm15, %ymm3, %ymm5, %ymm8, %ymm14, 0x20, 0x100
+ precalc32_79 %ymm14, %ymm15, %ymm3, %ymm7, %ymm13, 0x20, 0x120
+ precalc32_79 %ymm13, %ymm14, %ymm15, %ymm5, %ymm12, 0x40, 0x140
+ precalc32_79 %ymm12, %ymm13, %ymm14, %ymm3, %ymm8, 0x40, 0x160
+ precalc32_79 %ymm8, %ymm12, %ymm13, %ymm15, %ymm7, 0x40, 0x180
+ precalc32_79 %ymm7, %ymm8, %ymm12, %ymm14, %ymm5, 0x40, 0x1a0
+ precalc32_79 %ymm5, %ymm7, %ymm8, %ymm13, %ymm3, 0x40, 0x1c0
+ precalc32_79 %ymm3, %ymm5, %ymm7, %ymm12, %ymm15, 0x60, 0x1e0
+ precalc32_79 %ymm15, %ymm3, %ymm5, %ymm8, %ymm14, 0x60, 0x200
+ precalc32_79 %ymm14, %ymm15, %ymm3, %ymm7, %ymm13, 0x60, 0x220
+ precalc32_79 %ymm13, %ymm14, %ymm15, %ymm5, %ymm12, 0x60, 0x240
+ precalc32_79 %ymm12, %ymm13, %ymm14, %ymm3, %ymm8, 0x60, 0x260
+.endm
+
+/*
+ * Macros calculating individual rounds have the general form
+ * calc_round_pre + precalc_round + calc_round_post.
+ * The calc_round_{pre,post} macros follow.
+ */
+.macro calc_f1_pre offset, reg_a, reg_b, reg_c, reg_e
+ add \offset(%r15), \reg_e
+ andn \reg_c, \reg_a, %ebp
+ add \reg_b, \reg_e // add F from the previous round
+ rorx $0x1b, \reg_a, %r12d
+ rorx $2, \reg_a, \reg_b // for the next round
+.endm
+
+/*
+ * Calculate F for the next round
+ */
+.macro calc_f1_post reg_a, reg_b, reg_e
+ and \reg_b, \reg_a // b & c
+ xor %ebp, \reg_a // F1 = (b&c) ^ (~b&d)
+ add %r12d, \reg_e
+.endm
+
+/*
+ * Registers are cyclically rotated:
+ * edx -> eax -> edi -> esi -> ebx -> ecx
+ */
+.macro calc0
+ mov %esi, %ebx // precalculate first round
+ rorx $2, %esi, %esi
+ andn %eax, %ebx, %ebp
+ and %edi, %ebx
+ xor %ebp, %ebx
+ calc_f1_pre 0x0, %ecx, %ebx, %edi, %edx
+ precalc0 0x80
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc1
+ calc_f1_pre 0x4, %edx, %ecx, %esi, %eax
+ precalc1 0x80
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc2
+ calc_f1_pre 0x8, %eax, %edx, %ebx, %edi
+ precalc2 %ymm15
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc3
+ calc_f1_pre 0xc, %edi, %eax, %ecx, %esi
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc4
+ calc_f1_pre 0x20, %esi, %edi, %edx, %ebx
+ precalc4 %ymm15, 0x0
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc5
+ calc_f1_pre 0x24, %ebx, %esi, %eax, %ecx
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc6
+ calc_f1_pre 0x28, %ecx, %ebx, %edi, %edx
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc7
+ calc_f1_pre 0x2c, %edx, %ecx, %esi, %eax
+ precalc7 0x0
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc8
+ calc_f1_pre 0x40, %eax, %edx, %ebx, %edi
+ precalc0 0x90
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc9
+ calc_f1_pre 0x44, %edi, %eax, %ecx, %esi
+ precalc1 0x90
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc10
+ calc_f1_pre 0x48, %esi, %edi, %edx, %ebx
+ precalc2 %ymm14
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc11
+ calc_f1_pre 0x4c, %ebx, %esi, %eax, %ecx
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc12
+ calc_f1_pre 0x60, %ecx, %ebx, %edi, %edx
+ precalc4 %ymm14, 0
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc13
+ calc_f1_pre 0x64, %edx, %ecx, %esi, %eax
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc14
+ calc_f1_pre 0x68, %eax, %edx, %ebx, %edi
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc15
+ calc_f1_pre 0x6c, %edi, %eax, %ecx, %esi
+ precalc7 0x10
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc16
+ calc_f1_pre 0x80, %esi, %edi, %edx, %ebx
+ precalc0 0xa0
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc17
+ calc_f1_pre 0x84, %ebx, %esi, %eax, %ecx
+ precalc1 0xa0
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc18
+ calc_f1_pre 0x88, %ecx, %ebx, %edi, %edx
+ precalc2 %ymm13
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc_f2_pre offset, reg_a, reg_b, reg_e
+ add \offset(%r15), \reg_e
+ add \reg_b, \reg_e // add F from the previous round
+ rorx $0x1b, \reg_a, %r12d
+ rorx $2, \reg_a, \reg_b // for next round
+.endm
+
+.macro calc_f2_post reg_a, reg_b, reg_c, reg_e
+ xor \reg_b, \reg_a
+ add %r12d, \reg_e
+ xor \reg_c, \reg_a
+.endm
+
+.macro calc19
+ calc_f2_pre 0x8c, %edx, %ecx, %eax
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc20
+ calc_f2_pre 0xa0, %eax, %edx, %edi
+ precalc4 %ymm13, 0x0
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc21
+ calc_f2_pre 0xa4, %edi, %eax, %esi
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc22
+ calc_f2_pre 0xa8, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc23
+ calc_f2_pre 0xac, %ebx, %esi, %ecx
+ precalc7 0x20
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc24
+ calc_f2_pre 0xc0, %ecx, %ebx, %edx
+ precalc0 0xb0
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc25
+ calc_f2_pre 0xc4, %edx, %ecx, %eax
+ precalc1 0xb0
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc26
+ calc_f2_pre 0xc8, %eax, %edx, %edi
+ precalc2 %ymm12
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc27
+ calc_f2_pre 0xcc, %edi, %eax, %esi
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc28
+ calc_f2_pre 0xe0, %esi, %edi, %ebx
+ precalc4 %ymm12, 0x0
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc29
+ calc_f2_pre 0xe4, %ebx, %esi, %ecx
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc30
+ calc_f2_pre 0xe8, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc31
+ calc_f2_pre 0xec, %edx, %ecx, %eax
+ precalc7 0x30
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc32
+ calc_f2_pre 0x100, %eax, %edx, %edi
+ precalc16 %ymm15, %ymm14, %ymm12, %ymm8
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc33
+ calc_f2_pre 0x104, %edi, %eax, %esi
+ precalc17 %ymm15, %ymm13, %ymm8
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc34
+ calc_f2_pre 0x108, %esi, %edi, %ebx
+ precalc18 %ymm8
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc35
+ calc_f2_pre 0x10c, %ebx, %esi, %ecx
+ precalc19 %ymm8
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc36
+ calc_f2_pre 0x120, %ecx, %ebx, %edx
+ precalc20 %ymm8
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc37
+ calc_f2_pre 0x124, %edx, %ecx, %eax
+ precalc21 %ymm8
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc38
+ calc_f2_pre 0x128, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc_f3_pre offset, reg_e
+ add \offset(%r15), \reg_e
+.endm
+
+.macro calc_f3_post reg_a, reg_b, reg_c, reg_e, reg_tb
+ add \reg_tb, \reg_e // add F from the previous round
+ mov \reg_b, %ebp
+ or \reg_a, %ebp
+ rorx $0x1b, \reg_a, %r12d
+ rorx $2, \reg_a, \reg_tb
+ and \reg_c, %ebp // calculate F for the next round
+ and \reg_b, \reg_a
+ or %ebp, \reg_a
+ add %r12d, \reg_e
+.endm
+
+.macro calc39
+ calc_f3_pre 0x12c, %esi
+ precalc23 %ymm8, 0x0, 0x80
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc40
+ calc_f3_pre 0x140, %ebx
+ precalc16 %ymm14, %ymm13, %ymm8, %ymm7
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc41
+ calc_f3_pre 0x144, %ecx
+ precalc17 %ymm14, %ymm12, %ymm7
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc42
+ calc_f3_pre 0x148, %edx
+ precalc18 %ymm7
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc43
+ calc_f3_pre 0x14c, %eax
+ precalc19 %ymm7
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc44
+ calc_f3_pre 0x160, %edi
+ precalc20 %ymm7
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc45
+ calc_f3_pre 0x164, %esi
+ precalc21 %ymm7
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc46
+ calc_f3_pre 0x168, %ebx
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc47
+ calc_f3_pre 0x16c, %ecx
+ vpxor %ymm9, %ymm0, %ymm7
+ vpaddd 0x20(%r8), %ymm7, %ymm0
+ vmovdqu %ymm0, 0xa0(%r14)
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc48
+ calc_f3_pre 0x180, %edx
+ precalc16 %ymm13, %ymm12, %ymm7, %ymm5
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc49
+ calc_f3_pre 0x184, %eax
+ precalc17 %ymm13, %ymm8, %ymm5
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc50
+ calc_f3_pre 0x188, %edi
+ precalc18 %ymm5
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc51
+ calc_f3_pre 0x18c, %esi
+ precalc19 %ymm5
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc52
+ calc_f3_pre 0x1a0, %ebx
+ precalc20 %ymm5
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc53
+ calc_f3_pre 0x1a4, %ecx
+ precalc21 %ymm5
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc54
+ calc_f3_pre 0x1a8, %edx
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc55
+ calc_f3_pre 0x1ac, %eax
+ precalc23 %ymm5, 0x20, 0xc0
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc56
+ calc_f3_pre 0x1c0, %edi
+ precalc16 %ymm12, %ymm8, %ymm5, %ymm3
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc57
+ calc_f3_pre 0x1c4, %esi
+ precalc17 %ymm12, %ymm7, %ymm3
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc58
+ calc_f3_pre 0x1c8, %ebx
+ precalc18 %ymm3
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc59
+ calc_f2_pre 0x1cc, %ebx, %esi, %ecx
+ precalc19 %ymm3
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc60
+ calc_f2_pre 0x1e0, %ecx, %ebx, %edx
+ precalc20 %ymm3
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc61
+ calc_f2_pre 0x1e4, %edx, %ecx, %eax
+ precalc21 %ymm3
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc62
+ calc_f2_pre 0x1e8, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc63
+ calc_f2_pre 0x1ec, %edi, %eax, %esi
+ precalc23 %ymm3, 0x20, 0xe0
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc64
+ calc_f2_pre 0x200, %esi, %edi, %ebx
+ precalc32 %ymm5, %ymm3
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc65
+ calc_f2_pre 0x204, %ebx, %esi, %ecx
+ precalc33 %ymm14, %ymm15
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc66
+ calc_f2_pre 0x208, %ecx, %ebx, %edx
+ precalc34 %ymm8
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc67
+ calc_f2_pre 0x20c, %edx, %ecx, %eax
+ precalc35 %ymm15
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc68
+ calc_f2_pre 0x220, %eax, %edx, %edi
+ precalc36 %ymm15
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc69
+ calc_f2_pre 0x224, %edi, %eax, %esi
+ precalc37 %ymm15
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc70
+ calc_f2_pre 0x228, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc71
+ calc_f2_pre 0x22c, %ebx, %esi, %ecx
+ precalc39 %ymm15, 0x20, 0x100
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc72
+ calc_f2_pre 0x240, %ecx, %ebx, %edx
+ precalc32 %ymm3, %ymm15
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc73
+ calc_f2_pre 0x244, %edx, %ecx, %eax
+ precalc33 %ymm13, %ymm14
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc74
+ calc_f2_pre 0x248, %eax, %edx, %edi
+ precalc34 %ymm7
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc75
+ calc_f2_pre 0x24c, %edi, %eax, %esi
+ precalc35 %ymm14
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc76
+ calc_f2_pre 0x260, %esi, %edi, %ebx
+ precalc36 %ymm14
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc77
+ calc_f2_pre 0x264, %ebx, %esi, %ecx
+ precalc37 %ymm14
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc78
+ calc_f2_pre 0x268, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc79
+ add 0x26c(%r15), %eax
+ add %ecx, %eax
+ rorx $0x1b, %edx, %r12d
+ precalc39 %ymm14, 0x20, 0x120
+ add %r12d, %eax
+.endm
+
+/*
+ * Similar to calc0
+ */
+.macro calc80
+ mov %ecx, %edx // precalculate first round
+ rorx $2, %ecx, %ecx
+ andn %esi, %edx, %ebp
+ and %ebx, %edx
+ xor %ebp, %edx
+ calc_f1_pre 0x10, %eax, %edx, %ebx, %edi
+ precalc32 %ymm15, %ymm14
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc81
+ calc_f1_pre 0x14, %edi, %eax, %ecx, %esi
+ precalc33 %ymm12, %ymm13
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc82
+ calc_f1_pre 0x18, %esi, %edi, %edx, %ebx
+ precalc34 %ymm5
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc83
+ calc_f1_pre 0x1c, %ebx, %esi, %eax, %ecx
+ precalc35 %ymm13
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc84
+ calc_f1_pre 0x30, %ecx, %ebx, %edi, %edx
+ precalc36 %ymm13
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc85
+ calc_f1_pre 0x34, %edx, %ecx, %esi, %eax
+ precalc37 %ymm13
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc86
+ calc_f1_pre 0x38, %eax, %edx, %ebx, %edi
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc87
+ calc_f1_pre 0x3c, %edi, %eax, %ecx, %esi
+ precalc39 %ymm13, 0x40, 0x140
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc88
+ calc_f1_pre 0x50, %esi, %edi, %edx, %ebx
+ precalc32 %ymm14, %ymm13
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc89
+ calc_f1_pre 0x54, %ebx, %esi, %eax, %ecx
+ precalc33 %ymm8, %ymm12
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc90
+ calc_f1_pre 0x58, %ecx, %ebx, %edi, %edx
+ precalc34 %ymm3
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc91
+ calc_f1_pre 0x5c, %edx, %ecx, %esi, %eax
+ precalc35 %ymm12
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc92
+ calc_f1_pre 0x70, %eax, %edx, %ebx, %edi
+ precalc36 %ymm12
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc93
+ calc_f1_pre 0x74, %edi, %eax, %ecx, %esi
+ precalc37 %ymm12
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc94
+ calc_f1_pre 0x78, %esi, %edi, %edx, %ebx
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc95
+ calc_f1_pre 0x7c, %ebx, %esi, %eax, %ecx
+ precalc39 %ymm12, 0x40, 0x160
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc96
+ calc_f1_pre 0x90, %ecx, %ebx, %edi, %edx
+ precalc32 %ymm13, %ymm12
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc97
+ calc_f1_pre 0x94, %edx, %ecx, %esi, %eax
+ precalc33 %ymm7, %ymm8
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc98
+ calc_f1_pre 0x98, %eax, %edx, %ebx, %edi
+ precalc34 %ymm15
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc99
+ calc_f2_pre 0x9c, %edi, %eax, %esi
+ precalc35 %ymm8
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc100
+ calc_f2_pre 0xb0, %esi, %edi, %ebx
+ precalc36 %ymm8
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc101
+ calc_f2_pre 0xb4, %ebx, %esi, %ecx
+ precalc37 %ymm8
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc102
+ calc_f2_pre 0xb8, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc103
+ calc_f2_pre 0xbc, %edx, %ecx, %eax
+ precalc39 %ymm8, 0x40, 0x180
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc104
+ calc_f2_pre 0xd0, %eax, %edx, %edi
+ precalc32 %ymm12, %ymm8
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc105
+ calc_f2_pre 0xd4, %edi, %eax, %esi
+ precalc33 %ymm5, %ymm7
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc106
+ calc_f2_pre 0xd8, %esi, %edi, %ebx
+ precalc34 %ymm14
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc107
+ calc_f2_pre 0xdc, %ebx, %esi, %ecx
+ precalc35 %ymm7
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc108
+ calc_f2_pre 0xf0, %ecx, %ebx, %edx
+ precalc36 %ymm7
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc109
+ calc_f2_pre 0xf4, %edx, %ecx, %eax
+ precalc37 %ymm7
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc110
+ calc_f2_pre 0xf8, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc111
+ calc_f2_pre 0xfc, %edi, %eax, %esi
+ precalc39 %ymm7, 0x40, 0x1a0
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc112
+ calc_f2_pre 0x110, %esi, %edi, %ebx
+ precalc32 %ymm8, %ymm7
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc113
+ calc_f2_pre 0x114, %ebx, %esi, %ecx
+ precalc33 %ymm3, %ymm5
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc114
+ calc_f2_pre 0x118, %ecx, %ebx, %edx
+ precalc34 %ymm13
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc115
+ calc_f2_pre 0x11c, %edx, %ecx, %eax
+ precalc35 %ymm5
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc116
+ calc_f2_pre 0x130, %eax, %edx, %edi
+ precalc37 %ymm5
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc117
+ calc_f2_pre 0x134, %edi, %eax, %esi
+ precalc37 %ymm5
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc118
+ calc_f2_pre 0x138, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc119
+ calc_f3_pre 0x13c, %ecx
+ precalc39 %ymm5, 0x40, 0x1c0
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc120
+ calc_f3_pre 0x150, %edx
+ precalc32 %ymm7, %ymm5
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc121
+ calc_f3_pre 0x154, %eax
+ precalc33 %ymm15, %ymm3
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc122
+ calc_f3_pre 0x158, %edi
+ precalc34 %ymm12
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc123
+ calc_f3_pre 0x15c, %esi
+ precalc35 %ymm3
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc124
+ calc_f3_pre 0x170, %ebx
+ precalc36 %ymm3
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc125
+ calc_f3_pre 0x174, %ecx
+ precalc37 %ymm3
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc126
+ calc_f3_pre 0x178, %edx
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc127
+ calc_f3_pre 0x17c, %eax
+ precalc39 %ymm3, 0x60, 0x1e0
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc128
+ calc_f3_pre 0x190, %edi
+ precalc32 %ymm5, %ymm3
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc129
+ calc_f3_pre 0x194, %esi
+ precalc33 %ymm14, %ymm15
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc130
+ calc_f3_pre 0x198, %ebx
+ precalc34 %ymm8
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc131
+ calc_f3_pre 0x19c, %ecx
+ precalc35 %ymm15
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc132
+ calc_f3_pre 0x1b0, %edx
+ precalc36 %ymm15
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc133
+ calc_f3_pre 0x1b4, %eax
+ precalc37 %ymm15
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc134
+ calc_f3_pre 0x1b8, %edi
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc135
+ calc_f3_pre 0x1bc, %esi
+ precalc39 %ymm15, 0x60, 0x200
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc136
+ calc_f3_pre 0x1d0, %ebx
+ precalc32 %ymm3, %ymm15
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc137
+ calc_f3_pre 0x1d4, %ecx
+ precalc33 %ymm13, %ymm14
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc138
+ calc_f3_pre 0x1d8, %edx
+ precalc34 %ymm7
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc139
+ calc_f2_pre 0x1cc, %edx, %ecx, %eax
+ precalc35 %ymm14
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc140
+ calc_f2_pre 0x1f0, %eax, %edx, %edi
+ precalc36 %ymm14
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc141
+ calc_f2_pre 0x1f4, %edi, %eax, %esi
+ precalc37 %ymm14
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc142
+ calc_f2_pre 0x1f8, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc143
+ calc_f2_pre 0x1fc, %ebx, %esi, %ecx
+ precalc39 %ymm14, 0x60, 0x220
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc144
+ calc_f2_pre 0x210, %ecx, %ebx, %edx
+ precalc32 %ymm15, %ymm14
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc145
+ calc_f2_pre 0x214, %edx, %ecx, %eax
+ precalc33 %ymm12, %ymm13
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc146
+ calc_f2_pre 0x218, %eax, %edx, %edi
+ precalc34 %ymm5
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc147
+ calc_f2_pre 0x21c, %edi, %eax, %esi
+ precalc35 %ymm13
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc148
+ calc_f2_pre 0x230, %esi, %edi, %ebx
+ precalc36 %ymm13
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc149
+ calc_f2_pre 0x234, %ebx, %esi, %ecx
+ precalc37 %ymm13
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc150
+ calc_f2_pre 0x238, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc151
+ calc_f2_pre 0x23c, %edx, %ecx, %eax
+ precalc39 %ymm13, 0x60, 0x240
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc152
+ calc_f2_pre 0x250, %eax, %edx, %edi
+ precalc32 %ymm14, %ymm13
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc153
+ calc_f2_pre 0x254, %edi, %eax, %esi
+ precalc33 %ymm8, %ymm12
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc154
+ calc_f2_pre 0x258, %esi, %edi, %ebx
+ precalc34 %ymm3
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc155
+ calc_f2_pre 0x25c, %ebx, %esi, %ecx
+ precalc35 %ymm12
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc156
+ calc_f2_pre 0x270, %ecx, %ebx, %edx
+ precalc36 %ymm12
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc157
+ calc_f2_pre 0x274, %edx, %ecx, %eax
+ precalc37 %ymm12
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc158
+ calc_f2_pre 0x278, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc159
+ add 0x27c(%r15), %esi
+ add %eax, %esi
+ rorx $0x1b, %edi, %r12d
+ precalc39 %ymm12, 0x60, 0x260
+ add %r12d, %esi
+.endm
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_avx2)
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ sub $1408+8, %rsp
+
+ and $~63, %rdx
+ lea k_xmm_ar(%rip), %r8
+ mov %rdi, %r9
+ mov %rsi, %r10
+ lea 64(%rsi), %r13
+ lea 64(%rsi, %rdx), %r11
+ cmp %r11, %r13
+ cmovae %r8, %r13
+ vmovdqu bswap_shufb_ctl(%rip), %ymm10
+
+ mov (%r9), %ecx
+ mov 4(%r9), %esi
+ mov 8(%r9), %edi
+ mov 12(%r9), %eax
+ mov 16(%r9), %edx
+ mov %rsp, %r14
+ lea 2*4*80+32(%rsp), %r15
+ precalc // precalc WK for first 2 blocks
+ xchg %r14, %r15
+
+ // this is unrolled
+.Loop: cmp %r8, %r10 // we use the value of R8 (set below)
+ // as a signal of the last block
+ jne .Lbegin
+ add $1408+8, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ vzeroupper
+ ret
+
+.Lbegin:
+ calc0
+ calc1
+ calc2
+ calc3
+ calc4
+ calc5
+ calc6
+ calc7
+ calc8
+ calc9
+ calc10
+ calc11
+ calc12
+ calc13
+ calc14
+ calc15
+ calc16
+ calc17
+ calc18
+ calc19
+ calc20
+ calc21
+ calc22
+ calc23
+ calc24
+ calc25
+ calc26
+ calc27
+ calc28
+ calc29
+ calc30
+ calc31
+ calc32
+ calc33
+ calc34
+ calc35
+ calc36
+ calc37
+ calc38
+ calc39
+ calc40
+ calc41
+ calc42
+ calc43
+ calc44
+ calc45
+ calc46
+ calc47
+ calc48
+ calc49
+ calc50
+ calc51
+ calc52
+ calc53
+ calc54
+ calc55
+ calc56
+ calc57
+ calc58
+ calc59
+
+ add $128, %r10 // move to the next even-64-byte block
+ cmp %r11, %r10 // is the current block the last one?
+ cmovae %r10, %r8 // signal the last iteration smartly
+
+ calc60
+ calc61
+ calc62
+ calc63
+ calc64
+ calc65
+ calc66
+ calc67
+ calc68
+ calc69
+ calc70
+ calc71
+ calc72
+ calc73
+ calc74
+ calc75
+ calc76
+ calc77
+ calc78
+ calc79
+
+ update_hash %eax, %edx, %ebx, %esi, %edi
+ cmp %r8, %r10 // is the current block the last one?
+ je .Loop
+ mov %edx, %ecx
+
+ calc80
+ calc81
+ calc82
+ calc83
+ calc84
+ calc85
+ calc86
+ calc87
+ calc88
+ calc89
+ calc90
+ calc91
+ calc92
+ calc93
+ calc94
+ calc95
+ calc96
+ calc97
+ calc98
+ calc99
+ calc100
+ calc101
+ calc102
+ calc103
+ calc104
+ calc105
+ calc106
+ calc107
+ calc108
+ calc109
+ calc110
+ calc111
+ calc112
+ calc113
+ calc114
+ calc115
+ calc116
+ calc117
+ calc118
+ calc119
+ calc120
+ calc121
+ calc122
+ calc123
+ calc124
+ calc125
+ calc126
+ calc127
+ calc128
+ calc129
+ calc130
+ calc131
+ calc132
+ calc133
+ calc134
+ calc135
+ calc136
+ calc137
+ calc138
+ calc139
+
+ add $128, %r13 // move to the next even-64-byte block
+ cmp %r11, %r13 // is the current block the last one?
+ cmovae %r8, %r10
+
+ calc140
+ calc141
+ calc142
+ calc143
+ calc144
+ calc145
+ calc146
+ calc147
+ calc148
+ calc149
+ calc150
+ calc151
+ calc152
+ calc153
+ calc154
+ calc155
+ calc156
+ calc157
+ calc158
+ calc159
+
+ update_hash %esi, %edi, %edx, %ecx, %ebx
+ mov %esi, %r12d // reset state for AVX2 reg permutation
+ mov %edi, %esi
+ mov %edx, %edi
+ mov %ebx, %edx
+ mov %ecx, %eax
+ mov %r12d, %ecx
+ xchg %r14, %r15
+ jmp .Loop
+END(_libmd_sha1block_avx2)
+
+ .section .rodata
+ .balign 32
+k_xmm_ar:
+ .fill 8, 4, 0x5a827999
+ .fill 8, 4, 0x6ed9eba1
+ .fill 8, 4, 0x8f1bbcdc
+ .fill 8, 4, 0xca62c1d6
+ .size k_xmm_ar, .-k_xmm_ar
+
+bswap_shufb_ctl:
+ .4byte 0x00010203
+ .4byte 0x04050607
+ .4byte 0x08090a0b
+ .4byte 0x0c0d0e0f
+ .4byte 0x00010203
+ .4byte 0x04050607
+ .4byte 0x08090a0b
+ .4byte 0x0c0d0e0f
+ .size bswap_shufb_ctl, .-bswap_shufb_ctl
+
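
The net effect of the precalc* machinery above can be summarized in plain C: the message schedule is expanded and the round constant folded in ahead of time, so each calc_f*_pre only needs to add a single precomputed word from the temporary buffer addressed by %r15. A rough, non-vectorized sketch for one block follows (illustrative only, names invented; the real code does this for two 64-byte blocks at a time with 256-bit vectors, interleaved with the scalar rounds of the previous pair of blocks).

#include <stdint.h>

static void
sha1_precalc_wk(const uint32_t w[16] /* already byte-swapped */, uint32_t wk[80])
{
	static const uint32_t k[4] = {
		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
	};
	uint32_t s[80];
	int i;

	for (i = 0; i < 16; i++)
		s[i] = w[i];
	for (i = 16; i < 80; i++) {
		uint32_t x = s[i - 3] ^ s[i - 8] ^ s[i - 14] ^ s[i - 16];

		s[i] = x << 1 | x >> 31;	/* rol #1 */
	}
	for (i = 0; i < 80; i++)
		wk[i] = s[i] + k[i / 20];	/* fold in the round constant */
}

Each of calc0 through calc159 then boils down to e += wk[i] + F(b, c, d) + rol(a, 5) plus the rotation of b, with wk[i] read straight from that buffer.
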
+ /*
+ * SHA1 implementation using the Intel SHA extensions (SHANI).
+ *
+	 * Implemented according to the Intel white paper
+ *
+ * S. Gulley, V. Gopal, K. Yap, W. Feghali, J. Guilford,
+ * G. Wolrich: "Intel SHA Extensions: new instruction supporting
+	 * G. Wolrich: "Intel SHA Extensions: new instructions supporting
+ * July 2013.
+ */
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_shani)
+ and $~63, %rdx // round length to block-size multiple
+ lea (%rsi, %rdx, 1), %rcx // end pointer
+ test %rdx, %rdx // nothing to do?
+ je 1f // if so, terminate immediately
+
+ movdqu (%rdi), %xmm6 // h0, h1, h2, h3
+ pxor %xmm7, %xmm7
+ pshufd $0x1b, %xmm6, %xmm6 // h3, h2, h1, h0
+ pinsrd $3, 16(%rdi), %xmm7 // h4 in the highest word of xmm7
+ movdqu shuf_mask(%rip), %xmm4
+
+ // main loop
+0: movdqa %xmm6, %xmm8 // stash ABCD
+ movdqa %xmm7, %xmm9 // stash E
+
+ // rounds 0--3
+ movdqu 0*16(%rsi), %xmm0 // load first message block
+ pshufb %xmm4, %xmm0 // and byte-swap
+ paddd %xmm0, %xmm7 // E += w[0]
+ movdqa %xmm6, %xmm5 // E' = A
+ sha1rnds4 $0, %xmm7, %xmm6 // perform rounds 0--3
+
+ // rounds 4--7
+ movdqu 1*16(%rsi), %xmm1
+ pshufb %xmm4, %xmm1
+ sha1nexte %xmm1, %xmm5
+ movdqa %xmm6, %xmm7
+ sha1rnds4 $0, %xmm5, %xmm6
+ sha1msg1 %xmm1, %xmm0
+
+ // rounds 8--11
+ movdqu 2*16(%rsi), %xmm2
+ pshufb %xmm4, %xmm2
+ sha1nexte %xmm2, %xmm7
+ movdqa %xmm6, %xmm5
+ sha1rnds4 $0, %xmm7, %xmm6
+ sha1msg1 %xmm2, %xmm1
+ pxor %xmm2, %xmm0
+
+.macro midround msg3, msg0, msg1, msg2, e1, e0, k
+ sha1nexte \msg3, \e1
+ movdqa %xmm6, \e0
+ sha1msg2 \msg3, \msg0
+ sha1rnds4 $\k, \e1, %xmm6
+ sha1msg1 \msg3, \msg2
+ pxor \msg3, \msg1
+.endm
+
+ movdqu 3*16(%rsi), %xmm3 // load third message block
+ pshufb %xmm4, %xmm3
+
+ add $4*16, %rsi
+
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 0 // 12--15
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 0 // 16--19
+ midround %xmm1, %xmm2, %xmm3, %xmm0, %xmm5, %xmm7, 1 // 20--23
+ midround %xmm2, %xmm3, %xmm0, %xmm1, %xmm7, %xmm5, 1 // 24--27
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 1 // 28--31
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 1 // 32--35
+ midround %xmm1, %xmm2, %xmm3, %xmm0, %xmm5, %xmm7, 1 // 36--39
+ midround %xmm2, %xmm3, %xmm0, %xmm1, %xmm7, %xmm5, 2 // 40--43
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 2 // 44--47
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 2 // 48--51
+ midround %xmm1, %xmm2, %xmm3, %xmm0, %xmm5, %xmm7, 2 // 52--55
+ midround %xmm2, %xmm3, %xmm0, %xmm1, %xmm7, %xmm5, 2 // 56--59
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 3 // 60--63
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 3 // 64--67
+
+ // rounds 68--71
+ sha1nexte %xmm1, %xmm5
+ movdqa %xmm6, %xmm7
+ sha1msg2 %xmm1, %xmm2
+ sha1rnds4 $3, %xmm5, %xmm6
+ pxor %xmm1, %xmm3
+
+ // rounds 72--75
+ sha1nexte %xmm2, %xmm7
+ movdqa %xmm6, %xmm5
+ sha1msg2 %xmm2, %xmm3
+ sha1rnds4 $3, %xmm7, %xmm6
+
+ // rounds 76--79
+ sha1nexte %xmm3, %xmm5
+ movdqa %xmm6, %xmm7
+ sha1rnds4 $3, %xmm5, %xmm6
+
+ sha1nexte %xmm9, %xmm7 // add saved E
+ paddd %xmm8, %xmm6 // add saved ABCD
+
+ cmp %rsi, %rcx // end reached?
+ jne 0b
+
+ pshufd $0x1b, %xmm6, %xmm6 // restore order of h0--h3
+ movdqu %xmm6, (%rdi) // write h0--h3
+ pextrd $3, %xmm7, 16(%rdi) // write h4
+1: ret
+END(_libmd_sha1block_shani)
+
+ .section .rodata
+ .balign 16
+shuf_mask:
+ .8byte 0x08090a0b0c0d0e0f
+ .8byte 0x0001020304050607
+ .size shuf_mask, .-shuf_mask
+
+ .section .note.GNU-stack,"",%progbits
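
For readers more comfortable with intrinsics than with raw SHANI mnemonics, the first eight rounds of the routine above map onto compiler intrinsics roughly as sketched below. This is illustrative only: the function and variable names are invented and the sha1msg1/sha1msg2 message-schedule updates are omitted.

#include <immintrin.h>

__attribute__((__target__("sha,sse4.1")))
static void
sha1_shani_rounds_0_to_7(__m128i *abcd, __m128i *e, __m128i msg0, __m128i msg1)
{
	__m128i saved_abcd;

	/* rounds 0--3: E += W[0..3]; immediate 0 selects the first round
	 * function and constant, like "sha1rnds4 $0" above */
	*e = _mm_add_epi32(*e, msg0);
	saved_abcd = *abcd;
	*abcd = _mm_sha1rnds4_epu32(*abcd, *e, 0);

	/* rounds 4--7: sha1nexte rotates the saved A left by 30 and adds
	 * it to W[4..7], producing the E input for the next four rounds */
	*e = _mm_sha1nexte_epu32(saved_abcd, msg1);
	*abcd = _mm_sha1rnds4_epu32(*abcd, *e, 0);
}
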
diff --git a/lib/libmd/amd64/sha1dispatch.c b/lib/libmd/amd64/sha1dispatch.c
new file mode 100644
--- /dev/null
+++ b/lib/libmd/amd64/sha1dispatch.c
@@ -0,0 +1,77 @@
+/*-
+ * Copyright (c) 2016 The Go Authors. All rights reserved.
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * Adapted from Go's crypto/sha1/sha1block_amd64.go.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/specialreg.h>
+#include <sha.h>
+#include <x86/ifunc.h>
+
+extern void _libmd_sha1block_scalar(SHA1_CTX *, const void *, size_t);
+extern void _libmd_sha1block_avx2(SHA1_CTX *, const void *, size_t);
+extern void _libmd_sha1block_shani(SHA1_CTX *, const void *, size_t);
+static void sha1block_avx2_wrapper(SHA1_CTX *, const void *, size_t);
+
+#define AVX2_STDEXT_NEEDED \
+ (CPUID_STDEXT_BMI1 | CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2)
+
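+/*
+ * Resolved once by the ifunc resolver: prefer the SHA-NI implementation
+ * when the CPU supports it, fall back to AVX2 (which also requires
+ * BMI1/BMI2), and otherwise use the portable scalar code.
+ */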
+DEFINE_UIFUNC(, void, sha1_block, (SHA1_CTX *, const void *, size_t))
+{
+ if (cpu_stdext_feature & CPUID_STDEXT_SHA)
+ return (_libmd_sha1block_shani);
+ if ((cpu_stdext_feature & AVX2_STDEXT_NEEDED) == AVX2_STDEXT_NEEDED)
+ return (sha1block_avx2_wrapper);
+ else
+ return (_libmd_sha1block_scalar);
+}
+
+static void
+sha1block_avx2_wrapper(SHA1_CTX *c, const void *data, size_t len)
+{
+ if (len >= 256) {
+ /*
+		 * sha1block_avx2 processes two blocks per iteration and
+		 * interleaves the precomputation for the next block, so it
+		 * may read up to 192 bytes past the end of the input.
+		 * We could add bounds checks inside sha1block_avx2, but that
+		 * would just turn it into a copy of sha1block_scalar, so
+		 * call the scalar code for the trailing blocks instead.
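+		 * For example, len = 384 gives safe_len = 256 and a 128-byte
+		 * tail, while len = 320 gives safe_len = 128 and a 192-byte
+		 * tail for the scalar code.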
+ */
+ size_t safe_len = len - 128;
+
+ if (safe_len % 128 != 0)
+ safe_len -= 64;
+
+ _libmd_sha1block_avx2(c, data, safe_len);
+ _libmd_sha1block_scalar(c, data + safe_len, len - safe_len);
+ } else
+ _libmd_sha1block_scalar(c, data, len);
+}
diff --git a/lib/libmd/i386/sha.S b/lib/libmd/i386/sha.S
deleted file mode 100644
--- a/lib/libmd/i386/sha.S
+++ /dev/null
@@ -1,1951 +0,0 @@
-/* -*- Fundamental -*- Emacs' assembler mode hoses this file */
-#ifndef PIC
-/* Run the C pre-processor over this file with one of the following defined
- * ELF - elf object files,
- * OUT - a.out object files,
- * BSDI - BSDI style a.out object files
- * SOL - Solaris style elf
- */
-
-#define TYPE(a,b) .type a,b
-#define SIZE(a,b) .size a,b
-
-#if defined(OUT) || defined(BSDI)
-#define sha1_block_x86 _sha1_block_x86
-
-#endif
-
-#ifdef OUT
-#define OK 1
-#define ALIGN 4
-#endif
-
-#ifdef BSDI
-#define OK 1
-#define ALIGN 4
-#undef SIZE
-#undef TYPE
-#define SIZE(a,b)
-#define TYPE(a,b)
-#endif
-
-#if defined(ELF) || defined(SOL)
-#define OK 1
-#define ALIGN 4
-#endif
-
-#ifndef OK
-You need to define one of
-ELF - elf systems - linux-elf, NetBSD and DG-UX
-OUT - a.out systems - linux-a.out and FreeBSD
-SOL - solaris systems, which are elf with strange comment lines
-BSDI - a.out with a very primative version of as.
-#endif
-
-/* Let the Assembler begin :-) */
- /* Don't even think of reading this code */
- /* It was automatically generated by sha1-586.pl */
- /* Which is a perl program used to generate the x86 assember for */
- /* any of elf, a.out, BSDI,Win32, or Solaris */
- /* eric <eay@cryptsoft.com> */
-
- .file "sha1-586.s"
- .version "01.01"
-gcc2_compiled.:
-.text
- .p2align ALIGN
-.globl sha1_block_x86
- TYPE(sha1_block_x86,@function)
-sha1_block_x86:
- pushl %esi
- pushl %ebp
- movl 20(%esp), %eax
- movl 16(%esp), %esi
- addl %esi, %eax
- movl 12(%esp), %ebp
- pushl %ebx
- subl $64, %eax
- pushl %edi
- movl 4(%ebp), %ebx
- subl $72, %esp
- movl 12(%ebp), %edx
- movl 16(%ebp), %edi
- movl 8(%ebp), %ecx
- movl %eax, 68(%esp)
- /* First we need to setup the X array */
- movl (%esi), %eax
-.L000start:
- /* First, load the words onto the stack in network byte order */
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, (%esp)
- movl 4(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 4(%esp)
- movl 8(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 8(%esp)
- movl 12(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 12(%esp)
- movl 16(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 16(%esp)
- movl 20(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 20(%esp)
- movl 24(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 24(%esp)
- movl 28(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 28(%esp)
- movl 32(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 32(%esp)
- movl 36(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 36(%esp)
- movl 40(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 40(%esp)
- movl 44(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 44(%esp)
- movl 48(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 48(%esp)
- movl 52(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 52(%esp)
- movl 56(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 56(%esp)
- movl 60(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 60(%esp)
- /* We now have the X array on the stack */
- /* starting at sp-4 */
- movl %esi, 64(%esp)
-
- /* Start processing */
- movl (%ebp), %eax
- /* 00_15 0 */
- movl %ecx, %esi
- movl %eax, %ebp
- xorl %edx, %esi
- roll $5, %ebp
- andl %ebx, %esi
- addl %edi, %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- movl (%esp), %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %edx, %esi
- leal 1518500249(%ebp,%edi,1),%ebp
- movl %ebx, %edi
- addl %ebp, %esi
- xorl %ecx, %edi
- movl %esi, %ebp
- andl %eax, %edi
- roll $5, %ebp
- addl %edx, %ebp
- movl 4(%esp), %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- xorl %ecx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %ebp, %edi
- /* 00_15 2 */
- movl %eax, %edx
- movl %edi, %ebp
- xorl %ebx, %edx
- roll $5, %ebp
- andl %esi, %edx
- addl %ecx, %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- movl 8(%esp), %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebx, %edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- movl %esi, %ecx
- addl %ebp, %edx
- xorl %eax, %ecx
- movl %edx, %ebp
- andl %edi, %ecx
- roll $5, %ebp
- addl %ebx, %ebp
- movl 12(%esp), %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- xorl %eax, %ecx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ebp, %ecx
- /* 00_15 4 */
- movl %edi, %ebx
- movl %ecx, %ebp
- xorl %esi, %ebx
- roll $5, %ebp
- andl %edx, %ebx
- addl %eax, %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- movl 16(%esp), %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %esi, %ebx
- leal 1518500249(%ebp,%eax,1),%ebp
- movl %edx, %eax
- addl %ebp, %ebx
- xorl %edi, %eax
- movl %ebx, %ebp
- andl %ecx, %eax
- roll $5, %ebp
- addl %esi, %ebp
- movl 20(%esp), %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- xorl %edi, %eax
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- leal 1518500249(%ebp,%esi,1),%ebp
- addl %ebp, %eax
- /* 00_15 6 */
- movl %ecx, %esi
- movl %eax, %ebp
- xorl %edx, %esi
- roll $5, %ebp
- andl %ebx, %esi
- addl %edi, %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- movl 24(%esp), %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %edx, %esi
- leal 1518500249(%ebp,%edi,1),%ebp
- movl %ebx, %edi
- addl %ebp, %esi
- xorl %ecx, %edi
- movl %esi, %ebp
- andl %eax, %edi
- roll $5, %ebp
- addl %edx, %ebp
- movl 28(%esp), %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- xorl %ecx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %ebp, %edi
- /* 00_15 8 */
- movl %eax, %edx
- movl %edi, %ebp
- xorl %ebx, %edx
- roll $5, %ebp
- andl %esi, %edx
- addl %ecx, %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- movl 32(%esp), %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebx, %edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- movl %esi, %ecx
- addl %ebp, %edx
- xorl %eax, %ecx
- movl %edx, %ebp
- andl %edi, %ecx
- roll $5, %ebp
- addl %ebx, %ebp
- movl 36(%esp), %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- xorl %eax, %ecx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ebp, %ecx
- /* 00_15 10 */
- movl %edi, %ebx
- movl %ecx, %ebp
- xorl %esi, %ebx
- roll $5, %ebp
- andl %edx, %ebx
- addl %eax, %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- movl 40(%esp), %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %esi, %ebx
- leal 1518500249(%ebp,%eax,1),%ebp
- movl %edx, %eax
- addl %ebp, %ebx
- xorl %edi, %eax
- movl %ebx, %ebp
- andl %ecx, %eax
- roll $5, %ebp
- addl %esi, %ebp
- movl 44(%esp), %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- xorl %edi, %eax
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- leal 1518500249(%ebp,%esi,1),%ebp
- addl %ebp, %eax
- /* 00_15 12 */
- movl %ecx, %esi
- movl %eax, %ebp
- xorl %edx, %esi
- roll $5, %ebp
- andl %ebx, %esi
- addl %edi, %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- movl 48(%esp), %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %edx, %esi
- leal 1518500249(%ebp,%edi,1),%ebp
- movl %ebx, %edi
- addl %ebp, %esi
- xorl %ecx, %edi
- movl %esi, %ebp
- andl %eax, %edi
- roll $5, %ebp
- addl %edx, %ebp
- movl 52(%esp), %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- xorl %ecx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %ebp, %edi
- /* 00_15 14 */
- movl %eax, %edx
- movl %edi, %ebp
- xorl %ebx, %edx
- roll $5, %ebp
- andl %esi, %edx
- addl %ecx, %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- movl 56(%esp), %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebx, %edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- movl %esi, %ecx
- addl %ebp, %edx
- xorl %eax, %ecx
- movl %edx, %ebp
- andl %edi, %ecx
- roll $5, %ebp
- addl %ebx, %ebp
- movl 60(%esp), %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- xorl %eax, %ecx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ebp, %ecx
- /* 16_19 16 */
- nop
- movl (%esp), %ebp
- movl 8(%esp), %ebx
- xorl %ebp, %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 52(%esp), %ebp
- xorl %ebp, %ebx
- movl %edi, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %esi, %ebp
- movl %ebx, (%esp)
- andl %edx, %ebp
- leal 1518500249(%ebx,%eax,1),%ebx
- xorl %esi, %ebp
- movl %ecx, %eax
- addl %ebp, %ebx
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- movl 4(%esp), %eax
- movl 12(%esp), %ebp
- xorl %ebp, %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 56(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %edx, %ebp
- xorl %edi, %ebp
- movl %eax, 4(%esp)
- andl %ecx, %ebp
- leal 1518500249(%eax,%esi,1),%eax
- xorl %edi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 16_19 18 */
- movl 8(%esp), %ebp
- movl 16(%esp), %esi
- xorl %ebp, %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl 60(%esp), %ebp
- xorl %ebp, %esi
- movl %ecx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %edx, %ebp
- movl %esi, 8(%esp)
- andl %ebx, %ebp
- leal 1518500249(%esi,%edi,1),%esi
- xorl %edx, %ebp
- movl %eax, %edi
- addl %ebp, %esi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- movl 12(%esp), %edi
- movl 20(%esp), %ebp
- xorl %ebp, %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl (%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %ebx, %ebp
- xorl %ecx, %ebp
- movl %edi, 12(%esp)
- andl %eax, %ebp
- leal 1518500249(%edi,%edx,1),%edi
- xorl %ecx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 20_39 20 */
- movl 16(%esp), %edx
- movl 24(%esp), %ebp
- xorl %ebp, %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 4(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 16(%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 21 */
- movl 20(%esp), %ecx
- movl 28(%esp), %ebp
- xorl %ebp, %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 8(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 20(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 22 */
- movl 24(%esp), %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 12(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 24(%esp)
- xorl %esi, %ebp
- leal 1859775393(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 23 */
- movl 28(%esp), %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 16(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 28(%esp)
- xorl %edi, %ebp
- leal 1859775393(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 24 */
- movl 32(%esp), %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 20(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 32(%esp)
- xorl %edx, %ebp
- leal 1859775393(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 25 */
- movl 36(%esp), %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 24(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 36(%esp)
- xorl %ecx, %ebp
- leal 1859775393(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 26 */
- movl 40(%esp), %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 8(%esp), %ebp
- xorl %ebp, %edx
- movl 28(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 40(%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 27 */
- movl 44(%esp), %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 12(%esp), %ebp
- xorl %ebp, %ecx
- movl 32(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 44(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 28 */
- movl 48(%esp), %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 16(%esp), %ebp
- xorl %ebp, %ebx
- movl 36(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 48(%esp)
- xorl %esi, %ebp
- leal 1859775393(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 29 */
- movl 52(%esp), %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 20(%esp), %ebp
- xorl %ebp, %eax
- movl 40(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 52(%esp)
- xorl %edi, %ebp
- leal 1859775393(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 30 */
- movl 56(%esp), %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 24(%esp), %ebp
- xorl %ebp, %esi
- movl 44(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 56(%esp)
- xorl %edx, %ebp
- leal 1859775393(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 31 */
- movl 60(%esp), %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 28(%esp), %ebp
- xorl %ebp, %edi
- movl 48(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 60(%esp)
- xorl %ecx, %ebp
- leal 1859775393(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 32 */
- movl (%esp), %edx
- movl 8(%esp), %ebp
- xorl %ebp, %edx
- movl 32(%esp), %ebp
- xorl %ebp, %edx
- movl 52(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, (%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 33 */
- movl 4(%esp), %ecx
- movl 12(%esp), %ebp
- xorl %ebp, %ecx
- movl 36(%esp), %ebp
- xorl %ebp, %ecx
- movl 56(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 4(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 34 */
- movl 8(%esp), %ebx
- movl 16(%esp), %ebp
- xorl %ebp, %ebx
- movl 40(%esp), %ebp
- xorl %ebp, %ebx
- movl 60(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 8(%esp)
- xorl %esi, %ebp
- leal 1859775393(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 35 */
- movl 12(%esp), %eax
- movl 20(%esp), %ebp
- xorl %ebp, %eax
- movl 44(%esp), %ebp
- xorl %ebp, %eax
- movl (%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 12(%esp)
- xorl %edi, %ebp
- leal 1859775393(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 36 */
- movl 16(%esp), %esi
- movl 24(%esp), %ebp
- xorl %ebp, %esi
- movl 48(%esp), %ebp
- xorl %ebp, %esi
- movl 4(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 16(%esp)
- xorl %edx, %ebp
- leal 1859775393(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 37 */
- movl 20(%esp), %edi
- movl 28(%esp), %ebp
- xorl %ebp, %edi
- movl 52(%esp), %ebp
- xorl %ebp, %edi
- movl 8(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 20(%esp)
- xorl %ecx, %ebp
- leal 1859775393(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 38 */
- movl 24(%esp), %edx
- movl 32(%esp), %ebp
- xorl %ebp, %edx
- movl 56(%esp), %ebp
- xorl %ebp, %edx
- movl 12(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 24(%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 39 */
- movl 28(%esp), %ecx
- movl 36(%esp), %ebp
- xorl %ebp, %ecx
- movl 60(%esp), %ebp
- xorl %ebp, %ecx
- movl 16(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 28(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 40_59 40 */
- movl 32(%esp), %ebx
- movl 40(%esp), %ebp
- xorl %ebp, %ebx
- movl (%esp), %ebp
- xorl %ebp, %ebx
- movl 20(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 32(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 36(%esp), %eax
- addl %ebp, %ebx
- movl 44(%esp), %ebp
- xorl %ebp, %eax
- movl 4(%esp), %ebp
- xorl %ebp, %eax
- movl 24(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 36(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 41 */
- /* 40_59 42 */
- movl 40(%esp), %esi
- movl 48(%esp), %ebp
- xorl %ebp, %esi
- movl 8(%esp), %ebp
- xorl %ebp, %esi
- movl 28(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- orl %ecx, %ebp
- movl %esi, 40(%esp)
- andl %edx, %ebp
- leal 2400959708(%esi,%edi,1),%esi
- movl %ebx, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- andl %ecx, %edi
- orl %edi, %ebp
- movl %eax, %edi
- roll $5, %edi
- addl %edi, %ebp
- movl 44(%esp), %edi
- addl %ebp, %esi
- movl 52(%esp), %ebp
- xorl %ebp, %edi
- movl 12(%esp), %ebp
- xorl %ebp, %edi
- movl 32(%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %eax, %ebp
- movl %edi, 44(%esp)
- orl %ebx, %ebp
- leal 2400959708(%edi,%edx,1),%edi
- movl %eax, %edx
- andl %ecx, %ebp
- andl %ebx, %edx
- orl %edx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 40_59 43 */
- /* 40_59 44 */
- movl 48(%esp), %edx
- movl 56(%esp), %ebp
- xorl %ebp, %edx
- movl 16(%esp), %ebp
- xorl %ebp, %edx
- movl 36(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- orl %eax, %ebp
- movl %edx, 48(%esp)
- andl %ebx, %ebp
- leal 2400959708(%edx,%ecx,1),%edx
- movl %esi, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- andl %eax, %ecx
- orl %ecx, %ebp
- movl %edi, %ecx
- roll $5, %ecx
- addl %ecx, %ebp
- movl 52(%esp), %ecx
- addl %ebp, %edx
- movl 60(%esp), %ebp
- xorl %ebp, %ecx
- movl 20(%esp), %ebp
- xorl %ebp, %ecx
- movl 40(%esp), %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebp, %ecx
-.byte 209
-.byte 193 /* roll $1 %ecx */
- movl %edi, %ebp
- movl %ecx, 52(%esp)
- orl %esi, %ebp
- leal 2400959708(%ecx,%ebx,1),%ecx
- movl %edi, %ebx
- andl %eax, %ebp
- andl %esi, %ebx
- orl %ebx, %ebp
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ebp
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ecx
- /* 40_59 45 */
- /* 40_59 46 */
- movl 56(%esp), %ebx
- movl (%esp), %ebp
- xorl %ebp, %ebx
- movl 24(%esp), %ebp
- xorl %ebp, %ebx
- movl 44(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 56(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 60(%esp), %eax
- addl %ebp, %ebx
- movl 4(%esp), %ebp
- xorl %ebp, %eax
- movl 28(%esp), %ebp
- xorl %ebp, %eax
- movl 48(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 60(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 47 */
- /* 40_59 48 */
- movl (%esp), %esi
- movl 8(%esp), %ebp
- xorl %ebp, %esi
- movl 32(%esp), %ebp
- xorl %ebp, %esi
- movl 52(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- orl %ecx, %ebp
- movl %esi, (%esp)
- andl %edx, %ebp
- leal 2400959708(%esi,%edi,1),%esi
- movl %ebx, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- andl %ecx, %edi
- orl %edi, %ebp
- movl %eax, %edi
- roll $5, %edi
- addl %edi, %ebp
- movl 4(%esp), %edi
- addl %ebp, %esi
- movl 12(%esp), %ebp
- xorl %ebp, %edi
- movl 36(%esp), %ebp
- xorl %ebp, %edi
- movl 56(%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %eax, %ebp
- movl %edi, 4(%esp)
- orl %ebx, %ebp
- leal 2400959708(%edi,%edx,1),%edi
- movl %eax, %edx
- andl %ecx, %ebp
- andl %ebx, %edx
- orl %edx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 40_59 49 */
- /* 40_59 50 */
- movl 8(%esp), %edx
- movl 16(%esp), %ebp
- xorl %ebp, %edx
- movl 40(%esp), %ebp
- xorl %ebp, %edx
- movl 60(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- orl %eax, %ebp
- movl %edx, 8(%esp)
- andl %ebx, %ebp
- leal 2400959708(%edx,%ecx,1),%edx
- movl %esi, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- andl %eax, %ecx
- orl %ecx, %ebp
- movl %edi, %ecx
- roll $5, %ecx
- addl %ecx, %ebp
- movl 12(%esp), %ecx
- addl %ebp, %edx
- movl 20(%esp), %ebp
- xorl %ebp, %ecx
- movl 44(%esp), %ebp
- xorl %ebp, %ecx
- movl (%esp), %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebp, %ecx
-.byte 209
-.byte 193 /* roll $1 %ecx */
- movl %edi, %ebp
- movl %ecx, 12(%esp)
- orl %esi, %ebp
- leal 2400959708(%ecx,%ebx,1),%ecx
- movl %edi, %ebx
- andl %eax, %ebp
- andl %esi, %ebx
- orl %ebx, %ebp
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ebp
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ecx
- /* 40_59 51 */
- /* 40_59 52 */
- movl 16(%esp), %ebx
- movl 24(%esp), %ebp
- xorl %ebp, %ebx
- movl 48(%esp), %ebp
- xorl %ebp, %ebx
- movl 4(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 16(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 20(%esp), %eax
- addl %ebp, %ebx
- movl 28(%esp), %ebp
- xorl %ebp, %eax
- movl 52(%esp), %ebp
- xorl %ebp, %eax
- movl 8(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 20(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 53 */
- /* 40_59 54 */
- movl 24(%esp), %esi
- movl 32(%esp), %ebp
- xorl %ebp, %esi
- movl 56(%esp), %ebp
- xorl %ebp, %esi
- movl 12(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- orl %ecx, %ebp
- movl %esi, 24(%esp)
- andl %edx, %ebp
- leal 2400959708(%esi,%edi,1),%esi
- movl %ebx, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- andl %ecx, %edi
- orl %edi, %ebp
- movl %eax, %edi
- roll $5, %edi
- addl %edi, %ebp
- movl 28(%esp), %edi
- addl %ebp, %esi
- movl 36(%esp), %ebp
- xorl %ebp, %edi
- movl 60(%esp), %ebp
- xorl %ebp, %edi
- movl 16(%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %eax, %ebp
- movl %edi, 28(%esp)
- orl %ebx, %ebp
- leal 2400959708(%edi,%edx,1),%edi
- movl %eax, %edx
- andl %ecx, %ebp
- andl %ebx, %edx
- orl %edx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 40_59 55 */
- /* 40_59 56 */
- movl 32(%esp), %edx
- movl 40(%esp), %ebp
- xorl %ebp, %edx
- movl (%esp), %ebp
- xorl %ebp, %edx
- movl 20(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- orl %eax, %ebp
- movl %edx, 32(%esp)
- andl %ebx, %ebp
- leal 2400959708(%edx,%ecx,1),%edx
- movl %esi, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- andl %eax, %ecx
- orl %ecx, %ebp
- movl %edi, %ecx
- roll $5, %ecx
- addl %ecx, %ebp
- movl 36(%esp), %ecx
- addl %ebp, %edx
- movl 44(%esp), %ebp
- xorl %ebp, %ecx
- movl 4(%esp), %ebp
- xorl %ebp, %ecx
- movl 24(%esp), %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebp, %ecx
-.byte 209
-.byte 193 /* roll $1 %ecx */
- movl %edi, %ebp
- movl %ecx, 36(%esp)
- orl %esi, %ebp
- leal 2400959708(%ecx,%ebx,1),%ecx
- movl %edi, %ebx
- andl %eax, %ebp
- andl %esi, %ebx
- orl %ebx, %ebp
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ebp
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ecx
- /* 40_59 57 */
- /* 40_59 58 */
- movl 40(%esp), %ebx
- movl 48(%esp), %ebp
- xorl %ebp, %ebx
- movl 8(%esp), %ebp
- xorl %ebp, %ebx
- movl 28(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 40(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 44(%esp), %eax
- addl %ebp, %ebx
- movl 52(%esp), %ebp
- xorl %ebp, %eax
- movl 12(%esp), %ebp
- xorl %ebp, %eax
- movl 32(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 44(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 59 */
- /* 20_39 60 */
- movl 48(%esp), %esi
- movl 56(%esp), %ebp
- xorl %ebp, %esi
- movl 16(%esp), %ebp
- xorl %ebp, %esi
- movl 36(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 48(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 61 */
- movl 52(%esp), %edi
- movl 60(%esp), %ebp
- xorl %ebp, %edi
- movl 20(%esp), %ebp
- xorl %ebp, %edi
- movl 40(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 52(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 62 */
- movl 56(%esp), %edx
- movl (%esp), %ebp
- xorl %ebp, %edx
- movl 24(%esp), %ebp
- xorl %ebp, %edx
- movl 44(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 56(%esp)
- xorl %ebx, %ebp
- leal 3395469782(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 63 */
- movl 60(%esp), %ecx
- movl 4(%esp), %ebp
- xorl %ebp, %ecx
- movl 28(%esp), %ebp
- xorl %ebp, %ecx
- movl 48(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 60(%esp)
- xorl %eax, %ebp
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 64 */
- movl (%esp), %ebx
- movl 8(%esp), %ebp
- xorl %ebp, %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 52(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, (%esp)
- xorl %esi, %ebp
- leal 3395469782(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 65 */
- movl 4(%esp), %eax
- movl 12(%esp), %ebp
- xorl %ebp, %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 56(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 4(%esp)
- xorl %edi, %ebp
- leal 3395469782(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 66 */
- movl 8(%esp), %esi
- movl 16(%esp), %ebp
- xorl %ebp, %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl 60(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 8(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 67 */
- movl 12(%esp), %edi
- movl 20(%esp), %ebp
- xorl %ebp, %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl (%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 12(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 68 */
- movl 16(%esp), %edx
- movl 24(%esp), %ebp
- xorl %ebp, %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 4(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 16(%esp)
- xorl %ebx, %ebp
- leal 3395469782(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 69 */
- movl 20(%esp), %ecx
- movl 28(%esp), %ebp
- xorl %ebp, %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 8(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 20(%esp)
- xorl %eax, %ebp
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 70 */
- movl 24(%esp), %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 12(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 24(%esp)
- xorl %esi, %ebp
- leal 3395469782(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 71 */
- movl 28(%esp), %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 16(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 28(%esp)
- xorl %edi, %ebp
- leal 3395469782(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 72 */
- movl 32(%esp), %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 20(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 32(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 73 */
- movl 36(%esp), %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 24(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 36(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 74 */
- movl 40(%esp), %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 8(%esp), %ebp
- xorl %ebp, %edx
- movl 28(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 40(%esp)
- xorl %ebx, %ebp
- leal 3395469782(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 75 */
- movl 44(%esp), %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 12(%esp), %ebp
- xorl %ebp, %ecx
- movl 32(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 44(%esp)
- xorl %eax, %ebp
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 76 */
- movl 48(%esp), %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 16(%esp), %ebp
- xorl %ebp, %ebx
- movl 36(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 48(%esp)
- xorl %esi, %ebp
- leal 3395469782(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 77 */
- movl 52(%esp), %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 20(%esp), %ebp
- xorl %ebp, %eax
- movl 40(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 52(%esp)
- xorl %edi, %ebp
- leal 3395469782(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 78 */
- movl 56(%esp), %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 24(%esp), %ebp
- xorl %ebp, %esi
- movl 44(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 56(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 79 */
- movl 60(%esp), %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 28(%esp), %ebp
- xorl %ebp, %edi
- movl 48(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 60(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
- addl %ebp, %edx
- movl 92(%esp), %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- /* End processing */
-
- movl 12(%ebp), %edx
- addl %ebx, %edx
- movl 4(%ebp), %ebx
- addl %esi, %ebx
- movl %eax, %esi
- movl (%ebp), %eax
- movl %edx, 12(%ebp)
- addl %edi, %eax
- movl 16(%ebp), %edi
- addl %ecx, %edi
- movl 8(%ebp), %ecx
- addl %esi, %ecx
- movl %eax, (%ebp)
- movl 64(%esp), %esi
- movl %ecx, 8(%ebp)
- addl $64, %esi
- movl 68(%esp), %eax
- movl %edi, 16(%ebp)
- cmpl %esi, %eax
- movl %ebx, 4(%ebp)
- jb .L001end
- movl (%esi), %eax
- jmp .L000start
-.L001end:
- addl $72, %esp
- popl %edi
- popl %ebx
- popl %ebp
- popl %esi
- ret
-.sha1_block_x86_end:
- SIZE(sha1_block_x86,.sha1_block_x86_end-sha1_block_x86)
-.ident "desasm.pl"
-#endif
diff --git a/lib/libmd/sha1c.c b/lib/libmd/sha1c.c
--- a/lib/libmd/sha1c.c
+++ b/lib/libmd/sha1c.c
@@ -1,476 +1,244 @@
-/* crypto/sha/sha1dgst.c */
-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
- * All rights reserved.
+/*-
+ * Copyright (c) 2009 The Go Authors. All rights reserved.
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * Adapted from Go's crypto/sha1/sha1.go.
*
- * This package is an SSL implementation written
- * by Eric Young (eay@cryptsoft.com).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young (eay@cryptsoft.com)"
- * The word 'cryptographic' can be left out if the routines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <sys/types.h>
-
-#include <stdio.h>
+#include <assert.h>
+#include <sha.h>
+#include <stdint.h>
#include <string.h>
+#include <strings.h>
+#include <sys/endian.h>
-#if 0
-#include <machine/ansi.h> /* we use the __ variants of bit-sized types */
+#ifdef SHA1_ASM
+extern void sha1_block(SHA1_CTX *, const void *, size_t);
+#else
+static void sha1_block(SHA1_CTX *, const void *, size_t);
#endif
-#include <machine/endian.h>
-#undef SHA_0
-#define SHA_1
-#include "sha.h"
-#include "sha_locl.h"
+#define INIT0 0x67452301
+#define INIT1 0xEFCDAB89
+#define INIT2 0x98BADCFE
+#define INIT3 0x10325476
+#define INIT4 0xC3D2E1F0
-/*
- * The assembly-language code is not position-independent, so don't
- * try to use it in a shared library.
- */
-#ifdef PIC
-#undef SHA1_ASM
-#endif
+#define K0 0x5A827999
+#define K1 0x6ED9EBA1
+#define K2 0x8F1BBCDC
+#define K3 0xCA62C1D6
-static char *SHA1_version="SHA1 part of SSLeay 0.9.0b 11-Oct-1998";
+void
+SHA1_Init(SHA1_CTX *c)
+{
+ c->h0 = INIT0;
+ c->h1 = INIT1;
+ c->h2 = INIT2;
+ c->h3 = INIT3;
+ c->h4 = INIT4;
+ c->Nl = 0;
+ c->Nh = 0;
+ c->num = 0;
+}
-/* Implemented from SHA-1 document - The Secure Hash Algorithm
- */
+void
+SHA1_Update(SHA1_CTX *c, const void *data, size_t len)
+{
+ uint64_t nn;
+ const char *p = data;
-#define INIT_DATA_h0 (unsigned long)0x67452301L
-#define INIT_DATA_h1 (unsigned long)0xefcdab89L
-#define INIT_DATA_h2 (unsigned long)0x98badcfeL
-#define INIT_DATA_h3 (unsigned long)0x10325476L
-#define INIT_DATA_h4 (unsigned long)0xc3d2e1f0L
-
-#define K_00_19 0x5a827999L
-#define K_20_39 0x6ed9eba1L
-#define K_40_59 0x8f1bbcdcL
-#define K_60_79 0xca62c1d6L
-
-#ifndef NOPROTO
-# ifdef SHA1_ASM
- void sha1_block_x86(SHA_CTX *c, const u_int32_t *p, int num);
-# define sha1_block sha1_block_x86
-# else
- void sha1_block(SHA_CTX *c, const u_int32_t *p, int num);
-# endif
-#else
-# ifdef SHA1_ASM
- void sha1_block_x86();
-# define sha1_block sha1_block_x86
-# else
- void sha1_block();
-# endif
-#endif
+ nn = (uint64_t)c->Nl | (uint64_t)c->Nh << 32;
+ nn += len;
+ c->Nl = (uint32_t)nn;
+ c->Nh = (uint32_t)(nn >> 32);
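+	/* Finish filling a previously buffered partial block, if any. */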
+ if (c->num > 0) {
+ size_t n = SHA_CBLOCK - c->num;
-#if BYTE_ORDER == LITTLE_ENDIAN && defined(SHA1_ASM)
-# define M_c2nl c2l
-# define M_p_c2nl p_c2l
-# define M_c2nl_p c2l_p
-# define M_p_c2nl_p p_c2l_p
-# define M_nl2c l2c
-#else
-# define M_c2nl c2nl
-# define M_p_c2nl p_c2nl
-# define M_c2nl_p c2nl_p
-# define M_p_c2nl_p p_c2nl_p
-# define M_nl2c nl2c
-#endif
+ if (n > len)
+ n = len;
+
+ memcpy((char *)c->data + c->num, p, n);
+ c->num += n;
+ if (c->num == SHA_CBLOCK) {
+ sha1_block(c, (void *)c->data, SHA_CBLOCK);
+ c->num = 0;
+ }
+
+ p += n;
+ len -= n;
+ }
+
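+	/* Hash whole 64-byte blocks directly from the input buffer. */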
+ if (len >= SHA_CBLOCK) {
+ size_t n = len & ~(size_t)(SHA_CBLOCK - 1);
+
+ sha1_block(c, p, n);
+ p += n;
+ len -= n;
+ }
-void SHA1_Init(SHA_CTX *c)
- {
- c->h0=INIT_DATA_h0;
- c->h1=INIT_DATA_h1;
- c->h2=INIT_DATA_h2;
- c->h3=INIT_DATA_h3;
- c->h4=INIT_DATA_h4;
- c->Nl=0;
- c->Nh=0;
- c->num=0;
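+	/* Buffer any remaining partial block for the next call. */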
+ if (len > 0) {
+ memcpy(c->data, p, len);
+ c->num = len;
}
+}
void
-SHA1_Update(SHA_CTX *c, const void *in, size_t len)
+SHA1_Final(unsigned char *md, SHA1_CTX *c)
{
- u_int32_t *p;
- int ew,ec,sw,sc;
- u_int32_t l;
- const unsigned char *data = in;
-
- if (len == 0) return;
-
- l=(c->Nl+(len<<3))&0xffffffffL;
- if (l < c->Nl) /* overflow */
- c->Nh++;
- c->Nh+=(len>>29);
- c->Nl=l;
-
- if (c->num != 0)
- {
- p=c->data;
- sw=c->num>>2;
- sc=c->num&0x03;
-
- if ((c->num+len) >= SHA_CBLOCK)
- {
- l= p[sw];
- M_p_c2nl(data,l,sc);
- p[sw++]=l;
- for (; sw<SHA_LBLOCK; sw++)
- {
- M_c2nl(data,l);
- p[sw]=l;
- }
- len-=(SHA_CBLOCK-c->num);
-
- sha1_block(c,p,64);
- c->num=0;
- /* drop through and do the rest */
- }
- else
- {
- c->num+=(int)len;
- if ((sc+len) < 4) /* ugly, add char's to a word */
- {
- l= p[sw];
- M_p_c2nl_p(data,l,sc,len);
- p[sw]=l;
- }
- else
- {
- ew=(c->num>>2);
- ec=(c->num&0x03);
- l= p[sw];
- M_p_c2nl(data,l,sc);
- p[sw++]=l;
- for (; sw < ew; sw++)
- { M_c2nl(data,l); p[sw]=l; }
- if (ec)
- {
- M_c2nl_p(data,l,ec);
- p[sw]=l;
- }
- }
- return;
- }
+ uint64_t len;
+ size_t t;
+ unsigned char tmp[SHA_CBLOCK + sizeof(uint64_t)] = {0x80, 0};
+
+ len = (uint64_t)c->Nl | (uint64_t)c->Nh << 32;
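+	/*
+	 * Pad with t bytes (0x80 then zeroes) so the data ends 8 bytes short
+	 * of a block boundary; the 64-bit bit count then fills the final
+	 * 8 bytes of that block.
+	 */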
+ t = 64 + 56 - c->Nl % 64;
+ if (t > 64)
+ t -= 64;
+
+ /* length in bits */
+ len <<= 3;
+ be64enc(tmp + t, len);
+ SHA1_Update(c, tmp, t + 8);
+ assert(c->num == 0);
+
+ be32enc(md + 0, c->h0);
+ be32enc(md + 4, c->h1);
+ be32enc(md + 8, c->h2);
+ be32enc(md + 12, c->h3);
+ be32enc(md + 16, c->h4);
+
+ explicit_bzero(c, sizeof(*c));
+}
+
+#ifndef SHA1_ASM
+/* invariant: len is a multiple of SHA_CBLOCK */
+static void
+sha1_block(SHA1_CTX *c, const void *data, size_t len)
+{
+ uint32_t w[16];
+ uint32_t h0 = c->h0, h1 = c->h1, h2 = c->h2, h3 = c->h3, h4 = c->h4;
+ const char *p = data;
+
+ while (len >= SHA_CBLOCK) {
+ size_t i;
+ uint32_t a = h0, b = h1, c = h2, d = h3, e = h4;
+ uint32_t f, t, tmp;
+
+# pragma unroll
+ for (i = 0; i < 16; i++)
+ w[i] = be32dec(p + 4*i);
+
+# pragma unroll
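+		/* Rounds 0..15: f = Ch(b, c, d) with constant K0. */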
+ for (i = 0; i < 16; i++) {
+ f = b & c | ~b & d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K0;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
- /* We can only do the following code for assember, the reason
- * being that the sha1_block 'C' version changes the values
- * in the 'data' array. The assember code avoids this and
- * copies it to a local array. I should be able to do this for
- * the C version as well....
- */
-#if 1
-#if BYTE_ORDER == BIG_ENDIAN || defined(SHA1_ASM)
- if ((((unsigned int)data)%sizeof(u_int32_t)) == 0)
- {
- sw=len/SHA_CBLOCK;
- if (sw)
- {
- sw*=SHA_CBLOCK;
- sha1_block(c,(u_int32_t *)data,sw);
- data+=sw;
- len-=sw;
- }
+
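+		/* Rounds 16..19: message schedule kicks in; still Ch and K0. */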
+# pragma unroll
+ for (; i < 20; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = b & c | ~b & d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K0;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
-#endif
-#endif
- /* we now can process the input data in blocks of SHA_CBLOCK
- * chars and save the leftovers to c->data. */
- p=c->data;
- while (len >= SHA_CBLOCK)
- {
-#if BYTE_ORDER == BIG_ENDIAN || BYTE_ORDER == LITTLE_ENDIAN
- if (p != (u_int32_t *)data)
- memcpy(p,data,SHA_CBLOCK);
- data+=SHA_CBLOCK;
-# if BYTE_ORDER == LITTLE_ENDIAN
-# ifndef SHA1_ASM /* Will not happen */
- for (sw=(SHA_LBLOCK/4); sw; sw--)
- {
- Endian_Reverse32(p[0]);
- Endian_Reverse32(p[1]);
- Endian_Reverse32(p[2]);
- Endian_Reverse32(p[3]);
- p+=4;
- }
- p=c->data;
-# endif
-# endif
-#else
- for (sw=(SHA_BLOCK/4); sw; sw--)
- {
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- }
- p=c->data;
-#endif
- sha1_block(c,p,64);
- len-=SHA_CBLOCK;
+
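+		/* Rounds 20..39: f = Parity(b, c, d) with constant K1. */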
+# pragma unroll
+ for (; i < 40; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = b ^ c ^ d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K1;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
- ec=(int)len;
- c->num=ec;
- ew=(ec>>2);
- ec&=0x03;
-
- for (sw=0; sw < ew; sw++)
- { M_c2nl(data,l); p[sw]=l; }
- M_c2nl_p(data,l,ec);
- p[sw]=l;
- }
-static void SHA1_Transform(SHA_CTX *c, unsigned char *b)
- {
- u_int32_t p[16];
-#if BYTE_ORDER != BIG_ENDIAN
- u_int32_t *q;
- int i;
-#endif
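+		/* Rounds 40..59: f = Maj(b, c, d) with constant K2. */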
+# pragma unroll
+ for (; i < 60; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = (b | c) & d | b & c;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K2;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
+ }
-#if BYTE_ORDER == BIG_ENDIAN || BYTE_ORDER == LITTLE_ENDIAN
- memcpy(p,b,64);
-#if BYTE_ORDER == LITTLE_ENDIAN
- q=p;
- for (i=(SHA_LBLOCK/4); i; i--)
- {
- Endian_Reverse32(q[0]);
- Endian_Reverse32(q[1]);
- Endian_Reverse32(q[2]);
- Endian_Reverse32(q[3]);
- q+=4;
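+		/* Rounds 60..79: f = Parity(b, c, d) with constant K3. */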
+# pragma unroll
+ for (; i < 80; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = b ^ c ^ d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K3;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
-#endif
-#else
- q=p;
- for (i=(SHA_LBLOCK/4); i; i--)
- {
- u_int32_t l;
- c2nl(b,l); *(q++)=l;
- c2nl(b,l); *(q++)=l;
- c2nl(b,l); *(q++)=l;
- c2nl(b,l); *(q++)=l;
- }
-#endif
- sha1_block(c,p,64);
- }
-#ifndef SHA1_ASM
+ h0 += a;
+ h1 += b;
+ h2 += c;
+ h3 += d;
+ h4 += e;
-void
-sha1_block(SHA_CTX *c, const u_int32_t *W, int num)
-{
- u_int32_t A,B,C,D,E,T;
- u_int32_t X[16];
-
- A=c->h0;
- B=c->h1;
- C=c->h2;
- D=c->h3;
- E=c->h4;
-
- for (;;)
- {
- BODY_00_15( 0,A,B,C,D,E,T,W);
- BODY_00_15( 1,T,A,B,C,D,E,W);
- BODY_00_15( 2,E,T,A,B,C,D,W);
- BODY_00_15( 3,D,E,T,A,B,C,W);
- BODY_00_15( 4,C,D,E,T,A,B,W);
- BODY_00_15( 5,B,C,D,E,T,A,W);
- BODY_00_15( 6,A,B,C,D,E,T,W);
- BODY_00_15( 7,T,A,B,C,D,E,W);
- BODY_00_15( 8,E,T,A,B,C,D,W);
- BODY_00_15( 9,D,E,T,A,B,C,W);
- BODY_00_15(10,C,D,E,T,A,B,W);
- BODY_00_15(11,B,C,D,E,T,A,W);
- BODY_00_15(12,A,B,C,D,E,T,W);
- BODY_00_15(13,T,A,B,C,D,E,W);
- BODY_00_15(14,E,T,A,B,C,D,W);
- BODY_00_15(15,D,E,T,A,B,C,W);
- BODY_16_19(16,C,D,E,T,A,B,W,W,W,W);
- BODY_16_19(17,B,C,D,E,T,A,W,W,W,W);
- BODY_16_19(18,A,B,C,D,E,T,W,W,W,W);
- BODY_16_19(19,T,A,B,C,D,E,W,W,W,X);
-
- BODY_20_31(20,E,T,A,B,C,D,W,W,W,X);
- BODY_20_31(21,D,E,T,A,B,C,W,W,W,X);
- BODY_20_31(22,C,D,E,T,A,B,W,W,W,X);
- BODY_20_31(23,B,C,D,E,T,A,W,W,W,X);
- BODY_20_31(24,A,B,C,D,E,T,W,W,X,X);
- BODY_20_31(25,T,A,B,C,D,E,W,W,X,X);
- BODY_20_31(26,E,T,A,B,C,D,W,W,X,X);
- BODY_20_31(27,D,E,T,A,B,C,W,W,X,X);
- BODY_20_31(28,C,D,E,T,A,B,W,W,X,X);
- BODY_20_31(29,B,C,D,E,T,A,W,W,X,X);
- BODY_20_31(30,A,B,C,D,E,T,W,X,X,X);
- BODY_20_31(31,T,A,B,C,D,E,W,X,X,X);
- BODY_32_39(32,E,T,A,B,C,D,X);
- BODY_32_39(33,D,E,T,A,B,C,X);
- BODY_32_39(34,C,D,E,T,A,B,X);
- BODY_32_39(35,B,C,D,E,T,A,X);
- BODY_32_39(36,A,B,C,D,E,T,X);
- BODY_32_39(37,T,A,B,C,D,E,X);
- BODY_32_39(38,E,T,A,B,C,D,X);
- BODY_32_39(39,D,E,T,A,B,C,X);
-
- BODY_40_59(40,C,D,E,T,A,B,X);
- BODY_40_59(41,B,C,D,E,T,A,X);
- BODY_40_59(42,A,B,C,D,E,T,X);
- BODY_40_59(43,T,A,B,C,D,E,X);
- BODY_40_59(44,E,T,A,B,C,D,X);
- BODY_40_59(45,D,E,T,A,B,C,X);
- BODY_40_59(46,C,D,E,T,A,B,X);
- BODY_40_59(47,B,C,D,E,T,A,X);
- BODY_40_59(48,A,B,C,D,E,T,X);
- BODY_40_59(49,T,A,B,C,D,E,X);
- BODY_40_59(50,E,T,A,B,C,D,X);
- BODY_40_59(51,D,E,T,A,B,C,X);
- BODY_40_59(52,C,D,E,T,A,B,X);
- BODY_40_59(53,B,C,D,E,T,A,X);
- BODY_40_59(54,A,B,C,D,E,T,X);
- BODY_40_59(55,T,A,B,C,D,E,X);
- BODY_40_59(56,E,T,A,B,C,D,X);
- BODY_40_59(57,D,E,T,A,B,C,X);
- BODY_40_59(58,C,D,E,T,A,B,X);
- BODY_40_59(59,B,C,D,E,T,A,X);
-
- BODY_60_79(60,A,B,C,D,E,T,X);
- BODY_60_79(61,T,A,B,C,D,E,X);
- BODY_60_79(62,E,T,A,B,C,D,X);
- BODY_60_79(63,D,E,T,A,B,C,X);
- BODY_60_79(64,C,D,E,T,A,B,X);
- BODY_60_79(65,B,C,D,E,T,A,X);
- BODY_60_79(66,A,B,C,D,E,T,X);
- BODY_60_79(67,T,A,B,C,D,E,X);
- BODY_60_79(68,E,T,A,B,C,D,X);
- BODY_60_79(69,D,E,T,A,B,C,X);
- BODY_60_79(70,C,D,E,T,A,B,X);
- BODY_60_79(71,B,C,D,E,T,A,X);
- BODY_60_79(72,A,B,C,D,E,T,X);
- BODY_60_79(73,T,A,B,C,D,E,X);
- BODY_60_79(74,E,T,A,B,C,D,X);
- BODY_60_79(75,D,E,T,A,B,C,X);
- BODY_60_79(76,C,D,E,T,A,B,X);
- BODY_60_79(77,B,C,D,E,T,A,X);
- BODY_60_79(78,A,B,C,D,E,T,X);
- BODY_60_79(79,T,A,B,C,D,E,X);
-
- c->h0=(c->h0+E)&0xffffffffL;
- c->h1=(c->h1+T)&0xffffffffL;
- c->h2=(c->h2+A)&0xffffffffL;
- c->h3=(c->h3+B)&0xffffffffL;
- c->h4=(c->h4+C)&0xffffffffL;
-
- num-=64;
- if (num <= 0) break;
-
- A=c->h0;
- B=c->h1;
- C=c->h2;
- D=c->h3;
- E=c->h4;
-
- W+=16;
- }
+ p += SHA_CBLOCK;
+ len -= SHA_CBLOCK;
}
-#endif
-void SHA1_Final(unsigned char *md, SHA_CTX *c)
- {
- int i,j;
- u_int32_t l;
- u_int32_t *p;
- static unsigned char end[4]={0x80,0x00,0x00,0x00};
- unsigned char *cp=end;
-
- /* c->num should definitly have room for at least one more byte. */
- p=c->data;
- j=c->num;
- i=j>>2;
-#ifdef PURIFY
- if ((j&0x03) == 0) p[i]=0;
+ c->h0 = h0;
+ c->h1 = h1;
+ c->h2 = h2;
+ c->h3 = h3;
+ c->h4 = h4;
+}
#endif
- l=p[i];
- M_p_c2nl(cp,l,j&0x03);
- p[i]=l;
- i++;
- /* i is the next 'undefined word' */
- if (c->num >= SHA_LAST_BLOCK)
- {
- for (; i<SHA_LBLOCK; i++)
- p[i]=0;
- sha1_block(c,p,64);
- i=0;
- }
- for (; i<(SHA_LBLOCK-2); i++)
- p[i]=0;
- p[SHA_LBLOCK-2]=c->Nh;
- p[SHA_LBLOCK-1]=c->Nl;
-#if BYTE_ORDER == LITTLE_ENDIAN && defined(SHA1_ASM)
- Endian_Reverse32(p[SHA_LBLOCK-2]);
- Endian_Reverse32(p[SHA_LBLOCK-1]);
-#endif
- sha1_block(c,p,64);
- cp=md;
- l=c->h0; nl2c(l,cp);
- l=c->h1; nl2c(l,cp);
- l=c->h2; nl2c(l,cp);
- l=c->h3; nl2c(l,cp);
- l=c->h4; nl2c(l,cp);
-
- /* Clear the context state */
- explicit_bzero(&c, sizeof(c));
- }
#ifdef WEAK_REFS
/* When building libmd, provide weak references. Note: this is not
