D45444.diff
diff --git a/lib/libmd/Makefile b/lib/libmd/Makefile
--- a/lib/libmd/Makefile
+++ b/lib/libmd/Makefile
@@ -117,9 +117,12 @@
.endif
.if ${USE_ASM_SOURCES} != 0
-.if exists(${MACHINE_ARCH}/sha.S)
-SRCS+= sha.S
+.if exists(${MACHINE_ARCH}/sha1block.S)
+SRCS+= sha1block.S
CFLAGS+= -DSHA1_ASM
+.if exists(${MACHINE_ARCH}/sha1dispatch.c)
+SRCS+= sha1dispatch.c
+.endif
.endif
.if exists(${MACHINE_ARCH}/rmd160.S)
SRCS+= rmd160.S
@@ -135,7 +138,7 @@
# the assembly vs C versions, and skein_block needs to be rebuilt if it changes.
skein_block.o skein_block.pico: Makefile
.endif
-.if exists(${MACHINE_ARCH}/sha.S) || exists(${MACHINE_ARCH}/rmd160.S) || exists(${MACHINE_ARCH}/skein_block_asm.S)
+.if exists(${MACHINE_ARCH}/rmd160.S) || exists(${MACHINE_ARCH}/skein_block_asm.S)
ACFLAGS+= -DELF -Wa,--noexecstack
.endif
.if ${MACHINE_CPUARCH} == "aarch64"
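
For reference (illustrative, not part of the change): whichever block routine the Makefile logic above selects, libmd's public SHA-1 interface is unchanged, so a check against the FIPS 180-1 "abc" test vector exercises the dispatched code path. A minimal sketch, assuming only libmd's documented SHA1_Init/SHA1_Update/SHA1_End interface; link with -lmd.

#include <sha.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	SHA1_CTX ctx;
	char digest[41];

	SHA1_Init(&ctx);
	SHA1_Update(&ctx, (const void *)"abc", 3);
	SHA1_End(&ctx, digest);		/* lowercase hex, NUL-terminated */
	if (strcmp(digest, "a9993e364706816aba3e25717850c26c9cd0d89d") != 0) {
		fprintf(stderr, "SHA-1 self-test failed: %s\n", digest);
		return (1);
	}
	puts("SHA-1 self-test passed");
	return (0);
}
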
diff --git a/lib/libmd/aarch64/sha1block.S b/lib/libmd/aarch64/sha1block.S
new file mode 100644
--- /dev/null
+++ b/lib/libmd/aarch64/sha1block.S
@@ -0,0 +1,490 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * sha1block_sha1 implementation based on sha1-arm.c,
+ * written and placed in public domain by Jeffrey Walton
+ * based on code from ARM, and by Johannes Schneiders, Skip
+ * Hovsmith and Barry O'Rourke for the mbedTLS project.
+ */
+
+#include <machine/asm.h>
+
+/*
+ * Scalar SHA1 implementation.
+ *
+ * Due to the ample register file available on AArch64, the w array is
+ * kept entirely in registers. The saved a-e variables are instead kept
+ * in memory as we don't have enough registers left to hold them.
+ */
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_scalar)
+ctx .req x0
+buf .req x1
+len .req x2
+w .req sp
+a .req w3
+b .req w4
+c .req w5
+d .req w6
+e .req w7
+k .req w8
+f .req w9
+tmp .req w10
+w_0 .req w11
+w_1 .req w12
+w_2 .req w13
+w_3 .req w14
+w_4 .req w15
+w_5 .req w16
+w_6 .req w17
+// w18 is the platform register
+w_7 .req w19
+w_8 .req w20
+w_9 .req w21
+w_10 .req w22
+w_11 .req w23
+w_12 .req w24
+w_13 .req w25
+w_14 .req w26
+w_15 .req w27
+
+.macro shuffle w_i, w_i3, w_i8, w_i14
+ eor \w_i, \w_i, \w_i3
+ eor tmp, \w_i8, \w_i14
+ eor \w_i, \w_i, tmp // w[i-16] ^ w[i-14] ^ w[i-8] ^ w[i-3]
+ ror \w_i, \w_i, #31 // w[i] = ... ror #31
+.endm
+
+.macro func1 a, b, c, d, e
+ and f, \c, \b
+ bic tmp, \d, \b
+ orr f, f, tmp
+.endm
+
+.macro func2 a, b, c, d, e
+ eor f, \b, \c
+ eor f, f, \d
+.endm
+
+.macro func3 a, b, c, d, e
+ eor tmp, \b, \c
+ and f, \b, \c
+ and tmp, tmp, \d
+ orr f, f, tmp
+.endm
+
+.macro func4 a, b, c, d, e
+ func2 \a, \b, \c, \d, \e
+.endm
+
+.macro mix a, b, c, d, e, w_i
+ ror \b, \b, #2
+ ror tmp, \a, #27
+ add \e, \e, \w_i
+ add tmp, tmp, k
+ add \e, \e, f
+ add \e, \e, tmp // (a ror 27) + e + f + k + w[i]
+.endm
+
+.macro round1 a, b, c, d, e, w_i
+ func1 \a, \b, \c, \d, \e
+ rev \w_i, \w_i
+ mix \a, \b, \c, \d, \e, \w_i
+.endm
+
+.macro round func, a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ shuffle \w_i, \w_i3, \w_i8, \w_i14
+ \func \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, \w_i
+.endm
+
+.macro round1x a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func1, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+.macro round2 a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func2, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+.macro round3 a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func3, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+.macro round4 a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func4, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+ ands len, len, #~63 // take length in multiples of block length
+ beq 1f // bail out if input empty
+
+ sub sp, sp, #24+9*8 // allocate stack space
+ str x19, [sp, #24+0*8]
+ stp x20, x21, [sp, #24+1*8]
+ stp x22, x23, [sp, #24+3*8]
+ stp x24, x25, [sp, #24+5*8]
+ stp x26, x27, [sp, #24+7*8]
+
+ ldp a, b, [ctx, #0] // load SHA1 state from context
+ ldp c, d, [ctx, #8]
+ ldr e, [ctx, #16]
+
+0: stp a, b, [sp, #0] // save old SHA1 state
+ stp c, d, [sp, #8]
+ str e, [sp, #16]
+
+ movz k, #0x7999 // round constant 1
+ movk k, #0x5a82, lsl #16
+
+ ldp w_0, w_1, [buf, #0*4]
+ round1 a, b, c, d, e, w_0
+ round1 e, a, b, c, d, w_1
+
+ ldp w_2, w_3, [buf, #2*4]
+ round1 d, e, a, b, c, w_2
+ round1 c, d, e, a, b, w_3
+
+ ldp w_4, w_5, [buf, #4*4]
+ round1 b, c, d, e, a, w_4
+ round1 a, b, c, d, e, w_5
+
+ ldp w_6, w_7, [buf, #6*4]
+ round1 e, a, b, c, d, w_6
+ round1 d, e, a, b, c, w_7
+
+ ldp w_8, w_9, [buf, #8*4]
+ round1 c, d, e, a, b, w_8
+ round1 b, c, d, e, a, w_9
+
+ ldp w_10, w_11, [buf, #10*4]
+ round1 a, b, c, d, e, w_10
+ round1 e, a, b, c, d, w_11
+
+ ldp w_12, w_13, [buf, #12*4]
+ round1 d, e, a, b, c, w_12
+ round1 c, d, e, a, b, w_13
+
+ ldp w_14, w_15, [buf, #14*4]
+ round1 b, c, d, e, a, w_14
+ round1 a, b, c, d, e, w_15
+
+ round1x e, a, b, c, d, w_0, w_13, w_8, w_2
+ round1x d, e, a, b, c, w_1, w_14, w_9, w_3
+ round1x c, d, e, a, b, w_2, w_15, w_10, w_4
+ round1x b, c, d, e, a, w_3, w_0, w_11, w_5
+
+ movz k, #0xeba1 // round constant 2
+ movk k, #0x6ed9, lsl #16
+
+ round2 a, b, c, d, e, w_4, w_1, w_12, w_6
+ round2 e, a, b, c, d, w_5, w_2, w_13, w_7
+ round2 d, e, a, b, c, w_6, w_3, w_14, w_8
+ round2 c, d, e, a, b, w_7, w_4, w_15, w_9
+ round2 b, c, d, e, a, w_8, w_5, w_0, w_10
+
+ round2 a, b, c, d, e, w_9, w_6, w_1, w_11
+ round2 e, a, b, c, d, w_10, w_7, w_2, w_12
+ round2 d, e, a, b, c, w_11, w_8, w_3, w_13
+ round2 c, d, e, a, b, w_12, w_9, w_4, w_14
+ round2 b, c, d, e, a, w_13, w_10, w_5, w_15
+
+ round2 a, b, c, d, e, w_14, w_11, w_6, w_0
+ round2 e, a, b, c, d, w_15, w_12, w_7, w_1
+ round2 d, e, a, b, c, w_0, w_13, w_8, w_2
+ round2 c, d, e, a, b, w_1, w_14, w_9, w_3
+ round2 b, c, d, e, a, w_2, w_15, w_10, w_4
+
+ round2 a, b, c, d, e, w_3, w_0, w_11, w_5
+ round2 e, a, b, c, d, w_4, w_1, w_12, w_6
+ round2 d, e, a, b, c, w_5, w_2, w_13, w_7
+ round2 c, d, e, a, b, w_6, w_3, w_14, w_8
+ round2 b, c, d, e, a, w_7, w_4, w_15, w_9
+
+ movz k, #0xbcdc // round constant 3
+ movk k, #0x8f1b, lsl #16
+
+ round3 a, b, c, d, e, w_8, w_5, w_0, w_10
+ round3 e, a, b, c, d, w_9, w_6, w_1, w_11
+ round3 d, e, a, b, c, w_10, w_7, w_2, w_12
+ round3 c, d, e, a, b, w_11, w_8, w_3, w_13
+ round3 b, c, d, e, a, w_12, w_9, w_4, w_14
+
+ round3 a, b, c, d, e, w_13, w_10, w_5, w_15
+ round3 e, a, b, c, d, w_14, w_11, w_6, w_0
+ round3 d, e, a, b, c, w_15, w_12, w_7, w_1
+ round3 c, d, e, a, b, w_0, w_13, w_8, w_2
+ round3 b, c, d, e, a, w_1, w_14, w_9, w_3
+
+ round3 a, b, c, d, e, w_2, w_15, w_10, w_4
+ round3 e, a, b, c, d, w_3, w_0, w_11, w_5
+ round3 d, e, a, b, c, w_4, w_1, w_12, w_6
+ round3 c, d, e, a, b, w_5, w_2, w_13, w_7
+ round3 b, c, d, e, a, w_6, w_3, w_14, w_8
+
+ round3 a, b, c, d, e, w_7, w_4, w_15, w_9
+ round3 e, a, b, c, d, w_8, w_5, w_0, w_10
+ round3 d, e, a, b, c, w_9, w_6, w_1, w_11
+ round3 c, d, e, a, b, w_10, w_7, w_2, w_12
+ round3 b, c, d, e, a, w_11, w_8, w_3, w_13
+
+ movz k, #0xc1d6 // round constant 4
+ movk k, #0xca62, lsl #16
+
+ round4 a, b, c, d, e, w_12, w_9, w_4, w_14
+ round4 e, a, b, c, d, w_13, w_10, w_5, w_15
+ round4 d, e, a, b, c, w_14, w_11, w_6, w_0
+ round4 c, d, e, a, b, w_15, w_12, w_7, w_1
+ round4 b, c, d, e, a, w_0, w_13, w_8, w_2
+
+ round4 a, b, c, d, e, w_1, w_14, w_9, w_3
+ round4 e, a, b, c, d, w_2, w_15, w_10, w_4
+ round4 d, e, a, b, c, w_3, w_0, w_11, w_5
+ round4 c, d, e, a, b, w_4, w_1, w_12, w_6
+ round4 b, c, d, e, a, w_5, w_2, w_13, w_7
+
+ round4 a, b, c, d, e, w_6, w_3, w_14, w_8
+ round4 e, a, b, c, d, w_7, w_4, w_15, w_9
+ round4 d, e, a, b, c, w_8, w_5, w_0, w_10
+ round4 c, d, e, a, b, w_9, w_6, w_1, w_11
+ round4 b, c, d, e, a, w_10, w_7, w_2, w_12
+
+ round4 a, b, c, d, e, w_11, w_8, w_3, w_13
+ round4 e, a, b, c, d, w_12, w_9, w_4, w_14
+ round4 d, e, a, b, c, w_13, w_10, w_5, w_15
+ round4 c, d, e, a, b, w_14, w_11, w_6, w_0
+ round4 b, c, d, e, a, w_15, w_12, w_7, w_1
+
+ ldp w_0, w_1, [sp, #0] // reload saved SHA1 state
+ ldp w_2, w_3, [sp, #8]
+ ldr w_4, [sp, #16]
+
+ add a, a, w_0
+ add b, b, w_1
+ add c, c, w_2
+ add d, d, w_3
+ add e, e, w_4
+
+ add buf, buf, #64
+ subs len, len, #64
+ bhi 0b
+
+ stp a, b, [ctx, #0] // write updated SHA1 state
+ stp c, d, [ctx, #8]
+ str e, [ctx, #16]
+
+ ldr x19, [sp, #24+0*8]
+ ldp x20, x21, [sp, #24+1*8]
+ ldp x22, x23, [sp, #24+3*8]
+ ldp x24, x25, [sp, #24+5*8]
+ ldp x26, x27, [sp, #24+7*8]
+ add sp, sp, #24+9*8
+
+1: ret
+END(_libmd_sha1block_scalar)
+
+/*
+ * SHA1 implementation using the SHA1 instruction set extension.
+ */
+
+ .arch_extension sha2
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_sha1)
+ /* ctx, buf, len: same as for sha1block_scalar */
+kaddr .req x3
+abcd .req v0
+abcd_q .req q0 // alias for use with scalar instructions
+abcd_s .req s0
+e0 .req s1
+e0_v .req v1
+e1 .req s2
+abcd_saved .req v3
+e0_saved .req v4
+tmp0 .req v5
+tmp1 .req v6
+msg0 .req v16
+msg1 .req v17
+msg2 .req v18
+msg3 .req v19
+k0 .req v20
+k1 .req v21
+k2 .req v22
+k3 .req v23
+
+ ands len, len, #~63 // take length in multiples of block length
+ beq 1f // bail out if input empty
+
+ ldr abcd_q, [ctx, #0]
+ ldr e0, [ctx, #16]
+
+ adrp kaddr, k1234
+ add kaddr, kaddr, #:lo12:k1234
+ ld4r {k0.4s, k1.4s, k2.4s, k3.4s}, [kaddr]
+
+0: mov abcd_saved.16b, abcd.16b
+ mov e0_saved.16b, e0_v.16b
+
+ ld1 {msg0.4s, msg1.4s, msg2.4s, msg3.4s}, [buf], #64
+ rev32 msg0.16b, msg0.16b
+ rev32 msg1.16b, msg1.16b
+ rev32 msg2.16b, msg2.16b
+ rev32 msg3.16b, msg3.16b
+
+ add tmp0.4s, msg0.4s, k0.4s
+ add tmp1.4s, msg1.4s, k0.4s
+
+ /* rounds 0--3 */
+ sha1h e1, abcd_s
+ sha1c abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k0.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 4--7 */
+ sha1h e0, abcd_s
+ sha1c abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k0.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 8--11 */
+ sha1h e1, abcd_s
+ sha1c abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k0.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 12--15 */
+ sha1h e0, abcd_s
+ sha1c abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k1.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 16--19 */
+ sha1h e1, abcd_s
+ sha1c abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k1.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 20--23 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k1.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 24--27 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k1.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 28--31 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k1.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 32--35 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k2.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 36--39 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k2.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 40--43 */
+ sha1h e1, abcd_s
+ sha1m abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k2.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 44--47 */
+ sha1h e0, abcd_s
+ sha1m abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k2.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 48--51 */
+ sha1h e1, abcd_s
+ sha1m abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k2.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 52--55 */
+ sha1h e0, abcd_s
+ sha1m abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k3.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 56--59 */
+ sha1h e1, abcd_s
+ sha1m abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k3.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 60--63 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k3.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 64--67 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k3.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 68--71 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k3.4s
+ sha1su1 msg0.4s, msg3.4s
+
+ /* rounds 72--75 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+
+ /* rounds 76--79 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+
+ add e0_v.4s, e0_v.4s, e0_saved.4s
+ add abcd.4s, abcd.4s, abcd_saved.4s
+
+ subs len, len, #64
+ bhi 0b
+
+ str abcd_q, [ctx, #0]
+ str e0, [ctx, #16]
+
+1: ret
+END(_libmd_sha1block_sha1)
+
+ .section .rodata
+ .balign 16
+k1234: .4byte 0x5a827999
+ .4byte 0x6ed9eba1
+ .4byte 0x8f1bbcdc
+ .4byte 0xca62c1d6
+ .size k1234, .-k1234
+
+ .section .note.GNU-stack,"",%progbits
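
For reference (illustrative, not part of the change): the per-round arithmetic of the scalar routine above, written out in C. This is a sketch of what the func1/func2/func3, shuffle and mix macros compute, using only the standard SHA-1 definitions.

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, int n) { return ((x << n) | (x >> (32 - n))); }

/* func1: "choice"; func2/func4: parity; func3: majority ((b&c)|((b^c)&d) above). */
static uint32_t f1(uint32_t b, uint32_t c, uint32_t d) { return ((b & c) | (~b & d)); }
static uint32_t f2(uint32_t b, uint32_t c, uint32_t d) { return (b ^ c ^ d); }
static uint32_t f3(uint32_t b, uint32_t c, uint32_t d) { return ((b & c) | (b & d) | (c & d)); }

/* shuffle: w[] is a 16-entry ring buffer; ror #31 is the same as rol #1. */
static uint32_t
sha1_schedule(uint32_t w[16], int i)
{
	uint32_t x = w[i & 15] ^ w[(i - 3) & 15] ^ w[(i - 8) & 15] ^ w[(i - 14) & 15];

	return (w[i & 15] = rol32(x, 1));
}

/* mix: e += rol(a, 5) + f + k + w[i] (ror #27 == rol #5), then b = rol(b, 30). */
static void
sha1_mix(uint32_t a, uint32_t *b, uint32_t *e, uint32_t f, uint32_t k, uint32_t w_i)
{
	*e += rol32(a, 5) + f + k + w_i;
	*b = rol32(*b, 30);
}
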
diff --git a/lib/libmd/aarch64/sha1dispatch.c b/lib/libmd/aarch64/sha1dispatch.c
new file mode 100644
--- /dev/null
+++ b/lib/libmd/aarch64/sha1dispatch.c
@@ -0,0 +1,24 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <machine/ifunc.h>
+#include <sha.h>
+#include <sys/auxv.h>
+
+extern void _libmd_sha1block_scalar(SHA1_CTX *, const void *, size_t);
+extern void _libmd_sha1block_sha1(SHA1_CTX *, const void *, size_t);
+
+DEFINE_IFUNC(, void, sha1_block, (SHA1_CTX *, const void *, size_t))
+{
+ unsigned long hwcap = 0;
+
+ elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
+
+ if (hwcap & HWCAP_SHA1)
+ return (_libmd_sha1block_sha1);
+ else
+ return (_libmd_sha1block_scalar);
+}
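
For reference (illustrative, not part of the change): the resolver above runs once at load time. The same HWCAP probe can be made from an ordinary program to see which variant the dispatcher would pick; this sketch uses the same headers as the dispatcher and assumes HWCAP_SHA1 is visible through them, as it is in the file above.

#include <sys/auxv.h>
#include <stdio.h>

int
main(void)
{
	unsigned long hwcap = 0;

	elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
	printf("SHA-1 instructions %s; dispatcher would pick %s\n",
	    (hwcap & HWCAP_SHA1) ? "present" : "absent",
	    (hwcap & HWCAP_SHA1) ? "_libmd_sha1block_sha1" : "_libmd_sha1block_scalar");
	return (0);
}
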
diff --git a/lib/libmd/amd64/sha1block.S b/lib/libmd/amd64/sha1block.S
new file mode 100644
--- /dev/null
+++ b/lib/libmd/amd64/sha1block.S
@@ -0,0 +1,1851 @@
+/*-
+ * Copyright (c) 2013 The Go Authors. All rights reserved.
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * Adapted from Go's crypto/sha1/sha1block_amd64.s.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+/*
+ * SHA-1 block routine. See sha1c.c for C equivalent.
+ *
+ * There are 80 rounds of 4 types:
+ * - rounds 0-15 are type 1 and load data (round1 macro).
+ * - rounds 16-19 are type 1 and do not load data (round1x macro).
+ * - rounds 20-39 are type 2 and do not load data (round2 macro).
+ * - rounds 40-59 are type 3 and do not load data (round3 macro).
+ * - rounds 60-79 are type 4 and do not load data (round4 macro).
+ *
+ * Each round loads or shuffles the data, then computes a per-round
+ * function of b, c, d, and then mixes the result into and rotates the
+ * five registers a, b, c, d, e holding the intermediate results.
+ *
+ * The register rotation is implemented by rotating the arguments to
+ * the round macros instead of by explicit move instructions.
+ */
+.macro load index
+ mov (\index)*4(%rsi), %r10d
+ bswap %r10d
+ mov %r10d, (\index)*4(%rsp)
+.endm
+
+.macro shuffle index
+ mov ((\index )&0xf)*4(%rsp), %r10d
+ xor ((\index- 3)&0xf)*4(%rsp), %r10d
+ xor ((\index- 8)&0xf)*4(%rsp), %r10d
+ xor ((\index-14)&0xf)*4(%rsp), %r10d
+ rol $1, %r10d
+ mov %r10d, ((\index)&0xf)*4(%rsp)
+.endm
+
+.macro func1 a, b, c, d, e
+ mov \d, %r9d
+ xor \c, %r9d
+ and \b, %r9d
+ xor \d, %r9d
+.endm
+
+.macro func2 a, b, c, d, e
+ mov \b, %r9d
+ xor \c, %r9d
+ xor \d, %r9d
+.endm
+
+.macro func3 a, b, c, d, e
+ mov \b, %r8d
+ or \c, %r8d
+ and \d, %r8d
+ mov \b, %r9d
+ and \c, %r9d
+ or %r8d, %r9d
+.endm
+
+.macro func4 a, b, c, d, e
+ func2 \a, \b, \c, \d, \e
+.endm
+
+.macro mix a, b, c, d, e, const
+ rol $30, \b
+ add %r9d, \e
+ mov \a, %r8d
+ rol $5, %r8d
+ lea \const(\e, %r10d, 1), \e
+ add %r8d, \e
+.endm
+
+.macro round1 a, b, c, d, e, index
+ load \index
+ func1 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x5a827999
+.endm
+
+.macro round1x a, b, c, d, e, index
+ shuffle \index
+ func1 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x5a827999
+.endm
+
+.macro round2 a, b, c, d, e, index
+ shuffle \index
+ func2 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x6ed9eba1
+.endm
+
+.macro round3 a, b, c, d, e, index
+ shuffle \index
+ func3 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x8f1bbcdc
+.endm
+
+.macro round4 a, b, c, d, e, index
+ shuffle \index
+ func4 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0xca62c1d6
+.endm
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_scalar)
+ push %rbp
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rdi // rdi: SHA1_CTX
+ sub $64+8, %rsp // 64 bytes for round keys
+ // plus alignment
+
+ mov %rdi, %rbp
+ // rsi: buf
+ and $~63, %rdx // rdx: length in blocks
+ lea (%rsi, %rdx, 1), %rdi // rdi: end pointer
+ mov (%rbp), %eax // c->h0
+ mov 4(%rbp), %ebx // c->h1
+ mov 8(%rbp), %ecx // c->h2
+ mov 12(%rbp), %edx // c->h3
+ mov 16(%rbp), %ebp // c->h4
+
+ cmp %rsi, %rdi // any data to process?
+ je .Lend
+
+.Lloop: mov %eax, %r11d
+ mov %ebx, %r12d
+ mov %ecx, %r13d
+ mov %edx, %r14d
+ mov %ebp, %r15d
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 0
+ round1 %ebp, %eax, %ebx, %ecx, %edx, 1
+ round1 %edx, %ebp, %eax, %ebx, %ecx, 2
+ round1 %ecx, %edx, %ebp, %eax, %ebx, 3
+ round1 %ebx, %ecx, %edx, %ebp, %eax, 4
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 5
+ round1 %ebp, %eax, %ebx, %ecx, %edx, 6
+ round1 %edx, %ebp, %eax, %ebx, %ecx, 7
+ round1 %ecx, %edx, %ebp, %eax, %ebx, 8
+ round1 %ebx, %ecx, %edx, %ebp, %eax, 9
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 10
+ round1 %ebp, %eax, %ebx, %ecx, %edx, 11
+ round1 %edx, %ebp, %eax, %ebx, %ecx, 12
+ round1 %ecx, %edx, %ebp, %eax, %ebx, 13
+ round1 %ebx, %ecx, %edx, %ebp, %eax, 14
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 15
+ round1x %ebp, %eax, %ebx, %ecx, %edx, 16
+ round1x %edx, %ebp, %eax, %ebx, %ecx, 17
+ round1x %ecx, %edx, %ebp, %eax, %ebx, 18
+ round1x %ebx, %ecx, %edx, %ebp, %eax, 19
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 20
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 21
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 22
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 23
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 24
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 25
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 26
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 27
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 28
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 29
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 30
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 31
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 32
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 33
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 34
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 35
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 36
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 37
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 38
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 39
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 40
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 41
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 42
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 43
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 44
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 45
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 46
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 47
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 48
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 49
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 50
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 51
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 52
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 53
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 54
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 55
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 56
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 57
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 58
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 59
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 60
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 61
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 62
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 63
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 64
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 65
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 66
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 67
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 68
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 69
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 70
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 71
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 72
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 73
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 74
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 75
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 76
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 77
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 78
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 79
+
+ add %r11d, %eax
+ add %r12d, %ebx
+ add %r13d, %ecx
+ add %r14d, %edx
+ add %r15d, %ebp
+
+ add $64, %rsi
+ cmp %rdi, %rsi
+ jb .Lloop
+
+.Lend: add $64+8, %rsp
+ pop %rdi // SHA1_CTX
+ mov %eax, (%rdi)
+ mov %ebx, 4(%rdi)
+ mov %ecx, 8(%rdi)
+ mov %edx, 12(%rdi)
+ mov %ebp, 16(%rdi)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
+ pop %rbp
+ ret
+END(_libmd_sha1block_scalar)
+
+/*
+ * This is the implementation using AVX2, BMI1 and BMI2. It is based on:
+ * "SHA-1 implementation with Intel(R) AVX2 instruction set extensions"
+ * From http://software.intel.com/en-us/articles
+ * (look for improving-the-performance-of-the-secure-hash-algorithm-1)
+ * This implementation is 2x unrolled and interleaves the vector
+ * instructions used to precompute W with the scalar computation of the
+ * current round, for optimal scheduling.
+ */
+
+ /* trivial helper macros */
+.macro update_hash a, tb, c, d, e
+ add (%r9), \a
+ mov \a, (%r9)
+ add 4(%r9), \tb
+ mov \tb, 4(%r9)
+ add 8(%r9), \c
+ mov \c, 8(%r9)
+ add 12(%r9), \d
+ mov \d, 12(%r9)
+ add 16(%r9), \e
+ mov \e, 16(%r9)
+.endm
+
+	/* helper macros for precalc, which does precomputations */
+.macro precalc0 offset
+ vmovdqu \offset(%r10), %xmm0
+.endm
+
+.macro precalc1 offset
+ vinserti128 $1, \offset(%r13), %ymm0, %ymm0
+.endm
+
+.macro precalc2 yreg
+ vpshufb %ymm10, %ymm0, \yreg
+.endm
+
+.macro precalc4 yreg, k_offset
+ vpaddd \k_offset(%r8), \yreg, %ymm0
+.endm
+
+.macro precalc7 offset
+ vmovdqu %ymm0, (\offset)*2(%r14)
+.endm
+
+/*
+ * Message scheduling pre-compute for rounds 0-15
+ * r13 is a pointer to the even 64-byte block
+ * r10 is a pointer to the odd 64-byte block
+ * r14 is a pointer to the temp buffer
+ * xmm0 is used as a temp register
+ * yreg is clobbered as part of the computation
+ * offset chooses a 16 byte chunk within a block
+ * r8 is a pointer to the constants block
+ * k_offset chooses K constants relevant to this round
+ * xmm10 holds the swap mask
+ */
+.macro precalc00_15 offset, yreg
+ precalc0 \offset
+ precalc1 \offset
+ precalc2 \yreg
+ precalc4 \yreg, 0
+ precalc7 \offset
+.endm
+
+ /* helper macros for precalc16_31 */
+.macro precalc16 reg_sub16, reg_sub12, reg_sub4, reg
+ vpalignr $8, \reg_sub16, \reg_sub12, \reg // w[i - 14]
+ vpsrldq $4, \reg_sub4, %ymm0 // w[i - 3]
+.endm
+
+.macro precalc17 reg_sub16, reg_sub8, reg
+ vpxor \reg_sub8, \reg, \reg
+ vpxor \reg_sub16, %ymm0, %ymm0
+.endm
+
+.macro precalc18 reg
+ vpxor %ymm0, \reg, \reg
+ vpslldq $12, \reg, %ymm9
+.endm
+
+.macro precalc19 reg
+ vpslld $1, \reg, %ymm0
+ vpsrld $31, \reg, \reg
+ .endm
+
+.macro precalc20 reg
+ vpor \reg, %ymm0, %ymm0
+ vpslld $2, %ymm9, \reg
+.endm
+
+.macro precalc21 reg
+ vpsrld $30, %ymm9, %ymm9
+ vpxor \reg, %ymm0, %ymm0
+.endm
+
+.macro precalc23 reg, k_offset, offset
+ vpxor %ymm9, %ymm0, \reg
+ vpaddd \k_offset(%r8), \reg, %ymm0
+ vmovdqu %ymm0, (\offset)(%r14)
+.endm
+
+/*
+ * Message scheduling pre-compute for rounds 16-31
+ * calculating last 32 w[i] values in 8 XMM registers
+ * pre-calculate K+w[i] values and store to mem
+ * for later load by ALU add instruction.
+ * "brute force" vectorization for rounds 16-31 only
+ * due to w[i]->w[i-3] dependency.
+ * clobbers 5 input ymm registers REG_SUB*
+ * uses xmm0 and xmm9 as temp registers
+ * As always, r8 is a pointer to constants block
+ * and r14 is a pointer to temp buffer
+ */
+.macro precalc16_31 reg, reg_sub4, reg_sub8, reg_sub12, reg_sub16, k_offset, offset
+ precalc16 \reg_sub16, \reg_sub12, \reg_sub4, \reg
+ precalc17 \reg_sub16, \reg_sub8, \reg
+ precalc18 \reg
+ precalc19 \reg
+ precalc20 \reg
+ precalc21 \reg
+ precalc23 \reg, \k_offset, \offset
+.endm
+
+ /* helper macros for precalc_32_79 */
+.macro precalc32 reg_sub8, reg_sub4
+ vpalignr $8, \reg_sub8, \reg_sub4, %ymm0
+.endm
+
+.macro precalc33 reg_sub28, reg
+ vpxor \reg_sub28, \reg, \reg
+.endm
+
+.macro precalc34 reg_sub16
+ vpxor \reg_sub16, %ymm0, %ymm0
+.endm
+
+.macro precalc35 reg
+ vpxor %ymm0, \reg, \reg
+.endm
+
+.macro precalc36 reg
+ vpslld $2, \reg, %ymm0
+.endm
+
+.macro precalc37 reg
+ vpsrld $30, \reg, \reg
+ vpor \reg, %ymm0, \reg
+.endm
+
+.macro precalc39 reg, k_offset, offset
+ vpaddd \k_offset(%r8), \reg, %ymm0
+ vmovdqu %ymm0, \offset(%r14)
+.endm
+
+.macro precalc32_79 reg, reg_sub4, reg_sub8, reg_sub16, reg_sub28, k_offset, offset
+ precalc32 \reg_sub8, \reg_sub4
+ precalc33 \reg_sub28, \reg
+ precalc34 \reg_sub16
+ precalc35 \reg
+ precalc36 \reg
+ precalc37 \reg
+ precalc39 \reg, \k_offset, \offset
+.endm
+
+.macro precalc
+ precalc00_15 0x00, %ymm15
+ precalc00_15 0x10, %ymm14
+ precalc00_15 0x20, %ymm13
+ precalc00_15 0x30, %ymm12
+ precalc16_31 %ymm8, %ymm12, %ymm13, %ymm14, %ymm15, 0x00, 0x080
+ precalc16_31 %ymm7, %ymm8, %ymm12, %ymm13, %ymm14, 0x20, 0x0a0
+ precalc16_31 %ymm5, %ymm7, %ymm8, %ymm12, %ymm13, 0x20, 0x0c0
+ precalc16_31 %ymm3, %ymm5, %ymm7, %ymm8, %ymm12, 0x20, 0x0e0
+ precalc32_79 %ymm15, %ymm3, %ymm5, %ymm8, %ymm14, 0x20, 0x100
+ precalc32_79 %ymm14, %ymm15, %ymm3, %ymm7, %ymm13, 0x20, 0x120
+ precalc32_79 %ymm13, %ymm14, %ymm15, %ymm5, %ymm12, 0x40, 0x140
+ precalc32_79 %ymm12, %ymm13, %ymm14, %ymm3, %ymm8, 0x40, 0x160
+ precalc32_79 %ymm8, %ymm12, %ymm13, %ymm15, %ymm7, 0x40, 0x180
+ precalc32_79 %ymm7, %ymm8, %ymm12, %ymm14, %ymm5, 0x40, 0x1a0
+ precalc32_79 %ymm5, %ymm7, %ymm8, %ymm13, %ymm3, 0x40, 0x1c0
+ precalc32_79 %ymm3, %ymm5, %ymm7, %ymm12, %ymm15, 0x60, 0x1e0
+ precalc32_79 %ymm15, %ymm3, %ymm5, %ymm8, %ymm14, 0x60, 0x200
+ precalc32_79 %ymm14, %ymm15, %ymm3, %ymm7, %ymm13, 0x60, 0x220
+ precalc32_79 %ymm13, %ymm14, %ymm15, %ymm5, %ymm12, 0x60, 0x240
+ precalc32_79 %ymm12, %ymm13, %ymm14, %ymm3, %ymm8, 0x60, 0x260
+.endm
+
+/*
+ * Macros calculating individual rounds have general form
+ * calc_round_pre + precalc_round + calc_round_post
+ * calc_round_{pre,post} macros follow
+ */
+.macro calc_f1_pre offset, reg_a, reg_b, reg_c, reg_e
+ add \offset(%r15), \reg_e
+ andn \reg_c, \reg_a, %ebp
+ add \reg_b, \reg_e // add F from the previous round
+ rorx $0x1b, \reg_a, %r12d
+ rorx $2, \reg_a, \reg_b // for the next round
+.endm
+
+/*
+ * Calculate F for the next round
+ */
+.macro calc_f1_post reg_a, reg_b, reg_e
+ and \reg_b, \reg_a // b & c
+ xor %ebp, \reg_a // F1 = (b&c) ^ (~b&d)
+ add %r12d, \reg_e
+.endm
+
+/*
+ * Registers are cyclically rotated:
+ * edx -> eax -> edi -> esi -> ebx -> ecx
+ */
+.macro calc0
+ mov %esi, %ebx // precalculate first round
+ rorx $2, %esi, %esi
+ andn %eax, %ebx, %ebp
+ and %edi, %ebx
+ xor %ebp, %ebx
+ calc_f1_pre 0x0, %ecx, %ebx, %edi, %edx
+ precalc0 0x80
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc1
+ calc_f1_pre 0x4, %edx, %ecx, %esi, %eax
+ precalc1 0x80
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc2
+ calc_f1_pre 0x8, %eax, %edx, %ebx, %edi
+ precalc2 %ymm15
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc3
+ calc_f1_pre 0xc, %edi, %eax, %ecx, %esi
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc4
+ calc_f1_pre 0x20, %esi, %edi, %edx, %ebx
+ precalc4 %ymm15, 0x0
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc5
+ calc_f1_pre 0x24, %ebx, %esi, %eax, %ecx
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc6
+ calc_f1_pre 0x28, %ecx, %ebx, %edi, %edx
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc7
+ calc_f1_pre 0x2c, %edx, %ecx, %esi, %eax
+ precalc7 0x0
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc8
+ calc_f1_pre 0x40, %eax, %edx, %ebx, %edi
+ precalc0 0x90
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc9
+ calc_f1_pre 0x44, %edi, %eax, %ecx, %esi
+ precalc1 0x90
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc10
+ calc_f1_pre 0x48, %esi, %edi, %edx, %ebx
+ precalc2 %ymm14
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc11
+ calc_f1_pre 0x4c, %ebx, %esi, %eax, %ecx
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc12
+ calc_f1_pre 0x60, %ecx, %ebx, %edi, %edx
+ precalc4 %ymm14, 0
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc13
+ calc_f1_pre 0x64, %edx, %ecx, %esi, %eax
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc14
+ calc_f1_pre 0x68, %eax, %edx, %ebx, %edi
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc15
+ calc_f1_pre 0x6c, %edi, %eax, %ecx, %esi
+ precalc7 0x10
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc16
+ calc_f1_pre 0x80, %esi, %edi, %edx, %ebx
+ precalc0 0xa0
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc17
+ calc_f1_pre 0x84, %ebx, %esi, %eax, %ecx
+ precalc1 0xa0
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc18
+ calc_f1_pre 0x88, %ecx, %ebx, %edi, %edx
+ precalc2 %ymm13
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc_f2_pre offset, reg_a, reg_b, reg_e
+ add \offset(%r15), \reg_e
+ add \reg_b, \reg_e // add F from the previous round
+ rorx $0x1b, \reg_a, %r12d
+ rorx $2, \reg_a, \reg_b // for next round
+.endm
+
+.macro calc_f2_post reg_a, reg_b, reg_c, reg_e
+ xor \reg_b, \reg_a
+ add %r12d, \reg_e
+ xor \reg_c, \reg_a
+.endm
+
+.macro calc19
+ calc_f2_pre 0x8c, %edx, %ecx, %eax
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc20
+ calc_f2_pre 0xa0, %eax, %edx, %edi
+ precalc4 %ymm13, 0x0
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc21
+ calc_f2_pre 0xa4, %edi, %eax, %esi
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc22
+ calc_f2_pre 0xa8, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc23
+ calc_f2_pre 0xac, %ebx, %esi, %ecx
+ precalc7 0x20
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc24
+ calc_f2_pre 0xc0, %ecx, %ebx, %edx
+ precalc0 0xb0
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc25
+ calc_f2_pre 0xc4, %edx, %ecx, %eax
+ precalc1 0xb0
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc26
+ calc_f2_pre 0xc8, %eax, %edx, %edi
+ precalc2 %ymm12
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc27
+ calc_f2_pre 0xcc, %edi, %eax, %esi
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc28
+ calc_f2_pre 0xe0, %esi, %edi, %ebx
+ precalc4 %ymm12, 0x0
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc29
+ calc_f2_pre 0xe4, %ebx, %esi, %ecx
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc30
+ calc_f2_pre 0xe8, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc31
+ calc_f2_pre 0xec, %edx, %ecx, %eax
+ precalc7 0x30
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc32
+ calc_f2_pre 0x100, %eax, %edx, %edi
+ precalc16 %ymm15, %ymm14, %ymm12, %ymm8
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc33
+ calc_f2_pre 0x104, %edi, %eax, %esi
+ precalc17 %ymm15, %ymm13, %ymm8
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc34
+ calc_f2_pre 0x108, %esi, %edi, %ebx
+ precalc18 %ymm8
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc35
+ calc_f2_pre 0x10c, %ebx, %esi, %ecx
+ precalc19 %ymm8
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc36
+ calc_f2_pre 0x120, %ecx, %ebx, %edx
+ precalc20 %ymm8
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc37
+ calc_f2_pre 0x124, %edx, %ecx, %eax
+ precalc21 %ymm8
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc38
+ calc_f2_pre 0x128, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc_f3_pre offset, reg_e
+ add \offset(%r15), \reg_e
+.endm
+
+.macro calc_f3_post reg_a, reg_b, reg_c, reg_e, reg_tb
+ add \reg_tb, \reg_e // add F from the previous round
+ mov \reg_b, %ebp
+ or \reg_a, %ebp
+ rorx $0x1b, \reg_a, %r12d
+ rorx $2, \reg_a, \reg_tb
+ and \reg_c, %ebp // calculate F for the next round
+ and \reg_b, \reg_a
+ or %ebp, \reg_a
+ add %r12d, \reg_e
+.endm
+
+.macro calc39
+ calc_f3_pre 0x12c, %esi
+ precalc23 %ymm8, 0x0, 0x80
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc40
+ calc_f3_pre 0x140, %ebx
+ precalc16 %ymm14, %ymm13, %ymm8, %ymm7
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc41
+ calc_f3_pre 0x144, %ecx
+ precalc17 %ymm14, %ymm12, %ymm7
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc42
+ calc_f3_pre 0x148, %edx
+ precalc18 %ymm7
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc43
+ calc_f3_pre 0x14c, %eax
+ precalc19 %ymm7
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc44
+ calc_f3_pre 0x160, %edi
+ precalc20 %ymm7
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc45
+ calc_f3_pre 0x164, %esi
+ precalc21 %ymm7
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc46
+ calc_f3_pre 0x168, %ebx
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc47
+ calc_f3_pre 0x16c, %ecx
+ vpxor %ymm9, %ymm0, %ymm7
+ vpaddd 0x20(%r8), %ymm7, %ymm0
+ vmovdqu %ymm0, 0xa0(%r14)
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc48
+ calc_f3_pre 0x180, %edx
+ precalc16 %ymm13, %ymm12, %ymm7, %ymm5
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc49
+ calc_f3_pre 0x184, %eax
+ precalc17 %ymm13, %ymm8, %ymm5
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc50
+ calc_f3_pre 0x188, %edi
+ precalc18 %ymm5
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc51
+ calc_f3_pre 0x18c, %esi
+ precalc19 %ymm5
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc52
+ calc_f3_pre 0x1a0, %ebx
+ precalc20 %ymm5
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc53
+ calc_f3_pre 0x1a4, %ecx
+ precalc21 %ymm5
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc54
+ calc_f3_pre 0x1a8, %edx
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc55
+ calc_f3_pre 0x1ac, %eax
+ precalc23 %ymm5, 0x20, 0xc0
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc56
+ calc_f3_pre 0x1c0, %edi
+ precalc16 %ymm12, %ymm8, %ymm5, %ymm3
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc57
+ calc_f3_pre 0x1c4, %esi
+ precalc17 %ymm12, %ymm7, %ymm3
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc58
+ calc_f3_pre 0x1c8, %ebx
+ precalc18 %ymm3
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc59
+ calc_f2_pre 0x1cc, %ebx, %esi, %ecx
+ precalc19 %ymm3
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc60
+ calc_f2_pre 0x1e0, %ecx, %ebx, %edx
+ precalc20 %ymm3
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc61
+ calc_f2_pre 0x1e4, %edx, %ecx, %eax
+ precalc21 %ymm3
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc62
+ calc_f2_pre 0x1e8, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc63
+ calc_f2_pre 0x1ec, %edi, %eax, %esi
+ precalc23 %ymm3, 0x20, 0xe0
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc64
+ calc_f2_pre 0x200, %esi, %edi, %ebx
+ precalc32 %ymm5, %ymm3
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc65
+ calc_f2_pre 0x204, %ebx, %esi, %ecx
+ precalc33 %ymm14, %ymm15
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc66
+ calc_f2_pre 0x208, %ecx, %ebx, %edx
+ precalc34 %ymm8
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc67
+ calc_f2_pre 0x20c, %edx, %ecx, %eax
+ precalc35 %ymm15
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc68
+ calc_f2_pre 0x220, %eax, %edx, %edi
+ precalc36 %ymm15
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc69
+ calc_f2_pre 0x224, %edi, %eax, %esi
+ precalc37 %ymm15
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc70
+ calc_f2_pre 0x228, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc71
+ calc_f2_pre 0x22c, %ebx, %esi, %ecx
+ precalc39 %ymm15, 0x20, 0x100
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc72
+ calc_f2_pre 0x240, %ecx, %ebx, %edx
+ precalc32 %ymm3, %ymm15
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc73
+ calc_f2_pre 0x244, %edx, %ecx, %eax
+ precalc33 %ymm13, %ymm14
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc74
+ calc_f2_pre 0x248, %eax, %edx, %edi
+ precalc34 %ymm7
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc75
+ calc_f2_pre 0x24c, %edi, %eax, %esi
+ precalc35 %ymm14
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc76
+ calc_f2_pre 0x260, %esi, %edi, %ebx
+ precalc36 %ymm14
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc77
+ calc_f2_pre 0x264, %ebx, %esi, %ecx
+ precalc37 %ymm14
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc78
+ calc_f2_pre 0x268, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc79
+ add 0x26c(%r15), %eax
+ add %ecx, %eax
+ rorx $0x1b, %edx, %r12d
+ precalc39 %ymm14, 0x20, 0x120
+ add %r12d, %eax
+.endm
+
+/*
+ * Similar to calc0
+ */
+.macro calc80
+ mov %ecx, %edx // precalculate first round
+ rorx $2, %ecx, %ecx
+ andn %esi, %edx, %ebp
+ and %ebx, %edx
+ xor %ebp, %edx
+ calc_f1_pre 0x10, %eax, %edx, %ebx, %edi
+ precalc32 %ymm15, %ymm14
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc81
+ calc_f1_pre 0x14, %edi, %eax, %ecx, %esi
+ precalc33 %ymm12, %ymm13
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc82
+ calc_f1_pre 0x18, %esi, %edi, %edx, %ebx
+ precalc34 %ymm5
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc83
+ calc_f1_pre 0x1c, %ebx, %esi, %eax, %ecx
+ precalc35 %ymm13
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc84
+ calc_f1_pre 0x30, %ecx, %ebx, %edi, %edx
+ precalc36 %ymm13
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc85
+ calc_f1_pre 0x34, %edx, %ecx, %esi, %eax
+ precalc37 %ymm13
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc86
+ calc_f1_pre 0x38, %eax, %edx, %ebx, %edi
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc87
+ calc_f1_pre 0x3c, %edi, %eax, %ecx, %esi
+ precalc39 %ymm13, 0x40, 0x140
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc88
+ calc_f1_pre 0x50, %esi, %edi, %edx, %ebx
+ precalc32 %ymm14, %ymm13
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc89
+ calc_f1_pre 0x54, %ebx, %esi, %eax, %ecx
+ precalc33 %ymm8, %ymm12
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc90
+ calc_f1_pre 0x58, %ecx, %ebx, %edi, %edx
+ precalc34 %ymm3
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc91
+ calc_f1_pre 0x5c, %edx, %ecx, %esi, %eax
+ precalc35 %ymm12
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc92
+ calc_f1_pre 0x70, %eax, %edx, %ebx, %edi
+ precalc36 %ymm12
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc93
+ calc_f1_pre 0x74, %edi, %eax, %ecx, %esi
+ precalc37 %ymm12
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc94
+ calc_f1_pre 0x78, %esi, %edi, %edx, %ebx
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc95
+ calc_f1_pre 0x7c, %ebx, %esi, %eax, %ecx
+ precalc39 %ymm12, 0x40, 0x160
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc96
+ calc_f1_pre 0x90, %ecx, %ebx, %edi, %edx
+ precalc32 %ymm13, %ymm12
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc97
+ calc_f1_pre 0x94, %edx, %ecx, %esi, %eax
+ precalc33 %ymm7, %ymm8
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc98
+ calc_f1_pre 0x98, %eax, %edx, %ebx, %edi
+ precalc34 %ymm15
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc99
+ calc_f2_pre 0x9c, %edi, %eax, %esi
+ precalc35 %ymm8
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc100
+ calc_f2_pre 0xb0, %esi, %edi, %ebx
+ precalc36 %ymm8
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc101
+ calc_f2_pre 0xb4, %ebx, %esi, %ecx
+ precalc37 %ymm8
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc102
+ calc_f2_pre 0xb8, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc103
+ calc_f2_pre 0xbc, %edx, %ecx, %eax
+ precalc39 %ymm8, 0x40, 0x180
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc104
+ calc_f2_pre 0xd0, %eax, %edx, %edi
+ precalc32 %ymm12, %ymm8
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc105
+ calc_f2_pre 0xd4, %edi, %eax, %esi
+ precalc33 %ymm5, %ymm7
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc106
+ calc_f2_pre 0xd8, %esi, %edi, %ebx
+ precalc34 %ymm14
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc107
+ calc_f2_pre 0xdc, %ebx, %esi, %ecx
+ precalc35 %ymm7
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc108
+ calc_f2_pre 0xf0, %ecx, %ebx, %edx
+ precalc36 %ymm7
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc109
+ calc_f2_pre 0xf4, %edx, %ecx, %eax
+ precalc37 %ymm7
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc110
+ calc_f2_pre 0xf8, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc111
+ calc_f2_pre 0xfc, %edi, %eax, %esi
+ precalc39 %ymm7, 0x40, 0x1a0
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc112
+ calc_f2_pre 0x110, %esi, %edi, %ebx
+ precalc32 %ymm8, %ymm7
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc113
+ calc_f2_pre 0x114, %ebx, %esi, %ecx
+ precalc33 %ymm3, %ymm5
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc114
+ calc_f2_pre 0x118, %ecx, %ebx, %edx
+ precalc34 %ymm13
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc115
+ calc_f2_pre 0x11c, %edx, %ecx, %eax
+ precalc35 %ymm5
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc116
+ calc_f2_pre 0x130, %eax, %edx, %edi
+	precalc36 %ymm5
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc117
+ calc_f2_pre 0x134, %edi, %eax, %esi
+ precalc37 %ymm5
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc118
+ calc_f2_pre 0x138, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc119
+ calc_f3_pre 0x13c, %ecx
+ precalc39 %ymm5, 0x40, 0x1c0
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc120
+ calc_f3_pre 0x150, %edx
+ precalc32 %ymm7, %ymm5
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc121
+ calc_f3_pre 0x154, %eax
+ precalc33 %ymm15, %ymm3
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc122
+ calc_f3_pre 0x158, %edi
+ precalc34 %ymm12
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc123
+ calc_f3_pre 0x15c, %esi
+ precalc35 %ymm3
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc124
+ calc_f3_pre 0x170, %ebx
+ precalc36 %ymm3
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc125
+ calc_f3_pre 0x174, %ecx
+ precalc37 %ymm3
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc126
+ calc_f3_pre 0x178, %edx
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc127
+ calc_f3_pre 0x17c, %eax
+ precalc39 %ymm3, 0x60, 0x1e0
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc128
+ calc_f3_pre 0x190, %edi
+ precalc32 %ymm5, %ymm3
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc129
+ calc_f3_pre 0x194, %esi
+ precalc33 %ymm14, %ymm15
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc130
+ calc_f3_pre 0x198, %ebx
+ precalc34 %ymm8
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc131
+ calc_f3_pre 0x19c, %ecx
+ precalc35 %ymm15
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc132
+ calc_f3_pre 0x1b0, %edx
+ precalc36 %ymm15
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc133
+ calc_f3_pre 0x1b4, %eax
+ precalc37 %ymm15
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc134
+ calc_f3_pre 0x1b8, %edi
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc135
+ calc_f3_pre 0x1bc, %esi
+ precalc39 %ymm15, 0x60, 0x200
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc136
+ calc_f3_pre 0x1d0, %ebx
+ precalc32 %ymm3, %ymm15
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc137
+ calc_f3_pre 0x1d4, %ecx
+ precalc33 %ymm13, %ymm14
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc138
+ calc_f3_pre 0x1d8, %edx
+ precalc34 %ymm7
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc139
+	calc_f2_pre 0x1dc, %edx, %ecx, %eax
+ precalc35 %ymm14
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc140
+ calc_f2_pre 0x1f0, %eax, %edx, %edi
+ precalc36 %ymm14
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc141
+ calc_f2_pre 0x1f4, %edi, %eax, %esi
+ precalc37 %ymm14
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc142
+ calc_f2_pre 0x1f8, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc143
+ calc_f2_pre 0x1fc, %ebx, %esi, %ecx
+ precalc39 %ymm14, 0x60, 0x220
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc144
+ calc_f2_pre 0x210, %ecx, %ebx, %edx
+ precalc32 %ymm15, %ymm14
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc145
+ calc_f2_pre 0x214, %edx, %ecx, %eax
+ precalc33 %ymm12, %ymm13
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc146
+ calc_f2_pre 0x218, %eax, %edx, %edi
+ precalc34 %ymm5
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc147
+ calc_f2_pre 0x21c, %edi, %eax, %esi
+ precalc35 %ymm13
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc148
+ calc_f2_pre 0x230, %esi, %edi, %ebx
+ precalc36 %ymm13
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc149
+ calc_f2_pre 0x234, %ebx, %esi, %ecx
+ precalc37 %ymm13
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc150
+ calc_f2_pre 0x238, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc151
+ calc_f2_pre 0x23c, %edx, %ecx, %eax
+ precalc39 %ymm13, 0x60, 0x240
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc152
+ calc_f2_pre 0x250, %eax, %edx, %edi
+ precalc32 %ymm14, %ymm13
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc153
+ calc_f2_pre 0x254, %edi, %eax, %esi
+ precalc33 %ymm8, %ymm12
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc154
+ calc_f2_pre 0x258, %esi, %edi, %ebx
+ precalc34 %ymm3
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc155
+ calc_f2_pre 0x25c, %ebx, %esi, %ecx
+ precalc35 %ymm12
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc156
+ calc_f2_pre 0x270, %ecx, %ebx, %edx
+ precalc36 %ymm12
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc157
+ calc_f2_pre 0x274, %edx, %ecx, %eax
+ precalc37 %ymm12
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc158
+ calc_f2_pre 0x278, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc159
+ add 0x27c(%r15), %esi
+ add %eax, %esi
+ rorx $0x1b, %edi, %r12d
+ precalc39 %ymm12, 0x60, 0x260
+ add %r12d, %esi
+.endm
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_avx2)
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ sub $1408+8, %rsp
+
+ and $~63, %rdx
+ lea k_xmm_ar(%rip), %r8
+ mov %rdi, %r9
+ mov %rsi, %r10
+ lea 64(%rsi), %r13
+ lea 64(%rsi, %rdx), %r11
+ cmp %r11, %r13
+ cmovae %r8, %r13
+ vmovdqu bswap_shufb_ctl(%rip), %ymm10
+
+ mov (%r9), %ecx
+ mov 4(%r9), %esi
+ mov 8(%r9), %edi
+ mov 12(%r9), %eax
+ mov 16(%r9), %edx
+ mov %rsp, %r14
+ lea 2*4*80+32(%rsp), %r15
+ precalc // precalc WK for first 2 blocks
+ xchg %r14, %r15
+
+ // this is unrolled
+.Loop: cmp %r8, %r10 // we use the value of R8 (set below)
+ // as a signal of the last block
+ jne .Lbegin
+ add $1408+8, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ vzeroupper
+ ret
+
+.Lbegin:
+ calc0
+ calc1
+ calc2
+ calc3
+ calc4
+ calc5
+ calc6
+ calc7
+ calc8
+ calc9
+ calc10
+ calc11
+ calc12
+ calc13
+ calc14
+ calc15
+ calc16
+ calc17
+ calc18
+ calc19
+ calc20
+ calc21
+ calc22
+ calc23
+ calc24
+ calc25
+ calc26
+ calc27
+ calc28
+ calc29
+ calc30
+ calc31
+ calc32
+ calc33
+ calc34
+ calc35
+ calc36
+ calc37
+ calc38
+ calc39
+ calc40
+ calc41
+ calc42
+ calc43
+ calc44
+ calc45
+ calc46
+ calc47
+ calc48
+ calc49
+ calc50
+ calc51
+ calc52
+ calc53
+ calc54
+ calc55
+ calc56
+ calc57
+ calc58
+ calc59
+
+ add $128, %r10 // move to the next even-64-byte block
+ cmp %r11, %r10 // is the current block the last one?
+ cmovae %r10, %r8 // signal the last iteration smartly
+
+ calc60
+ calc61
+ calc62
+ calc63
+ calc64
+ calc65
+ calc66
+ calc67
+ calc68
+ calc69
+ calc70
+ calc71
+ calc72
+ calc73
+ calc74
+ calc75
+ calc76
+ calc77
+ calc78
+ calc79
+
+ update_hash %eax, %edx, %ebx, %esi, %edi
+ cmp %r8, %r10 // is the current block the last one?
+ je .Loop
+ mov %edx, %ecx
+
+ calc80
+ calc81
+ calc82
+ calc83
+ calc84
+ calc85
+ calc86
+ calc87
+ calc88
+ calc89
+ calc90
+ calc91
+ calc92
+ calc93
+ calc94
+ calc95
+ calc96
+ calc97
+ calc98
+ calc99
+ calc100
+ calc101
+ calc102
+ calc103
+ calc104
+ calc105
+ calc106
+ calc107
+ calc108
+ calc109
+ calc110
+ calc111
+ calc112
+ calc113
+ calc114
+ calc115
+ calc116
+ calc117
+ calc118
+ calc119
+ calc120
+ calc121
+ calc122
+ calc123
+ calc124
+ calc125
+ calc126
+ calc127
+ calc128
+ calc129
+ calc130
+ calc131
+ calc132
+ calc133
+ calc134
+ calc135
+ calc136
+ calc137
+ calc138
+ calc139
+
+ add $128, %r13 // move to the next even-64-byte block
+ cmp %r11, %r13 // is the current block the last one?
+ cmovae %r8, %r10
+
+ calc140
+ calc141
+ calc142
+ calc143
+ calc144
+ calc145
+ calc146
+ calc147
+ calc148
+ calc149
+ calc150
+ calc151
+ calc152
+ calc153
+ calc154
+ calc155
+ calc156
+ calc157
+ calc158
+ calc159
+
+ update_hash %esi, %edi, %edx, %ecx, %ebx
+ mov %esi, %r12d // reset state for AVX2 reg permutation
+ mov %edi, %esi
+ mov %edx, %edi
+ mov %ebx, %edx
+ mov %ecx, %eax
+ mov %r12d, %ecx
+ xchg %r14, %r15
+ jmp .Loop
+END(_libmd_sha1block_avx2)
+
+ .section .rodata
+ .balign 32
+k_xmm_ar:
+ .fill 8, 4, 0x5a827999
+ .fill 8, 4, 0x6ed9eba1
+ .fill 8, 4, 0x8f1bbcdc
+ .fill 8, 4, 0xca62c1d6
+ .size k_xmm_ar, .-k_xmm_ar
+
+bswap_shufb_ctl:
+ .4byte 0x00010203
+ .4byte 0x04050607
+ .4byte 0x08090a0b
+ .4byte 0x0c0d0e0f
+ .4byte 0x00010203
+ .4byte 0x04050607
+ .4byte 0x08090a0b
+ .4byte 0x0c0d0e0f
+ .size bswap_shufb_ctl, .-bswap_shufb_ctl
+
+ /*
+ * SHA1 implementation using the Intel SHA extensions (SHANI).
+ *
+	 * Implemented according to the Intel white paper
+ *
+ * S. Gulley, V. Gopal, K. Yap, W. Feghali, J. Guilford,
+ * G. Wolrich: "Intel SHA Extensions: new instruction supporting
+ * the Secure Hash Algorithm on Intel® architecture processors",
+ * July 2013.
+ */
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_shani)
+ and $~63, %rdx // round length to block-size multiple
+ lea (%rsi, %rdx, 1), %rcx // end pointer
+ test %rdx, %rdx // nothing to do?
+ je 1f // if so, terminate immediately
+
+ movdqu (%rdi), %xmm6 // h0, h1, h2, h3
+ pxor %xmm7, %xmm7
+ pshufd $0x1b, %xmm6, %xmm6 // h3, h2, h1, h0
+ pinsrd $3, 16(%rdi), %xmm7 // h4 in the highest word of xmm7
+ movdqu shuf_mask(%rip), %xmm4
+
+ // main loop
+0: movdqa %xmm6, %xmm8 // stash ABCD
+ movdqa %xmm7, %xmm9 // stash E
+
+ // rounds 0--3
+ movdqu 0*16(%rsi), %xmm0 // load first message block
+ pshufb %xmm4, %xmm0 // and byte-swap
+ paddd %xmm0, %xmm7 // E += w[0]
+ movdqa %xmm6, %xmm5 // E' = A
+ sha1rnds4 $0, %xmm7, %xmm6 // perform rounds 0--3
+
+ // rounds 4--7
+ movdqu 1*16(%rsi), %xmm1
+ pshufb %xmm4, %xmm1
+ sha1nexte %xmm1, %xmm5
+ movdqa %xmm6, %xmm7
+ sha1rnds4 $0, %xmm5, %xmm6
+ sha1msg1 %xmm1, %xmm0
+
+ // rounds 8--11
+ movdqu 2*16(%rsi), %xmm2
+ pshufb %xmm4, %xmm2
+ sha1nexte %xmm2, %xmm7
+ movdqa %xmm6, %xmm5
+ sha1rnds4 $0, %xmm7, %xmm6
+ sha1msg1 %xmm2, %xmm1
+ pxor %xmm2, %xmm0
+
+.macro midround msg3, msg0, msg1, msg2, e1, e0, k
+ sha1nexte \msg3, \e1
+ movdqa %xmm6, \e0
+ sha1msg2 \msg3, \msg0
+ sha1rnds4 $\k, \e1, %xmm6
+ sha1msg1 \msg3, \msg2
+ pxor \msg3, \msg1
+.endm
+
+ movdqu 3*16(%rsi), %xmm3 // load third message block
+ pshufb %xmm4, %xmm3
+
+ add $4*16, %rsi
+
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 0 // 12--15
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 0 // 16--19
+ midround %xmm1, %xmm2, %xmm3, %xmm0, %xmm5, %xmm7, 1 // 20--23
+ midround %xmm2, %xmm3, %xmm0, %xmm1, %xmm7, %xmm5, 1 // 24--27
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 1 // 28--31
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 1 // 32--35
+ midround %xmm1, %xmm2, %xmm3, %xmm0, %xmm5, %xmm7, 1 // 36--39
+ midround %xmm2, %xmm3, %xmm0, %xmm1, %xmm7, %xmm5, 2 // 40--43
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 2 // 44--47
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 2 // 48--51
+ midround %xmm1, %xmm2, %xmm3, %xmm0, %xmm5, %xmm7, 2 // 52--55
+ midround %xmm2, %xmm3, %xmm0, %xmm1, %xmm7, %xmm5, 2 // 56--59
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 3 // 60--63
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 3 // 64--67
+
+ // rounds 68--71
+ sha1nexte %xmm1, %xmm5
+ movdqa %xmm6, %xmm7
+ sha1msg2 %xmm1, %xmm2
+ sha1rnds4 $3, %xmm5, %xmm6
+ pxor %xmm1, %xmm3
+
+ // rounds 72--75
+ sha1nexte %xmm2, %xmm7
+ movdqa %xmm6, %xmm5
+ sha1msg2 %xmm2, %xmm3
+ sha1rnds4 $3, %xmm7, %xmm6
+
+ // rounds 76--79
+ sha1nexte %xmm3, %xmm5
+ movdqa %xmm6, %xmm7
+ sha1rnds4 $3, %xmm5, %xmm6
+
+ sha1nexte %xmm9, %xmm7 // add saved E
+ paddd %xmm8, %xmm6 // add saved ABCD
+
+ cmp %rsi, %rcx // end reached?
+ jne 0b
+
+ pshufd $0x1b, %xmm6, %xmm6 // restore order of h0--h3
+ movdqu %xmm6, (%rdi) // write h0--h3
+ pextrd $3, %xmm7, 16(%rdi) // write h4
+1: ret
+END(_libmd_sha1block_shani)
+
+ .section .rodata
+ .balign 16
+shuf_mask:
+ .8byte 0x08090a0b0c0d0e0f
+ .8byte 0x0001020304050607
+ .size shuf_mask, .-shuf_mask
+
+ .section .note.GNU-stack,"",%progbits
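
For reference (illustrative, not part of the change): the AVX2 path's precalc machinery above stages W[i] + K for two interleaved 64-byte blocks in a temporary buffer so that the scalar rounds only ever do a single memory add per round. A plain C sketch of the values being produced follows; the vector code computes the same schedule for both blocks at once in ymm lanes.

#include <stdint.h>

static const uint32_t K[4] = { 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 };

static inline uint32_t rol32(uint32_t x, int n) { return ((x << n) | (x >> (32 - n))); }

/* Produce the 80 precomputed W[i] + K values for one already byte-swapped block. */
static void
precalc_wk(const uint32_t block[16], uint32_t wk[80])
{
	uint32_t w[80];
	int i;

	for (i = 0; i < 16; i++)
		w[i] = block[i];
	for (; i < 80; i++)
		w[i] = rol32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);
	for (i = 0; i < 80; i++)
		wk[i] = w[i] + K[i / 20];
}
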
diff --git a/lib/libmd/amd64/sha1dispatch.c b/lib/libmd/amd64/sha1dispatch.c
new file mode 100644
--- /dev/null
+++ b/lib/libmd/amd64/sha1dispatch.c
@@ -0,0 +1,77 @@
+/*-
+ * Copyright (c) 2016 The Go Authors. All rights reserved.
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * Adapted from Go's crypto/sha1/sha1block_amd64.go.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/specialreg.h>
+#include <sha.h>
+#include <x86/ifunc.h>
+
+extern void _libmd_sha1block_scalar(SHA1_CTX *, const void *, size_t);
+extern void _libmd_sha1block_avx2(SHA1_CTX *, const void *, size_t);
+extern void _libmd_sha1block_shani(SHA1_CTX *, const void *, size_t);
+static void sha1block_avx2_wrapper(SHA1_CTX *, const void *, size_t);
+
+#define AVX2_STDEXT_NEEDED \
+ (CPUID_STDEXT_BMI1 | CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2)
+
+DEFINE_UIFUNC(, void, sha1_block, (SHA1_CTX *, const void *, size_t))
+{
+ if (cpu_stdext_feature & CPUID_STDEXT_SHA)
+ return (_libmd_sha1block_shani);
+ if ((cpu_stdext_feature & AVX2_STDEXT_NEEDED) == AVX2_STDEXT_NEEDED)
+ return (sha1block_avx2_wrapper);
+ else
+ return (_libmd_sha1block_scalar);
+}
+
+static void
+sha1block_avx2_wrapper(SHA1_CTX *c, const void *data, size_t len)
+{
+ if (len >= 256) {
+ /*
+		 * sha1block_avx2 computes SHA1 for two blocks per iteration
+		 * and interleaves the precalculation for the next block,
+		 * so it may read up to 192 bytes past the end of p.
+		 * Adding bounds checks inside sha1block_avx2 would just
+		 * turn it into a copy of sha1block_scalar, so hand the
+		 * tail to the scalar routine instead.
+ */
+ size_t safe_len = len - 128;
+
+ if (safe_len % 128 != 0)
+ safe_len -= 64;
+
+ _libmd_sha1block_avx2(c, data, safe_len);
+ _libmd_sha1block_scalar(c, data + safe_len, len - safe_len);
+ } else
+ _libmd_sha1block_scalar(c, data, len);
+}
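To make the splitting arithmetic in sha1block_avx2_wrapper concrete, the small stand-alone program below (not part of the patch, and using a hypothetical split() helper) replays the safe_len computation for a few lengths that are already multiples of the 64-byte block size and prints how many bytes each routine would handle:

#include <stddef.h>
#include <stdio.h>

/* Hypothetical helper mirroring sha1block_avx2_wrapper's length split. */
static void
split(size_t len)
{
	size_t safe_len;

	if (len < 256) {
		printf("len=%4zu: scalar only\n", len);
		return;
	}

	safe_len = len - 128;
	if (safe_len % 128 != 0)
		safe_len -= 64;

	printf("len=%4zu: avx2=%zu, scalar=%zu\n", len, safe_len, len - safe_len);
}

int
main(void)
{
	split(192);	/* below the threshold: scalar only */
	split(256);	/* avx2 128, scalar 128 */
	split(320);	/* avx2 128, scalar 192 */
	split(384);	/* avx2 256, scalar 128 */
	return (0);
}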
diff --git a/lib/libmd/i386/sha.S b/lib/libmd/i386/sha.S
deleted file mode 100644
--- a/lib/libmd/i386/sha.S
+++ /dev/null
@@ -1,1951 +0,0 @@
-/* -*- Fundamental -*- Emacs' assembler mode hoses this file */
-#ifndef PIC
-/* Run the C pre-processor over this file with one of the following defined
- * ELF - elf object files,
- * OUT - a.out object files,
- * BSDI - BSDI style a.out object files
- * SOL - Solaris style elf
- */
-
-#define TYPE(a,b) .type a,b
-#define SIZE(a,b) .size a,b
-
-#if defined(OUT) || defined(BSDI)
-#define sha1_block_x86 _sha1_block_x86
-
-#endif
-
-#ifdef OUT
-#define OK 1
-#define ALIGN 4
-#endif
-
-#ifdef BSDI
-#define OK 1
-#define ALIGN 4
-#undef SIZE
-#undef TYPE
-#define SIZE(a,b)
-#define TYPE(a,b)
-#endif
-
-#if defined(ELF) || defined(SOL)
-#define OK 1
-#define ALIGN 4
-#endif
-
-#ifndef OK
-You need to define one of
-ELF - elf systems - linux-elf, NetBSD and DG-UX
-OUT - a.out systems - linux-a.out and FreeBSD
-SOL - solaris systems, which are elf with strange comment lines
-BSDI - a.out with a very primative version of as.
-#endif
-
-/* Let the Assembler begin :-) */
- /* Don't even think of reading this code */
- /* It was automatically generated by sha1-586.pl */
- /* Which is a perl program used to generate the x86 assember for */
- /* any of elf, a.out, BSDI,Win32, or Solaris */
- /* eric <eay@cryptsoft.com> */
-
- .file "sha1-586.s"
- .version "01.01"
-gcc2_compiled.:
-.text
- .p2align ALIGN
-.globl sha1_block_x86
- TYPE(sha1_block_x86,@function)
-sha1_block_x86:
- pushl %esi
- pushl %ebp
- movl 20(%esp), %eax
- movl 16(%esp), %esi
- addl %esi, %eax
- movl 12(%esp), %ebp
- pushl %ebx
- subl $64, %eax
- pushl %edi
- movl 4(%ebp), %ebx
- subl $72, %esp
- movl 12(%ebp), %edx
- movl 16(%ebp), %edi
- movl 8(%ebp), %ecx
- movl %eax, 68(%esp)
- /* First we need to setup the X array */
- movl (%esi), %eax
-.L000start:
- /* First, load the words onto the stack in network byte order */
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, (%esp)
- movl 4(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 4(%esp)
- movl 8(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 8(%esp)
- movl 12(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 12(%esp)
- movl 16(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 16(%esp)
- movl 20(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 20(%esp)
- movl 24(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 24(%esp)
- movl 28(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 28(%esp)
- movl 32(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 32(%esp)
- movl 36(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 36(%esp)
- movl 40(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 40(%esp)
- movl 44(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 44(%esp)
- movl 48(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 48(%esp)
- movl 52(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 52(%esp)
- movl 56(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 56(%esp)
- movl 60(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 60(%esp)
- /* We now have the X array on the stack */
- /* starting at sp-4 */
- movl %esi, 64(%esp)
-
- /* Start processing */
- movl (%ebp), %eax
- /* 00_15 0 */
- movl %ecx, %esi
- movl %eax, %ebp
- xorl %edx, %esi
- roll $5, %ebp
- andl %ebx, %esi
- addl %edi, %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- movl (%esp), %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %edx, %esi
- leal 1518500249(%ebp,%edi,1),%ebp
- movl %ebx, %edi
- addl %ebp, %esi
- xorl %ecx, %edi
- movl %esi, %ebp
- andl %eax, %edi
- roll $5, %ebp
- addl %edx, %ebp
- movl 4(%esp), %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- xorl %ecx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %ebp, %edi
- /* 00_15 2 */
- movl %eax, %edx
- movl %edi, %ebp
- xorl %ebx, %edx
- roll $5, %ebp
- andl %esi, %edx
- addl %ecx, %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- movl 8(%esp), %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebx, %edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- movl %esi, %ecx
- addl %ebp, %edx
- xorl %eax, %ecx
- movl %edx, %ebp
- andl %edi, %ecx
- roll $5, %ebp
- addl %ebx, %ebp
- movl 12(%esp), %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- xorl %eax, %ecx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ebp, %ecx
- /* 00_15 4 */
- movl %edi, %ebx
- movl %ecx, %ebp
- xorl %esi, %ebx
- roll $5, %ebp
- andl %edx, %ebx
- addl %eax, %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- movl 16(%esp), %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %esi, %ebx
- leal 1518500249(%ebp,%eax,1),%ebp
- movl %edx, %eax
- addl %ebp, %ebx
- xorl %edi, %eax
- movl %ebx, %ebp
- andl %ecx, %eax
- roll $5, %ebp
- addl %esi, %ebp
- movl 20(%esp), %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- xorl %edi, %eax
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- leal 1518500249(%ebp,%esi,1),%ebp
- addl %ebp, %eax
- /* 00_15 6 */
- movl %ecx, %esi
- movl %eax, %ebp
- xorl %edx, %esi
- roll $5, %ebp
- andl %ebx, %esi
- addl %edi, %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- movl 24(%esp), %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %edx, %esi
- leal 1518500249(%ebp,%edi,1),%ebp
- movl %ebx, %edi
- addl %ebp, %esi
- xorl %ecx, %edi
- movl %esi, %ebp
- andl %eax, %edi
- roll $5, %ebp
- addl %edx, %ebp
- movl 28(%esp), %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- xorl %ecx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %ebp, %edi
- /* 00_15 8 */
- movl %eax, %edx
- movl %edi, %ebp
- xorl %ebx, %edx
- roll $5, %ebp
- andl %esi, %edx
- addl %ecx, %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- movl 32(%esp), %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebx, %edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- movl %esi, %ecx
- addl %ebp, %edx
- xorl %eax, %ecx
- movl %edx, %ebp
- andl %edi, %ecx
- roll $5, %ebp
- addl %ebx, %ebp
- movl 36(%esp), %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- xorl %eax, %ecx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ebp, %ecx
- /* 00_15 10 */
- movl %edi, %ebx
- movl %ecx, %ebp
- xorl %esi, %ebx
- roll $5, %ebp
- andl %edx, %ebx
- addl %eax, %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- movl 40(%esp), %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %esi, %ebx
- leal 1518500249(%ebp,%eax,1),%ebp
- movl %edx, %eax
- addl %ebp, %ebx
- xorl %edi, %eax
- movl %ebx, %ebp
- andl %ecx, %eax
- roll $5, %ebp
- addl %esi, %ebp
- movl 44(%esp), %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- xorl %edi, %eax
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- leal 1518500249(%ebp,%esi,1),%ebp
- addl %ebp, %eax
- /* 00_15 12 */
- movl %ecx, %esi
- movl %eax, %ebp
- xorl %edx, %esi
- roll $5, %ebp
- andl %ebx, %esi
- addl %edi, %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- movl 48(%esp), %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %edx, %esi
- leal 1518500249(%ebp,%edi,1),%ebp
- movl %ebx, %edi
- addl %ebp, %esi
- xorl %ecx, %edi
- movl %esi, %ebp
- andl %eax, %edi
- roll $5, %ebp
- addl %edx, %ebp
- movl 52(%esp), %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- xorl %ecx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %ebp, %edi
- /* 00_15 14 */
- movl %eax, %edx
- movl %edi, %ebp
- xorl %ebx, %edx
- roll $5, %ebp
- andl %esi, %edx
- addl %ecx, %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- movl 56(%esp), %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebx, %edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- movl %esi, %ecx
- addl %ebp, %edx
- xorl %eax, %ecx
- movl %edx, %ebp
- andl %edi, %ecx
- roll $5, %ebp
- addl %ebx, %ebp
- movl 60(%esp), %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- xorl %eax, %ecx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ebp, %ecx
- /* 16_19 16 */
- nop
- movl (%esp), %ebp
- movl 8(%esp), %ebx
- xorl %ebp, %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 52(%esp), %ebp
- xorl %ebp, %ebx
- movl %edi, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %esi, %ebp
- movl %ebx, (%esp)
- andl %edx, %ebp
- leal 1518500249(%ebx,%eax,1),%ebx
- xorl %esi, %ebp
- movl %ecx, %eax
- addl %ebp, %ebx
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- movl 4(%esp), %eax
- movl 12(%esp), %ebp
- xorl %ebp, %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 56(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %edx, %ebp
- xorl %edi, %ebp
- movl %eax, 4(%esp)
- andl %ecx, %ebp
- leal 1518500249(%eax,%esi,1),%eax
- xorl %edi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 16_19 18 */
- movl 8(%esp), %ebp
- movl 16(%esp), %esi
- xorl %ebp, %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl 60(%esp), %ebp
- xorl %ebp, %esi
- movl %ecx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %edx, %ebp
- movl %esi, 8(%esp)
- andl %ebx, %ebp
- leal 1518500249(%esi,%edi,1),%esi
- xorl %edx, %ebp
- movl %eax, %edi
- addl %ebp, %esi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- movl 12(%esp), %edi
- movl 20(%esp), %ebp
- xorl %ebp, %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl (%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %ebx, %ebp
- xorl %ecx, %ebp
- movl %edi, 12(%esp)
- andl %eax, %ebp
- leal 1518500249(%edi,%edx,1),%edi
- xorl %ecx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 20_39 20 */
- movl 16(%esp), %edx
- movl 24(%esp), %ebp
- xorl %ebp, %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 4(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 16(%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 21 */
- movl 20(%esp), %ecx
- movl 28(%esp), %ebp
- xorl %ebp, %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 8(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 20(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 22 */
- movl 24(%esp), %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 12(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 24(%esp)
- xorl %esi, %ebp
- leal 1859775393(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 23 */
- movl 28(%esp), %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 16(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 28(%esp)
- xorl %edi, %ebp
- leal 1859775393(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 24 */
- movl 32(%esp), %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 20(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 32(%esp)
- xorl %edx, %ebp
- leal 1859775393(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 25 */
- movl 36(%esp), %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 24(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 36(%esp)
- xorl %ecx, %ebp
- leal 1859775393(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 26 */
- movl 40(%esp), %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 8(%esp), %ebp
- xorl %ebp, %edx
- movl 28(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 40(%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 27 */
- movl 44(%esp), %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 12(%esp), %ebp
- xorl %ebp, %ecx
- movl 32(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 44(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 28 */
- movl 48(%esp), %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 16(%esp), %ebp
- xorl %ebp, %ebx
- movl 36(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 48(%esp)
- xorl %esi, %ebp
- leal 1859775393(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 29 */
- movl 52(%esp), %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 20(%esp), %ebp
- xorl %ebp, %eax
- movl 40(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 52(%esp)
- xorl %edi, %ebp
- leal 1859775393(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 30 */
- movl 56(%esp), %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 24(%esp), %ebp
- xorl %ebp, %esi
- movl 44(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 56(%esp)
- xorl %edx, %ebp
- leal 1859775393(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 31 */
- movl 60(%esp), %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 28(%esp), %ebp
- xorl %ebp, %edi
- movl 48(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 60(%esp)
- xorl %ecx, %ebp
- leal 1859775393(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 32 */
- movl (%esp), %edx
- movl 8(%esp), %ebp
- xorl %ebp, %edx
- movl 32(%esp), %ebp
- xorl %ebp, %edx
- movl 52(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, (%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 33 */
- movl 4(%esp), %ecx
- movl 12(%esp), %ebp
- xorl %ebp, %ecx
- movl 36(%esp), %ebp
- xorl %ebp, %ecx
- movl 56(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 4(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 34 */
- movl 8(%esp), %ebx
- movl 16(%esp), %ebp
- xorl %ebp, %ebx
- movl 40(%esp), %ebp
- xorl %ebp, %ebx
- movl 60(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 8(%esp)
- xorl %esi, %ebp
- leal 1859775393(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 35 */
- movl 12(%esp), %eax
- movl 20(%esp), %ebp
- xorl %ebp, %eax
- movl 44(%esp), %ebp
- xorl %ebp, %eax
- movl (%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 12(%esp)
- xorl %edi, %ebp
- leal 1859775393(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 36 */
- movl 16(%esp), %esi
- movl 24(%esp), %ebp
- xorl %ebp, %esi
- movl 48(%esp), %ebp
- xorl %ebp, %esi
- movl 4(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 16(%esp)
- xorl %edx, %ebp
- leal 1859775393(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 37 */
- movl 20(%esp), %edi
- movl 28(%esp), %ebp
- xorl %ebp, %edi
- movl 52(%esp), %ebp
- xorl %ebp, %edi
- movl 8(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 20(%esp)
- xorl %ecx, %ebp
- leal 1859775393(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 38 */
- movl 24(%esp), %edx
- movl 32(%esp), %ebp
- xorl %ebp, %edx
- movl 56(%esp), %ebp
- xorl %ebp, %edx
- movl 12(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 24(%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 39 */
- movl 28(%esp), %ecx
- movl 36(%esp), %ebp
- xorl %ebp, %ecx
- movl 60(%esp), %ebp
- xorl %ebp, %ecx
- movl 16(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 28(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 40_59 40 */
- movl 32(%esp), %ebx
- movl 40(%esp), %ebp
- xorl %ebp, %ebx
- movl (%esp), %ebp
- xorl %ebp, %ebx
- movl 20(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 32(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 36(%esp), %eax
- addl %ebp, %ebx
- movl 44(%esp), %ebp
- xorl %ebp, %eax
- movl 4(%esp), %ebp
- xorl %ebp, %eax
- movl 24(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 36(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 41 */
- /* 40_59 42 */
- movl 40(%esp), %esi
- movl 48(%esp), %ebp
- xorl %ebp, %esi
- movl 8(%esp), %ebp
- xorl %ebp, %esi
- movl 28(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- orl %ecx, %ebp
- movl %esi, 40(%esp)
- andl %edx, %ebp
- leal 2400959708(%esi,%edi,1),%esi
- movl %ebx, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- andl %ecx, %edi
- orl %edi, %ebp
- movl %eax, %edi
- roll $5, %edi
- addl %edi, %ebp
- movl 44(%esp), %edi
- addl %ebp, %esi
- movl 52(%esp), %ebp
- xorl %ebp, %edi
- movl 12(%esp), %ebp
- xorl %ebp, %edi
- movl 32(%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %eax, %ebp
- movl %edi, 44(%esp)
- orl %ebx, %ebp
- leal 2400959708(%edi,%edx,1),%edi
- movl %eax, %edx
- andl %ecx, %ebp
- andl %ebx, %edx
- orl %edx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 40_59 43 */
- /* 40_59 44 */
- movl 48(%esp), %edx
- movl 56(%esp), %ebp
- xorl %ebp, %edx
- movl 16(%esp), %ebp
- xorl %ebp, %edx
- movl 36(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- orl %eax, %ebp
- movl %edx, 48(%esp)
- andl %ebx, %ebp
- leal 2400959708(%edx,%ecx,1),%edx
- movl %esi, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- andl %eax, %ecx
- orl %ecx, %ebp
- movl %edi, %ecx
- roll $5, %ecx
- addl %ecx, %ebp
- movl 52(%esp), %ecx
- addl %ebp, %edx
- movl 60(%esp), %ebp
- xorl %ebp, %ecx
- movl 20(%esp), %ebp
- xorl %ebp, %ecx
- movl 40(%esp), %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebp, %ecx
-.byte 209
-.byte 193 /* roll $1 %ecx */
- movl %edi, %ebp
- movl %ecx, 52(%esp)
- orl %esi, %ebp
- leal 2400959708(%ecx,%ebx,1),%ecx
- movl %edi, %ebx
- andl %eax, %ebp
- andl %esi, %ebx
- orl %ebx, %ebp
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ebp
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ecx
- /* 40_59 45 */
- /* 40_59 46 */
- movl 56(%esp), %ebx
- movl (%esp), %ebp
- xorl %ebp, %ebx
- movl 24(%esp), %ebp
- xorl %ebp, %ebx
- movl 44(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 56(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 60(%esp), %eax
- addl %ebp, %ebx
- movl 4(%esp), %ebp
- xorl %ebp, %eax
- movl 28(%esp), %ebp
- xorl %ebp, %eax
- movl 48(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 60(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 47 */
- /* 40_59 48 */
- movl (%esp), %esi
- movl 8(%esp), %ebp
- xorl %ebp, %esi
- movl 32(%esp), %ebp
- xorl %ebp, %esi
- movl 52(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- orl %ecx, %ebp
- movl %esi, (%esp)
- andl %edx, %ebp
- leal 2400959708(%esi,%edi,1),%esi
- movl %ebx, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- andl %ecx, %edi
- orl %edi, %ebp
- movl %eax, %edi
- roll $5, %edi
- addl %edi, %ebp
- movl 4(%esp), %edi
- addl %ebp, %esi
- movl 12(%esp), %ebp
- xorl %ebp, %edi
- movl 36(%esp), %ebp
- xorl %ebp, %edi
- movl 56(%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %eax, %ebp
- movl %edi, 4(%esp)
- orl %ebx, %ebp
- leal 2400959708(%edi,%edx,1),%edi
- movl %eax, %edx
- andl %ecx, %ebp
- andl %ebx, %edx
- orl %edx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 40_59 49 */
- /* 40_59 50 */
- movl 8(%esp), %edx
- movl 16(%esp), %ebp
- xorl %ebp, %edx
- movl 40(%esp), %ebp
- xorl %ebp, %edx
- movl 60(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- orl %eax, %ebp
- movl %edx, 8(%esp)
- andl %ebx, %ebp
- leal 2400959708(%edx,%ecx,1),%edx
- movl %esi, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- andl %eax, %ecx
- orl %ecx, %ebp
- movl %edi, %ecx
- roll $5, %ecx
- addl %ecx, %ebp
- movl 12(%esp), %ecx
- addl %ebp, %edx
- movl 20(%esp), %ebp
- xorl %ebp, %ecx
- movl 44(%esp), %ebp
- xorl %ebp, %ecx
- movl (%esp), %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebp, %ecx
-.byte 209
-.byte 193 /* roll $1 %ecx */
- movl %edi, %ebp
- movl %ecx, 12(%esp)
- orl %esi, %ebp
- leal 2400959708(%ecx,%ebx,1),%ecx
- movl %edi, %ebx
- andl %eax, %ebp
- andl %esi, %ebx
- orl %ebx, %ebp
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ebp
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ecx
- /* 40_59 51 */
- /* 40_59 52 */
- movl 16(%esp), %ebx
- movl 24(%esp), %ebp
- xorl %ebp, %ebx
- movl 48(%esp), %ebp
- xorl %ebp, %ebx
- movl 4(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 16(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 20(%esp), %eax
- addl %ebp, %ebx
- movl 28(%esp), %ebp
- xorl %ebp, %eax
- movl 52(%esp), %ebp
- xorl %ebp, %eax
- movl 8(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 20(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 53 */
- /* 40_59 54 */
- movl 24(%esp), %esi
- movl 32(%esp), %ebp
- xorl %ebp, %esi
- movl 56(%esp), %ebp
- xorl %ebp, %esi
- movl 12(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- orl %ecx, %ebp
- movl %esi, 24(%esp)
- andl %edx, %ebp
- leal 2400959708(%esi,%edi,1),%esi
- movl %ebx, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- andl %ecx, %edi
- orl %edi, %ebp
- movl %eax, %edi
- roll $5, %edi
- addl %edi, %ebp
- movl 28(%esp), %edi
- addl %ebp, %esi
- movl 36(%esp), %ebp
- xorl %ebp, %edi
- movl 60(%esp), %ebp
- xorl %ebp, %edi
- movl 16(%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %eax, %ebp
- movl %edi, 28(%esp)
- orl %ebx, %ebp
- leal 2400959708(%edi,%edx,1),%edi
- movl %eax, %edx
- andl %ecx, %ebp
- andl %ebx, %edx
- orl %edx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 40_59 55 */
- /* 40_59 56 */
- movl 32(%esp), %edx
- movl 40(%esp), %ebp
- xorl %ebp, %edx
- movl (%esp), %ebp
- xorl %ebp, %edx
- movl 20(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- orl %eax, %ebp
- movl %edx, 32(%esp)
- andl %ebx, %ebp
- leal 2400959708(%edx,%ecx,1),%edx
- movl %esi, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- andl %eax, %ecx
- orl %ecx, %ebp
- movl %edi, %ecx
- roll $5, %ecx
- addl %ecx, %ebp
- movl 36(%esp), %ecx
- addl %ebp, %edx
- movl 44(%esp), %ebp
- xorl %ebp, %ecx
- movl 4(%esp), %ebp
- xorl %ebp, %ecx
- movl 24(%esp), %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebp, %ecx
-.byte 209
-.byte 193 /* roll $1 %ecx */
- movl %edi, %ebp
- movl %ecx, 36(%esp)
- orl %esi, %ebp
- leal 2400959708(%ecx,%ebx,1),%ecx
- movl %edi, %ebx
- andl %eax, %ebp
- andl %esi, %ebx
- orl %ebx, %ebp
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ebp
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ecx
- /* 40_59 57 */
- /* 40_59 58 */
- movl 40(%esp), %ebx
- movl 48(%esp), %ebp
- xorl %ebp, %ebx
- movl 8(%esp), %ebp
- xorl %ebp, %ebx
- movl 28(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 40(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 44(%esp), %eax
- addl %ebp, %ebx
- movl 52(%esp), %ebp
- xorl %ebp, %eax
- movl 12(%esp), %ebp
- xorl %ebp, %eax
- movl 32(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 44(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 59 */
- /* 20_39 60 */
- movl 48(%esp), %esi
- movl 56(%esp), %ebp
- xorl %ebp, %esi
- movl 16(%esp), %ebp
- xorl %ebp, %esi
- movl 36(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 48(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 61 */
- movl 52(%esp), %edi
- movl 60(%esp), %ebp
- xorl %ebp, %edi
- movl 20(%esp), %ebp
- xorl %ebp, %edi
- movl 40(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 52(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 62 */
- movl 56(%esp), %edx
- movl (%esp), %ebp
- xorl %ebp, %edx
- movl 24(%esp), %ebp
- xorl %ebp, %edx
- movl 44(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 56(%esp)
- xorl %ebx, %ebp
- leal 3395469782(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 63 */
- movl 60(%esp), %ecx
- movl 4(%esp), %ebp
- xorl %ebp, %ecx
- movl 28(%esp), %ebp
- xorl %ebp, %ecx
- movl 48(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 60(%esp)
- xorl %eax, %ebp
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 64 */
- movl (%esp), %ebx
- movl 8(%esp), %ebp
- xorl %ebp, %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 52(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, (%esp)
- xorl %esi, %ebp
- leal 3395469782(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 65 */
- movl 4(%esp), %eax
- movl 12(%esp), %ebp
- xorl %ebp, %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 56(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 4(%esp)
- xorl %edi, %ebp
- leal 3395469782(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 66 */
- movl 8(%esp), %esi
- movl 16(%esp), %ebp
- xorl %ebp, %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl 60(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 8(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 67 */
- movl 12(%esp), %edi
- movl 20(%esp), %ebp
- xorl %ebp, %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl (%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 12(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 68 */
- movl 16(%esp), %edx
- movl 24(%esp), %ebp
- xorl %ebp, %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 4(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 16(%esp)
- xorl %ebx, %ebp
- leal 3395469782(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 69 */
- movl 20(%esp), %ecx
- movl 28(%esp), %ebp
- xorl %ebp, %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 8(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 20(%esp)
- xorl %eax, %ebp
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 70 */
- movl 24(%esp), %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 12(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 24(%esp)
- xorl %esi, %ebp
- leal 3395469782(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 71 */
- movl 28(%esp), %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 16(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 28(%esp)
- xorl %edi, %ebp
- leal 3395469782(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 72 */
- movl 32(%esp), %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 20(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 32(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 73 */
- movl 36(%esp), %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 24(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 36(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 74 */
- movl 40(%esp), %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 8(%esp), %ebp
- xorl %ebp, %edx
- movl 28(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 40(%esp)
- xorl %ebx, %ebp
- leal 3395469782(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 75 */
- movl 44(%esp), %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 12(%esp), %ebp
- xorl %ebp, %ecx
- movl 32(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 44(%esp)
- xorl %eax, %ebp
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 76 */
- movl 48(%esp), %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 16(%esp), %ebp
- xorl %ebp, %ebx
- movl 36(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 48(%esp)
- xorl %esi, %ebp
- leal 3395469782(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 77 */
- movl 52(%esp), %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 20(%esp), %ebp
- xorl %ebp, %eax
- movl 40(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 52(%esp)
- xorl %edi, %ebp
- leal 3395469782(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 78 */
- movl 56(%esp), %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 24(%esp), %ebp
- xorl %ebp, %esi
- movl 44(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 56(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 79 */
- movl 60(%esp), %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 28(%esp), %ebp
- xorl %ebp, %edi
- movl 48(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 60(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
- addl %ebp, %edx
- movl 92(%esp), %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- /* End processing */
-
- movl 12(%ebp), %edx
- addl %ebx, %edx
- movl 4(%ebp), %ebx
- addl %esi, %ebx
- movl %eax, %esi
- movl (%ebp), %eax
- movl %edx, 12(%ebp)
- addl %edi, %eax
- movl 16(%ebp), %edi
- addl %ecx, %edi
- movl 8(%ebp), %ecx
- addl %esi, %ecx
- movl %eax, (%ebp)
- movl 64(%esp), %esi
- movl %ecx, 8(%ebp)
- addl $64, %esi
- movl 68(%esp), %eax
- movl %edi, 16(%ebp)
- cmpl %esi, %eax
- movl %ebx, 4(%ebp)
- jb .L001end
- movl (%esi), %eax
- jmp .L000start
-.L001end:
- addl $72, %esp
- popl %edi
- popl %ebx
- popl %ebp
- popl %esi
- ret
-.sha1_block_x86_end:
- SIZE(sha1_block_x86,.sha1_block_x86_end-sha1_block_x86)
-.ident "desasm.pl"
-#endif
diff --git a/lib/libmd/sha1c.c b/lib/libmd/sha1c.c
--- a/lib/libmd/sha1c.c
+++ b/lib/libmd/sha1c.c
@@ -1,476 +1,244 @@
-/* crypto/sha/sha1dgst.c */
-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
- * All rights reserved.
+/*-
+ * Copyright (c) 2009 The Go Authors. All rights reserved.
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * Adapted from Go's crypto/sha1/sha1.go.
*
- * This package is an SSL implementation written
- * by Eric Young (eay@cryptsoft.com).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young (eay@cryptsoft.com)"
- * The word 'cryptographic' can be left out if the routines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <sys/types.h>
-
-#include <stdio.h>
+#include <assert.h>
+#include <sha.h>
+#include <stdint.h>
#include <string.h>
+#include <strings.h>
+#include <sys/endian.h>
-#if 0
-#include <machine/ansi.h> /* we use the __ variants of bit-sized types */
+#ifdef SHA1_ASM
+extern void sha1_block(SHA1_CTX *, const void *, size_t);
+#else
+static void sha1_block(SHA1_CTX *, const void *, size_t);
#endif
-#include <machine/endian.h>
-#undef SHA_0
-#define SHA_1
-#include "sha.h"
-#include "sha_locl.h"
+#define INIT0 0x67452301
+#define INIT1 0xEFCDAB89
+#define INIT2 0x98BADCFE
+#define INIT3 0x10325476
+#define INIT4 0xC3D2E1F0
-/*
- * The assembly-language code is not position-independent, so don't
- * try to use it in a shared library.
- */
-#ifdef PIC
-#undef SHA1_ASM
-#endif
+#define K0 0x5A827999
+#define K1 0x6ED9EBA1
+#define K2 0x8F1BBCDC
+#define K3 0xCA62C1D6
-static char *SHA1_version="SHA1 part of SSLeay 0.9.0b 11-Oct-1998";
+void
+SHA1_Init(SHA1_CTX *c)
+{
+ c->h0 = INIT0;
+ c->h1 = INIT1;
+ c->h2 = INIT2;
+ c->h3 = INIT3;
+ c->h4 = INIT4;
+ c->Nl = 0;
+ c->Nh = 0;
+ c->num = 0;
+}
-/* Implemented from SHA-1 document - The Secure Hash Algorithm
- */
+void
+SHA1_Update(SHA1_CTX *c, const void *data, size_t len)
+{
+ uint64_t nn;
+ const char *p = data;
-#define INIT_DATA_h0 (unsigned long)0x67452301L
-#define INIT_DATA_h1 (unsigned long)0xefcdab89L
-#define INIT_DATA_h2 (unsigned long)0x98badcfeL
-#define INIT_DATA_h3 (unsigned long)0x10325476L
-#define INIT_DATA_h4 (unsigned long)0xc3d2e1f0L
-
-#define K_00_19 0x5a827999L
-#define K_20_39 0x6ed9eba1L
-#define K_40_59 0x8f1bbcdcL
-#define K_60_79 0xca62c1d6L
-
-#ifndef NOPROTO
-# ifdef SHA1_ASM
- void sha1_block_x86(SHA_CTX *c, const u_int32_t *p, int num);
-# define sha1_block sha1_block_x86
-# else
- void sha1_block(SHA_CTX *c, const u_int32_t *p, int num);
-# endif
-#else
-# ifdef SHA1_ASM
- void sha1_block_x86();
-# define sha1_block sha1_block_x86
-# else
- void sha1_block();
-# endif
-#endif
+ nn = (uint64_t)c->Nl | (uint64_t)c->Nh << 32;
+ nn += len;
+ c->Nl = (uint32_t)nn;
+ c->Nh = (uint32_t)(nn >> 32);
+ if (c->num > 0) {
+ size_t n = SHA_CBLOCK - c->num;
-#if BYTE_ORDER == LITTLE_ENDIAN && defined(SHA1_ASM)
-# define M_c2nl c2l
-# define M_p_c2nl p_c2l
-# define M_c2nl_p c2l_p
-# define M_p_c2nl_p p_c2l_p
-# define M_nl2c l2c
-#else
-# define M_c2nl c2nl
-# define M_p_c2nl p_c2nl
-# define M_c2nl_p c2nl_p
-# define M_p_c2nl_p p_c2nl_p
-# define M_nl2c nl2c
-#endif
+ if (n > len)
+ n = len;
+
+ memcpy((char *)c->data + c->num, p, n);
+ c->num += n;
+ if (c->num == SHA_CBLOCK) {
+ sha1_block(c, (void *)c->data, SHA_CBLOCK);
+ c->num = 0;
+ }
+
+ p += n;
+ len -= n;
+ }
+
+ if (len >= SHA_CBLOCK) {
+ size_t n = len & ~(size_t)(SHA_CBLOCK - 1);
+
+ sha1_block(c, p, n);
+ p += n;
+ len -= n;
+ }
-void SHA1_Init(SHA_CTX *c)
- {
- c->h0=INIT_DATA_h0;
- c->h1=INIT_DATA_h1;
- c->h2=INIT_DATA_h2;
- c->h3=INIT_DATA_h3;
- c->h4=INIT_DATA_h4;
- c->Nl=0;
- c->Nh=0;
- c->num=0;
+ if (len > 0) {
+ memcpy(c->data, p, len);
+ c->num = len;
}
+}
void
-SHA1_Update(SHA_CTX *c, const void *in, size_t len)
+SHA1_Final(unsigned char *md, SHA1_CTX *c)
{
- u_int32_t *p;
- int ew,ec,sw,sc;
- u_int32_t l;
- const unsigned char *data = in;
-
- if (len == 0) return;
-
- l=(c->Nl+(len<<3))&0xffffffffL;
- if (l < c->Nl) /* overflow */
- c->Nh++;
- c->Nh+=(len>>29);
- c->Nl=l;
-
- if (c->num != 0)
- {
- p=c->data;
- sw=c->num>>2;
- sc=c->num&0x03;
-
- if ((c->num+len) >= SHA_CBLOCK)
- {
- l= p[sw];
- M_p_c2nl(data,l,sc);
- p[sw++]=l;
- for (; sw<SHA_LBLOCK; sw++)
- {
- M_c2nl(data,l);
- p[sw]=l;
- }
- len-=(SHA_CBLOCK-c->num);
-
- sha1_block(c,p,64);
- c->num=0;
- /* drop through and do the rest */
- }
- else
- {
- c->num+=(int)len;
- if ((sc+len) < 4) /* ugly, add char's to a word */
- {
- l= p[sw];
- M_p_c2nl_p(data,l,sc,len);
- p[sw]=l;
- }
- else
- {
- ew=(c->num>>2);
- ec=(c->num&0x03);
- l= p[sw];
- M_p_c2nl(data,l,sc);
- p[sw++]=l;
- for (; sw < ew; sw++)
- { M_c2nl(data,l); p[sw]=l; }
- if (ec)
- {
- M_c2nl_p(data,l,ec);
- p[sw]=l;
- }
- }
- return;
- }
+ uint64_t len;
+ size_t t;
+ unsigned char tmp[SHA_CBLOCK + sizeof(uint64_t)] = {0x80, 0};
+
+ len = (uint64_t)c->Nl | (uint64_t)c->Nh << 32;
+ t = 64 + 56 - c->Nl % 64;
+ if (t > 64)
+ t -= 64;
+
+ /* length in bits */
+ len <<= 3;
+ be64enc(tmp + t, len);
+ SHA1_Update(c, tmp, t + 8);
+ assert(c->num == 0);
+
+ be32enc(md + 0, c->h0);
+ be32enc(md + 4, c->h1);
+ be32enc(md + 8, c->h2);
+ be32enc(md + 12, c->h3);
+ be32enc(md + 16, c->h4);
+
+ explicit_bzero(c, sizeof(*c));
+}
+
+#ifndef SHA1_ASM
+/* invariant: len is a multiple of SHA_CBLOCK */
+static void
+sha1_block(SHA1_CTX *c, const void *data, size_t len)
+{
+ uint32_t w[16];
+ uint32_t h0 = c->h0, h1 = c->h1, h2 = c->h2, h3 = c->h3, h4 = c->h4;
+ const char *p = data;
+
+ while (len >= SHA_CBLOCK) {
+ size_t i;
+ uint32_t a = h0, b = h1, c = h2, d = h3, e = h4;
+ uint32_t f, t, tmp;
+
+# pragma unroll
+ for (i = 0; i < 16; i++)
+ w[i] = be32dec(p + 4*i);
+
+# pragma unroll
+ for (i = 0; i < 16; i++) {
+ f = b & c | ~b & d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K0;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
- /* We can only do the following code for assember, the reason
- * being that the sha1_block 'C' version changes the values
- * in the 'data' array. The assember code avoids this and
- * copies it to a local array. I should be able to do this for
- * the C version as well....
- */
-#if 1
-#if BYTE_ORDER == BIG_ENDIAN || defined(SHA1_ASM)
- if ((((unsigned int)data)%sizeof(u_int32_t)) == 0)
- {
- sw=len/SHA_CBLOCK;
- if (sw)
- {
- sw*=SHA_CBLOCK;
- sha1_block(c,(u_int32_t *)data,sw);
- data+=sw;
- len-=sw;
- }
+
+# pragma unroll
+ for (; i < 20; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = b & c | ~b & d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K0;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
-#endif
-#endif
- /* we now can process the input data in blocks of SHA_CBLOCK
- * chars and save the leftovers to c->data. */
- p=c->data;
- while (len >= SHA_CBLOCK)
- {
-#if BYTE_ORDER == BIG_ENDIAN || BYTE_ORDER == LITTLE_ENDIAN
- if (p != (u_int32_t *)data)
- memcpy(p,data,SHA_CBLOCK);
- data+=SHA_CBLOCK;
-# if BYTE_ORDER == LITTLE_ENDIAN
-# ifndef SHA1_ASM /* Will not happen */
- for (sw=(SHA_LBLOCK/4); sw; sw--)
- {
- Endian_Reverse32(p[0]);
- Endian_Reverse32(p[1]);
- Endian_Reverse32(p[2]);
- Endian_Reverse32(p[3]);
- p+=4;
- }
- p=c->data;
-# endif
-# endif
-#else
- for (sw=(SHA_BLOCK/4); sw; sw--)
- {
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- }
- p=c->data;
-#endif
- sha1_block(c,p,64);
- len-=SHA_CBLOCK;
+
+# pragma unroll
+ for (; i < 40; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = b ^ c ^ d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K1;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
- ec=(int)len;
- c->num=ec;
- ew=(ec>>2);
- ec&=0x03;
-
- for (sw=0; sw < ew; sw++)
- { M_c2nl(data,l); p[sw]=l; }
- M_c2nl_p(data,l,ec);
- p[sw]=l;
- }
-static void SHA1_Transform(SHA_CTX *c, unsigned char *b)
- {
- u_int32_t p[16];
-#if BYTE_ORDER != BIG_ENDIAN
- u_int32_t *q;
- int i;
-#endif
+# pragma unroll
+ for (; i < 60; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = (b | c) & d | b & c;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K2;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
+ }
-#if BYTE_ORDER == BIG_ENDIAN || BYTE_ORDER == LITTLE_ENDIAN
- memcpy(p,b,64);
-#if BYTE_ORDER == LITTLE_ENDIAN
- q=p;
- for (i=(SHA_LBLOCK/4); i; i--)
- {
- Endian_Reverse32(q[0]);
- Endian_Reverse32(q[1]);
- Endian_Reverse32(q[2]);
- Endian_Reverse32(q[3]);
- q+=4;
+# pragma unroll
+ for (; i < 80; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = b ^ c ^ d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K3;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
-#endif
-#else
- q=p;
- for (i=(SHA_LBLOCK/4); i; i--)
- {
- u_int32_t l;
- c2nl(b,l); *(q++)=l;
- c2nl(b,l); *(q++)=l;
- c2nl(b,l); *(q++)=l;
- c2nl(b,l); *(q++)=l;
- }
-#endif
- sha1_block(c,p,64);
- }
-#ifndef SHA1_ASM
+ h0 += a;
+ h1 += b;
+ h2 += c;
+ h3 += d;
+ h4 += e;
-void
-sha1_block(SHA_CTX *c, const u_int32_t *W, int num)
-{
- u_int32_t A,B,C,D,E,T;
- u_int32_t X[16];
-
- A=c->h0;
- B=c->h1;
- C=c->h2;
- D=c->h3;
- E=c->h4;
-
- for (;;)
- {
- BODY_00_15( 0,A,B,C,D,E,T,W);
- BODY_00_15( 1,T,A,B,C,D,E,W);
- BODY_00_15( 2,E,T,A,B,C,D,W);
- BODY_00_15( 3,D,E,T,A,B,C,W);
- BODY_00_15( 4,C,D,E,T,A,B,W);
- BODY_00_15( 5,B,C,D,E,T,A,W);
- BODY_00_15( 6,A,B,C,D,E,T,W);
- BODY_00_15( 7,T,A,B,C,D,E,W);
- BODY_00_15( 8,E,T,A,B,C,D,W);
- BODY_00_15( 9,D,E,T,A,B,C,W);
- BODY_00_15(10,C,D,E,T,A,B,W);
- BODY_00_15(11,B,C,D,E,T,A,W);
- BODY_00_15(12,A,B,C,D,E,T,W);
- BODY_00_15(13,T,A,B,C,D,E,W);
- BODY_00_15(14,E,T,A,B,C,D,W);
- BODY_00_15(15,D,E,T,A,B,C,W);
- BODY_16_19(16,C,D,E,T,A,B,W,W,W,W);
- BODY_16_19(17,B,C,D,E,T,A,W,W,W,W);
- BODY_16_19(18,A,B,C,D,E,T,W,W,W,W);
- BODY_16_19(19,T,A,B,C,D,E,W,W,W,X);
-
- BODY_20_31(20,E,T,A,B,C,D,W,W,W,X);
- BODY_20_31(21,D,E,T,A,B,C,W,W,W,X);
- BODY_20_31(22,C,D,E,T,A,B,W,W,W,X);
- BODY_20_31(23,B,C,D,E,T,A,W,W,W,X);
- BODY_20_31(24,A,B,C,D,E,T,W,W,X,X);
- BODY_20_31(25,T,A,B,C,D,E,W,W,X,X);
- BODY_20_31(26,E,T,A,B,C,D,W,W,X,X);
- BODY_20_31(27,D,E,T,A,B,C,W,W,X,X);
- BODY_20_31(28,C,D,E,T,A,B,W,W,X,X);
- BODY_20_31(29,B,C,D,E,T,A,W,W,X,X);
- BODY_20_31(30,A,B,C,D,E,T,W,X,X,X);
- BODY_20_31(31,T,A,B,C,D,E,W,X,X,X);
- BODY_32_39(32,E,T,A,B,C,D,X);
- BODY_32_39(33,D,E,T,A,B,C,X);
- BODY_32_39(34,C,D,E,T,A,B,X);
- BODY_32_39(35,B,C,D,E,T,A,X);
- BODY_32_39(36,A,B,C,D,E,T,X);
- BODY_32_39(37,T,A,B,C,D,E,X);
- BODY_32_39(38,E,T,A,B,C,D,X);
- BODY_32_39(39,D,E,T,A,B,C,X);
-
- BODY_40_59(40,C,D,E,T,A,B,X);
- BODY_40_59(41,B,C,D,E,T,A,X);
- BODY_40_59(42,A,B,C,D,E,T,X);
- BODY_40_59(43,T,A,B,C,D,E,X);
- BODY_40_59(44,E,T,A,B,C,D,X);
- BODY_40_59(45,D,E,T,A,B,C,X);
- BODY_40_59(46,C,D,E,T,A,B,X);
- BODY_40_59(47,B,C,D,E,T,A,X);
- BODY_40_59(48,A,B,C,D,E,T,X);
- BODY_40_59(49,T,A,B,C,D,E,X);
- BODY_40_59(50,E,T,A,B,C,D,X);
- BODY_40_59(51,D,E,T,A,B,C,X);
- BODY_40_59(52,C,D,E,T,A,B,X);
- BODY_40_59(53,B,C,D,E,T,A,X);
- BODY_40_59(54,A,B,C,D,E,T,X);
- BODY_40_59(55,T,A,B,C,D,E,X);
- BODY_40_59(56,E,T,A,B,C,D,X);
- BODY_40_59(57,D,E,T,A,B,C,X);
- BODY_40_59(58,C,D,E,T,A,B,X);
- BODY_40_59(59,B,C,D,E,T,A,X);
-
- BODY_60_79(60,A,B,C,D,E,T,X);
- BODY_60_79(61,T,A,B,C,D,E,X);
- BODY_60_79(62,E,T,A,B,C,D,X);
- BODY_60_79(63,D,E,T,A,B,C,X);
- BODY_60_79(64,C,D,E,T,A,B,X);
- BODY_60_79(65,B,C,D,E,T,A,X);
- BODY_60_79(66,A,B,C,D,E,T,X);
- BODY_60_79(67,T,A,B,C,D,E,X);
- BODY_60_79(68,E,T,A,B,C,D,X);
- BODY_60_79(69,D,E,T,A,B,C,X);
- BODY_60_79(70,C,D,E,T,A,B,X);
- BODY_60_79(71,B,C,D,E,T,A,X);
- BODY_60_79(72,A,B,C,D,E,T,X);
- BODY_60_79(73,T,A,B,C,D,E,X);
- BODY_60_79(74,E,T,A,B,C,D,X);
- BODY_60_79(75,D,E,T,A,B,C,X);
- BODY_60_79(76,C,D,E,T,A,B,X);
- BODY_60_79(77,B,C,D,E,T,A,X);
- BODY_60_79(78,A,B,C,D,E,T,X);
- BODY_60_79(79,T,A,B,C,D,E,X);
-
- c->h0=(c->h0+E)&0xffffffffL;
- c->h1=(c->h1+T)&0xffffffffL;
- c->h2=(c->h2+A)&0xffffffffL;
- c->h3=(c->h3+B)&0xffffffffL;
- c->h4=(c->h4+C)&0xffffffffL;
-
- num-=64;
- if (num <= 0) break;
-
- A=c->h0;
- B=c->h1;
- C=c->h2;
- D=c->h3;
- E=c->h4;
-
- W+=16;
- }
+ p += SHA_CBLOCK;
+ len -= SHA_CBLOCK;
}
-#endif
-void SHA1_Final(unsigned char *md, SHA_CTX *c)
- {
- int i,j;
- u_int32_t l;
- u_int32_t *p;
- static unsigned char end[4]={0x80,0x00,0x00,0x00};
- unsigned char *cp=end;
-
- /* c->num should definitly have room for at least one more byte. */
- p=c->data;
- j=c->num;
- i=j>>2;
-#ifdef PURIFY
- if ((j&0x03) == 0) p[i]=0;
+ c->h0 = h0;
+ c->h1 = h1;
+ c->h2 = h2;
+ c->h3 = h3;
+ c->h4 = h4;
+}
#endif
- l=p[i];
- M_p_c2nl(cp,l,j&0x03);
- p[i]=l;
- i++;
- /* i is the next 'undefined word' */
- if (c->num >= SHA_LAST_BLOCK)
- {
- for (; i<SHA_LBLOCK; i++)
- p[i]=0;
- sha1_block(c,p,64);
- i=0;
- }
- for (; i<(SHA_LBLOCK-2); i++)
- p[i]=0;
- p[SHA_LBLOCK-2]=c->Nh;
- p[SHA_LBLOCK-1]=c->Nl;
-#if BYTE_ORDER == LITTLE_ENDIAN && defined(SHA1_ASM)
- Endian_Reverse32(p[SHA_LBLOCK-2]);
- Endian_Reverse32(p[SHA_LBLOCK-1]);
-#endif
- sha1_block(c,p,64);
- cp=md;
- l=c->h0; nl2c(l,cp);
- l=c->h1; nl2c(l,cp);
- l=c->h2; nl2c(l,cp);
- l=c->h3; nl2c(l,cp);
- l=c->h4; nl2c(l,cp);
-
- /* Clear the context state */
- explicit_bzero(&c, sizeof(c));
- }
#ifdef WEAK_REFS
/* When building libmd, provide weak references. Note: this is not
D45444: lib/libmd: rework and accelerate SHA1 implementation
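A minimal consumer of the reworked code, assuming FreeBSD's <sha.h> and linking against libmd (cc sha1test.c -lmd, where sha1test.c is only a placeholder name): the program below hashes the standard "abc" test vector through SHA1_Init/SHA1_Update/SHA1_Final, and on amd64 the ifunc resolver from sha1dispatch.c picks the scalar, AVX2, or SHA-NI block routine at load time without any change to this calling code.

#include <sha.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	SHA1_CTX ctx;
	unsigned char digest[SHA_DIGEST_LENGTH];
	const char msg[] = "abc";

	SHA1_Init(&ctx);
	SHA1_Update(&ctx, msg, strlen(msg));
	SHA1_Final(digest, &ctx);

	/* expected: a9993e364706816aba3e25717850c26c9cd0d89d */
	for (size_t i = 0; i < sizeof(digest); i++)
		printf("%02x", digest[i]);
	putchar('\n');

	return (0);
}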