D45444.diff
diff --git a/lib/libmd/Makefile b/lib/libmd/Makefile
--- a/lib/libmd/Makefile
+++ b/lib/libmd/Makefile
@@ -117,9 +117,12 @@
.endif
.if ${USE_ASM_SOURCES} != 0
-.if exists(${MACHINE_ARCH}/sha.S)
-SRCS+= sha.S
+.if exists(${MACHINE_ARCH}/sha1block.S)
+SRCS+= sha1block.S
CFLAGS+= -DSHA1_ASM
+.if exists(${MACHINE_ARCH}/sha1dispatch.c)
+SRCS+= sha1dispatch.c
+.endif
.endif
.if exists(${MACHINE_ARCH}/rmd160.S)
SRCS+= rmd160.S
@@ -135,7 +138,7 @@
# the assembly vs C versions, and skein_block needs to be rebuilt if it changes.
skein_block.o skein_block.pico: Makefile
.endif
-.if exists(${MACHINE_ARCH}/sha.S) || exists(${MACHINE_ARCH}/rmd160.S) || exists(${MACHINE_ARCH}/skein_block_asm.S)
+.if exists(${MACHINE_ARCH}/rmd160.S) || exists(${MACHINE_ARCH}/skein_block_asm.S)
ACFLAGS+= -DELF -Wa,--noexecstack
.endif
.if ${MACHINE_CPUARCH} == "aarch64"
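
For reference (illustrative, not part of the change): whichever block routine the Makefile logic above selects, libmd's public SHA-1 interface is unchanged, so a check against the FIPS 180-1 "abc" test vector exercises the dispatched code path. A minimal sketch, assuming only libmd's documented SHA1_Init/SHA1_Update/SHA1_End interface; link with -lmd.

#include <sha.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	SHA1_CTX ctx;
	char digest[41];

	SHA1_Init(&ctx);
	SHA1_Update(&ctx, (const void *)"abc", 3);
	SHA1_End(&ctx, digest);		/* lowercase hex, NUL-terminated */
	if (strcmp(digest, "a9993e364706816aba3e25717850c26c9cd0d89d") != 0) {
		fprintf(stderr, "SHA-1 self-test failed: %s\n", digest);
		return (1);
	}
	puts("SHA-1 self-test passed");
	return (0);
}
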
diff --git a/lib/libmd/aarch64/sha1block.S b/lib/libmd/aarch64/sha1block.S
new file mode 100644
--- /dev/null
+++ b/lib/libmd/aarch64/sha1block.S
@@ -0,0 +1,490 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * sha1block_sha1 implementation based on sha1-arm.c,
+ * written and placed in public domain by Jeffrey Walton
+ * based on code from ARM, and by Johannes Schneiders, Skip
+ * Hovsmith and Barry O'Rourke for the mbedTLS project.
+ */
+
+#include <machine/asm.h>
+
+/*
+ * Scalar SHA1 implementation.
+ *
+ * Due to the ample register file available on AArch64, the w array is
+ * kept entirely in registers. The saved a-e variables are instead kept
+ * in memory as we don't have enough registers left to hold them.
+ */
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_scalar)
+ctx .req x0
+buf .req x1
+len .req x2
+w .req sp
+a .req w3
+b .req w4
+c .req w5
+d .req w6
+e .req w7
+k .req w8
+f .req w9
+tmp .req w10
+w_0 .req w11
+w_1 .req w12
+w_2 .req w13
+w_3 .req w14
+w_4 .req w15
+w_5 .req w16
+w_6 .req w17
+// w18 is the platform register
+w_7 .req w19
+w_8 .req w20
+w_9 .req w21
+w_10 .req w22
+w_11 .req w23
+w_12 .req w24
+w_13 .req w25
+w_14 .req w26
+w_15 .req w27
+
+.macro shuffle w_i, w_i3, w_i8, w_i14
+ eor \w_i, \w_i, \w_i3
+ eor tmp, \w_i8, \w_i14
+ eor \w_i, \w_i, tmp // w[i-16] ^ w[i-14] ^ w[i-8] ^ w[i-3]
+ ror \w_i, \w_i, #31 // w[i] = ... ror #31
+.endm
+
+.macro func1 a, b, c, d, e
+ and f, \c, \b
+ bic tmp, \d, \b
+ orr f, f, tmp
+.endm
+
+.macro func2 a, b, c, d, e
+ eor f, \b, \c
+ eor f, f, \d
+.endm
+
+.macro func3 a, b, c, d, e
+ eor tmp, \b, \c
+ and f, \b, \c
+ and tmp, tmp, \d
+ orr f, f, tmp
+.endm
+
+.macro func4 a, b, c, d, e
+ func2 \a, \b, \c, \d, \e
+.endm
+
+.macro mix a, b, c, d, e, w_i
+ ror \b, \b, #2
+ ror tmp, \a, #27
+ add \e, \e, \w_i
+ add tmp, tmp, k
+ add \e, \e, f
+ add \e, \e, tmp // (a ror 27) + e + f + k + w[i]
+.endm
+
+.macro round1 a, b, c, d, e, w_i
+ func1 \a, \b, \c, \d, \e
+ rev \w_i, \w_i
+ mix \a, \b, \c, \d, \e, \w_i
+.endm
+
+.macro round func, a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ shuffle \w_i, \w_i3, \w_i8, \w_i14
+ \func \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, \w_i
+.endm
+
+.macro round1x a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func1, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+.macro round2 a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func2, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+.macro round3 a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func3, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+.macro round4 a, b, c, d, e, w_i, w_i3, w_i8, w_i14
+ round func4, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
+.endm
+
+ ands len, len, #~63 // take length in multiples of block length
+ beq 1f // bail out if input empty
+
+ sub sp, sp, #24+9*8 // allocate stack space
+ str x19, [sp, #24+0*8]
+ stp x20, x21, [sp, #24+1*8]
+ stp x22, x23, [sp, #24+3*8]
+ stp x24, x25, [sp, #24+5*8]
+ stp x26, x27, [sp, #24+7*8]
+
+ ldp a, b, [ctx, #0] // load SHA1 state from context
+ ldp c, d, [ctx, #8]
+ ldr e, [ctx, #16]
+
+0: stp a, b, [sp, #0] // save old SHA1 state
+ stp c, d, [sp, #8]
+ str e, [sp, #16]
+
+ movz k, #0x7999 // round constant 1
+ movk k, #0x5a82, lsl #16
+
+ ldp w_0, w_1, [buf, #0*4]
+ round1 a, b, c, d, e, w_0
+ round1 e, a, b, c, d, w_1
+
+ ldp w_2, w_3, [buf, #2*4]
+ round1 d, e, a, b, c, w_2
+ round1 c, d, e, a, b, w_3
+
+ ldp w_4, w_5, [buf, #4*4]
+ round1 b, c, d, e, a, w_4
+ round1 a, b, c, d, e, w_5
+
+ ldp w_6, w_7, [buf, #6*4]
+ round1 e, a, b, c, d, w_6
+ round1 d, e, a, b, c, w_7
+
+ ldp w_8, w_9, [buf, #8*4]
+ round1 c, d, e, a, b, w_8
+ round1 b, c, d, e, a, w_9
+
+ ldp w_10, w_11, [buf, #10*4]
+ round1 a, b, c, d, e, w_10
+ round1 e, a, b, c, d, w_11
+
+ ldp w_12, w_13, [buf, #12*4]
+ round1 d, e, a, b, c, w_12
+ round1 c, d, e, a, b, w_13
+
+ ldp w_14, w_15, [buf, #14*4]
+ round1 b, c, d, e, a, w_14
+ round1 a, b, c, d, e, w_15
+
+ round1x e, a, b, c, d, w_0, w_13, w_8, w_2
+ round1x d, e, a, b, c, w_1, w_14, w_9, w_3
+ round1x c, d, e, a, b, w_2, w_15, w_10, w_4
+ round1x b, c, d, e, a, w_3, w_0, w_11, w_5
+
+ movz k, #0xeba1 // round constant 2
+ movk k, #0x6ed9, lsl #16
+
+ round2 a, b, c, d, e, w_4, w_1, w_12, w_6
+ round2 e, a, b, c, d, w_5, w_2, w_13, w_7
+ round2 d, e, a, b, c, w_6, w_3, w_14, w_8
+ round2 c, d, e, a, b, w_7, w_4, w_15, w_9
+ round2 b, c, d, e, a, w_8, w_5, w_0, w_10
+
+ round2 a, b, c, d, e, w_9, w_6, w_1, w_11
+ round2 e, a, b, c, d, w_10, w_7, w_2, w_12
+ round2 d, e, a, b, c, w_11, w_8, w_3, w_13
+ round2 c, d, e, a, b, w_12, w_9, w_4, w_14
+ round2 b, c, d, e, a, w_13, w_10, w_5, w_15
+
+ round2 a, b, c, d, e, w_14, w_11, w_6, w_0
+ round2 e, a, b, c, d, w_15, w_12, w_7, w_1
+ round2 d, e, a, b, c, w_0, w_13, w_8, w_2
+ round2 c, d, e, a, b, w_1, w_14, w_9, w_3
+ round2 b, c, d, e, a, w_2, w_15, w_10, w_4
+
+ round2 a, b, c, d, e, w_3, w_0, w_11, w_5
+ round2 e, a, b, c, d, w_4, w_1, w_12, w_6
+ round2 d, e, a, b, c, w_5, w_2, w_13, w_7
+ round2 c, d, e, a, b, w_6, w_3, w_14, w_8
+ round2 b, c, d, e, a, w_7, w_4, w_15, w_9
+
+ movz k, #0xbcdc // round constant 3
+ movk k, #0x8f1b, lsl #16
+
+ round3 a, b, c, d, e, w_8, w_5, w_0, w_10
+ round3 e, a, b, c, d, w_9, w_6, w_1, w_11
+ round3 d, e, a, b, c, w_10, w_7, w_2, w_12
+ round3 c, d, e, a, b, w_11, w_8, w_3, w_13
+ round3 b, c, d, e, a, w_12, w_9, w_4, w_14
+
+ round3 a, b, c, d, e, w_13, w_10, w_5, w_15
+ round3 e, a, b, c, d, w_14, w_11, w_6, w_0
+ round3 d, e, a, b, c, w_15, w_12, w_7, w_1
+ round3 c, d, e, a, b, w_0, w_13, w_8, w_2
+ round3 b, c, d, e, a, w_1, w_14, w_9, w_3
+
+ round3 a, b, c, d, e, w_2, w_15, w_10, w_4
+ round3 e, a, b, c, d, w_3, w_0, w_11, w_5
+ round3 d, e, a, b, c, w_4, w_1, w_12, w_6
+ round3 c, d, e, a, b, w_5, w_2, w_13, w_7
+ round3 b, c, d, e, a, w_6, w_3, w_14, w_8
+
+ round3 a, b, c, d, e, w_7, w_4, w_15, w_9
+ round3 e, a, b, c, d, w_8, w_5, w_0, w_10
+ round3 d, e, a, b, c, w_9, w_6, w_1, w_11
+ round3 c, d, e, a, b, w_10, w_7, w_2, w_12
+ round3 b, c, d, e, a, w_11, w_8, w_3, w_13
+
+ movz k, #0xc1d6 // round constant 4
+ movk k, #0xca62, lsl #16
+
+ round4 a, b, c, d, e, w_12, w_9, w_4, w_14
+ round4 e, a, b, c, d, w_13, w_10, w_5, w_15
+ round4 d, e, a, b, c, w_14, w_11, w_6, w_0
+ round4 c, d, e, a, b, w_15, w_12, w_7, w_1
+ round4 b, c, d, e, a, w_0, w_13, w_8, w_2
+
+ round4 a, b, c, d, e, w_1, w_14, w_9, w_3
+ round4 e, a, b, c, d, w_2, w_15, w_10, w_4
+ round4 d, e, a, b, c, w_3, w_0, w_11, w_5
+ round4 c, d, e, a, b, w_4, w_1, w_12, w_6
+ round4 b, c, d, e, a, w_5, w_2, w_13, w_7
+
+ round4 a, b, c, d, e, w_6, w_3, w_14, w_8
+ round4 e, a, b, c, d, w_7, w_4, w_15, w_9
+ round4 d, e, a, b, c, w_8, w_5, w_0, w_10
+ round4 c, d, e, a, b, w_9, w_6, w_1, w_11
+ round4 b, c, d, e, a, w_10, w_7, w_2, w_12
+
+ round4 a, b, c, d, e, w_11, w_8, w_3, w_13
+ round4 e, a, b, c, d, w_12, w_9, w_4, w_14
+ round4 d, e, a, b, c, w_13, w_10, w_5, w_15
+ round4 c, d, e, a, b, w_14, w_11, w_6, w_0
+ round4 b, c, d, e, a, w_15, w_12, w_7, w_1
+
+ ldp w_0, w_1, [sp, #0] // reload saved SHA1 state
+ ldp w_2, w_3, [sp, #8]
+ ldr w_4, [sp, #16]
+
+ add a, a, w_0
+ add b, b, w_1
+ add c, c, w_2
+ add d, d, w_3
+ add e, e, w_4
+
+ add buf, buf, #64
+ subs len, len, #64
+ bhi 0b
+
+ stp a, b, [ctx, #0] // write updated SHA1 state
+ stp c, d, [ctx, #8]
+ str e, [ctx, #16]
+
+ ldr x19, [sp, #24+0*8]
+ ldp x20, x21, [sp, #24+1*8]
+ ldp x22, x23, [sp, #24+3*8]
+ ldp x24, x25, [sp, #24+5*8]
+ ldp x26, x27, [sp, #24+7*8]
+ add sp, sp, #24+9*8
+
+1: ret
+END(_libmd_sha1block_scalar)
+
+/*
+ * SHA1 implementation using the SHA1 instruction set extension.
+ */
+
+ .arch_extension sha2
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_sha1)
+ /* ctx, buf, len: same as for sha1block_scalar */
+kaddr .req x3
+abcd .req v0
+abcd_q .req q0 // alias for use with scalar instructions
+abcd_s .req s0
+e0 .req s1
+e0_v .req v1
+e1 .req s2
+abcd_saved .req v3
+e0_saved .req v4
+tmp0 .req v5
+tmp1 .req v6
+msg0 .req v16
+msg1 .req v17
+msg2 .req v18
+msg3 .req v19
+k0 .req v20
+k1 .req v21
+k2 .req v22
+k3 .req v23
+
+ ands len, len, #~63 // take length in multiples of block length
+ beq 1f // bail out if input empty
+
+ ldr abcd_q, [ctx, #0]
+ ldr e0, [ctx, #16]
+
+ adrp kaddr, k1234
+ add kaddr, kaddr, #:lo12:k1234
+ ld4r {k0.4s, k1.4s, k2.4s, k3.4s}, [kaddr]
+
+0: mov abcd_saved.16b, abcd.16b
+ mov e0_saved.16b, e0_v.16b
+
+ ld1 {msg0.4s, msg1.4s, msg2.4s, msg3.4s}, [buf], #64
+ rev32 msg0.16b, msg0.16b
+ rev32 msg1.16b, msg1.16b
+ rev32 msg2.16b, msg2.16b
+ rev32 msg3.16b, msg3.16b
+
+ add tmp0.4s, msg0.4s, k0.4s
+ add tmp1.4s, msg1.4s, k0.4s
+
+ /* rounds 0--3 */
+ sha1h e1, abcd_s
+ sha1c abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k0.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 4--7 */
+ sha1h e0, abcd_s
+ sha1c abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k0.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 8--11 */
+ sha1h e1, abcd_s
+ sha1c abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k0.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 12--15 */
+ sha1h e0, abcd_s
+ sha1c abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k1.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 16--19 */
+ sha1h e1, abcd_s
+ sha1c abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k1.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 20--23 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k1.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 24--27 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k1.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 28--31 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k1.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 32--35 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k2.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 36--39 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k2.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 40--43 */
+ sha1h e1, abcd_s
+ sha1m abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k2.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 44--47 */
+ sha1h e0, abcd_s
+ sha1m abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k2.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 48--51 */
+ sha1h e1, abcd_s
+ sha1m abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k2.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 52--55 */
+ sha1h e0, abcd_s
+ sha1m abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k3.4s
+ sha1su1 msg0.4s, msg3.4s
+ sha1su0 msg1.4s, msg2.4s, msg3.4s
+
+ /* rounds 56--59 */
+ sha1h e1, abcd_s
+ sha1m abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg0.4s, k3.4s
+ sha1su1 msg1.4s, msg0.4s
+ sha1su0 msg2.4s, msg3.4s, msg0.4s
+
+ /* rounds 60--63 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg1.4s, k3.4s
+ sha1su1 msg2.4s, msg1.4s
+ sha1su0 msg3.4s, msg0.4s, msg1.4s
+
+ /* rounds 64--67 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+ add tmp0.4s, msg2.4s, k3.4s
+ sha1su1 msg3.4s, msg2.4s
+ sha1su0 msg0.4s, msg1.4s, msg2.4s
+
+ /* rounds 68--71 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+ add tmp1.4s, msg3.4s, k3.4s
+ sha1su1 msg0.4s, msg3.4s
+
+ /* rounds 72--75 */
+ sha1h e1, abcd_s
+ sha1p abcd_q, e0, tmp0.4s
+
+ /* rounds 76--79 */
+ sha1h e0, abcd_s
+ sha1p abcd_q, e1, tmp1.4s
+
+ add e0_v.4s, e0_v.4s, e0_saved.4s
+ add abcd.4s, abcd.4s, abcd_saved.4s
+
+ subs len, len, #64
+ bhi 0b
+
+ str abcd_q, [ctx, #0]
+ str e0, [ctx, #16]
+
+1: ret
+END(_libmd_sha1block_sha1)
+
+ .section .rodata
+ .balign 16
+k1234: .4byte 0x5a827999
+ .4byte 0x6ed9eba1
+ .4byte 0x8f1bbcdc
+ .4byte 0xca62c1d6
+ .size k1234, .-k1234
+
+ .section .note.GNU-stack,"",%progbits
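
For reference (illustrative, not part of the change): the per-round arithmetic of the scalar routine above, written out in C. This is a sketch of what the func1/func2/func3, shuffle and mix macros compute, using only the standard SHA-1 definitions.

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, int n) { return ((x << n) | (x >> (32 - n))); }

/* func1: "choice"; func2/func4: parity; func3: majority ((b&c)|((b^c)&d) above). */
static uint32_t f1(uint32_t b, uint32_t c, uint32_t d) { return ((b & c) | (~b & d)); }
static uint32_t f2(uint32_t b, uint32_t c, uint32_t d) { return (b ^ c ^ d); }
static uint32_t f3(uint32_t b, uint32_t c, uint32_t d) { return ((b & c) | (b & d) | (c & d)); }

/* shuffle: w[] is a 16-entry ring buffer; ror #31 is the same as rol #1. */
static uint32_t
sha1_schedule(uint32_t w[16], int i)
{
	uint32_t x = w[i & 15] ^ w[(i - 3) & 15] ^ w[(i - 8) & 15] ^ w[(i - 14) & 15];

	return (w[i & 15] = rol32(x, 1));
}

/* mix: e += rol(a, 5) + f + k + w[i] (ror #27 == rol #5), then b = rol(b, 30). */
static void
sha1_mix(uint32_t a, uint32_t *b, uint32_t *e, uint32_t f, uint32_t k, uint32_t w_i)
{
	*e += rol32(a, 5) + f + k + w_i;
	*b = rol32(*b, 30);
}
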
diff --git a/lib/libmd/aarch64/sha1dispatch.c b/lib/libmd/aarch64/sha1dispatch.c
new file mode 100644
--- /dev/null
+++ b/lib/libmd/aarch64/sha1dispatch.c
@@ -0,0 +1,24 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <machine/ifunc.h>
+#include <sha.h>
+#include <sys/auxv.h>
+
+extern void _libmd_sha1block_scalar(SHA1_CTX *, const void *, size_t);
+extern void _libmd_sha1block_sha1(SHA1_CTX *, const void *, size_t);
+
+DEFINE_IFUNC(, void, sha1_block, (SHA1_CTX *, const void *, size_t))
+{
+ unsigned long hwcap = 0;
+
+ elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
+
+ if (hwcap & HWCAP_SHA1)
+ return (_libmd_sha1block_sha1);
+ else
+ return (_libmd_sha1block_scalar);
+}
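
For reference (illustrative, not part of the change): the resolver above runs once at load time. The same HWCAP probe can be made from an ordinary program to see which variant the dispatcher would pick; this sketch uses the same headers as the dispatcher and assumes HWCAP_SHA1 is visible through them, as it is in the file above.

#include <sys/auxv.h>
#include <stdio.h>

int
main(void)
{
	unsigned long hwcap = 0;

	elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
	printf("SHA-1 instructions %s; dispatcher would pick %s\n",
	    (hwcap & HWCAP_SHA1) ? "present" : "absent",
	    (hwcap & HWCAP_SHA1) ? "_libmd_sha1block_sha1" : "_libmd_sha1block_scalar");
	return (0);
}
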
diff --git a/lib/libmd/amd64/sha1block.S b/lib/libmd/amd64/sha1block.S
new file mode 100644
--- /dev/null
+++ b/lib/libmd/amd64/sha1block.S
@@ -0,0 +1,1851 @@
+/*-
+ * Copyright (c) 2013 The Go Authors. All rights reserved.
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * Adapted from Go's crypto/sha1/sha1block_amd64.s.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+/*
+ * SHA-1 block routine. See sha1c.c for C equivalent.
+ *
+ * There are 80 rounds of 4 types:
+ * - rounds 0-15 are type 1 and load data (round1 macro).
+ * - rounds 16-19 are type 1 and do not load data (round1x macro).
+ * - rounds 20-39 are type 2 and do not load data (round2 macro).
+ * - rounds 40-59 are type 3 and do not load data (round3 macro).
+ * - rounds 60-79 are type 4 and do not load data (round4 macro).
+ *
+ * Each round loads or shuffles the data, then computes a per-round
+ * function of b, c, d, and then mixes the result into and rotates the
+ * five registers a, b, c, d, e holding the intermediate results.
+ *
+ * The register rotation is implemented by rotating the arguments to
+ * the round macros instead of by explicit move instructions.
+ */
+.macro load index
+ mov (\index)*4(%rsi), %r10d
+ bswap %r10d
+ mov %r10d, (\index)*4(%rsp)
+.endm
+
+.macro shuffle index
+ mov ((\index )&0xf)*4(%rsp), %r10d
+ xor ((\index- 3)&0xf)*4(%rsp), %r10d
+ xor ((\index- 8)&0xf)*4(%rsp), %r10d
+ xor ((\index-14)&0xf)*4(%rsp), %r10d
+ rol $1, %r10d
+ mov %r10d, ((\index)&0xf)*4(%rsp)
+.endm
+
+.macro func1 a, b, c, d, e
+ mov \d, %r9d
+ xor \c, %r9d
+ and \b, %r9d
+ xor \d, %r9d
+.endm
+
+.macro func2 a, b, c, d, e
+ mov \b, %r9d
+ xor \c, %r9d
+ xor \d, %r9d
+.endm
+
+.macro func3 a, b, c, d, e
+ mov \b, %r8d
+ or \c, %r8d
+ and \d, %r8d
+ mov \b, %r9d
+ and \c, %r9d
+ or %r8d, %r9d
+.endm
+
+.macro func4 a, b, c, d, e
+ func2 \a, \b, \c, \d, \e
+.endm
+
+.macro mix a, b, c, d, e, const
+ rol $30, \b
+ add %r9d, \e
+ mov \a, %r8d
+ rol $5, %r8d
+ lea \const(\e, %r10d, 1), \e
+ add %r8d, \e
+.endm
+
+.macro round1 a, b, c, d, e, index
+ load \index
+ func1 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x5a827999
+.endm
+
+.macro round1x a, b, c, d, e, index
+ shuffle \index
+ func1 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x5a827999
+.endm
+
+.macro round2 a, b, c, d, e, index
+ shuffle \index
+ func2 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x6ed9eba1
+.endm
+
+.macro round3 a, b, c, d, e, index
+ shuffle \index
+ func3 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0x8f1bbcdc
+.endm
+
+.macro round4 a, b, c, d, e, index
+ shuffle \index
+ func4 \a, \b, \c, \d, \e
+ mix \a, \b, \c, \d, \e, 0xca62c1d6
+.endm
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_scalar)
+ push %rbp
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ push %rdi // rdi: SHA1_CTX
+ sub $64+8, %rsp // 64 bytes for round keys
+ // plus alignment
+
+ mov %rdi, %rbp
+ // rsi: buf
+ and $~63, %rdx // rdx: length in blocks
+ lea (%rsi, %rdx, 1), %rdi // rdi: end pointer
+ mov (%rbp), %eax // c->h0
+ mov 4(%rbp), %ebx // c->h1
+ mov 8(%rbp), %ecx // c->h2
+ mov 12(%rbp), %edx // c->h3
+ mov 16(%rbp), %ebp // c->h4
+
+ cmp %rsi, %rdi // any data to process?
+ je .Lend
+
+.Lloop: mov %eax, %r11d
+ mov %ebx, %r12d
+ mov %ecx, %r13d
+ mov %edx, %r14d
+ mov %ebp, %r15d
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 0
+ round1 %ebp, %eax, %ebx, %ecx, %edx, 1
+ round1 %edx, %ebp, %eax, %ebx, %ecx, 2
+ round1 %ecx, %edx, %ebp, %eax, %ebx, 3
+ round1 %ebx, %ecx, %edx, %ebp, %eax, 4
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 5
+ round1 %ebp, %eax, %ebx, %ecx, %edx, 6
+ round1 %edx, %ebp, %eax, %ebx, %ecx, 7
+ round1 %ecx, %edx, %ebp, %eax, %ebx, 8
+ round1 %ebx, %ecx, %edx, %ebp, %eax, 9
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 10
+ round1 %ebp, %eax, %ebx, %ecx, %edx, 11
+ round1 %edx, %ebp, %eax, %ebx, %ecx, 12
+ round1 %ecx, %edx, %ebp, %eax, %ebx, 13
+ round1 %ebx, %ecx, %edx, %ebp, %eax, 14
+
+ round1 %eax, %ebx, %ecx, %edx, %ebp, 15
+ round1x %ebp, %eax, %ebx, %ecx, %edx, 16
+ round1x %edx, %ebp, %eax, %ebx, %ecx, 17
+ round1x %ecx, %edx, %ebp, %eax, %ebx, 18
+ round1x %ebx, %ecx, %edx, %ebp, %eax, 19
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 20
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 21
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 22
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 23
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 24
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 25
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 26
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 27
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 28
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 29
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 30
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 31
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 32
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 33
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 34
+
+ round2 %eax, %ebx, %ecx, %edx, %ebp, 35
+ round2 %ebp, %eax, %ebx, %ecx, %edx, 36
+ round2 %edx, %ebp, %eax, %ebx, %ecx, 37
+ round2 %ecx, %edx, %ebp, %eax, %ebx, 38
+ round2 %ebx, %ecx, %edx, %ebp, %eax, 39
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 40
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 41
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 42
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 43
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 44
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 45
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 46
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 47
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 48
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 49
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 50
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 51
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 52
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 53
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 54
+
+ round3 %eax, %ebx, %ecx, %edx, %ebp, 55
+ round3 %ebp, %eax, %ebx, %ecx, %edx, 56
+ round3 %edx, %ebp, %eax, %ebx, %ecx, 57
+ round3 %ecx, %edx, %ebp, %eax, %ebx, 58
+ round3 %ebx, %ecx, %edx, %ebp, %eax, 59
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 60
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 61
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 62
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 63
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 64
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 65
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 66
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 67
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 68
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 69
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 70
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 71
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 72
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 73
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 74
+
+ round4 %eax, %ebx, %ecx, %edx, %ebp, 75
+ round4 %ebp, %eax, %ebx, %ecx, %edx, 76
+ round4 %edx, %ebp, %eax, %ebx, %ecx, 77
+ round4 %ecx, %edx, %ebp, %eax, %ebx, 78
+ round4 %ebx, %ecx, %edx, %ebp, %eax, 79
+
+ add %r11d, %eax
+ add %r12d, %ebx
+ add %r13d, %ecx
+ add %r14d, %edx
+ add %r15d, %ebp
+
+ add $64, %rsi
+ cmp %rdi, %rsi
+ jb .Lloop
+
+.Lend: add $64+8, %rsp
+ pop %rdi // SHA1_CTX
+ mov %eax, (%rdi)
+ mov %ebx, 4(%rdi)
+ mov %ecx, 8(%rdi)
+ mov %edx, 12(%rdi)
+ mov %ebp, 16(%rdi)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
+ pop %rbp
+ ret
+END(_libmd_sha1block_scalar)
+
+/*
+ * This is the implementation using AVX2, BMI1 and BMI2. It is based on:
+ * "SHA-1 implementation with Intel(R) AVX2 instruction set extensions"
+ * From http://software.intel.com/en-us/articles
+ * (look for improving-the-performance-of-the-secure-hash-algorithm-1)
+ * This implementation is 2x unrolled and interleaves the vector
+ * instructions used to precompute W with the scalar computation of the
+ * current round, for optimal scheduling.
+ */
+
+ /* trivial helper macros */
+.macro update_hash a, tb, c, d, e
+ add (%r9), \a
+ mov \a, (%r9)
+ add 4(%r9), \tb
+ mov \tb, 4(%r9)
+ add 8(%r9), \c
+ mov \c, 8(%r9)
+ add 12(%r9), \d
+ mov \d, 12(%r9)
+ add 16(%r9), \e
+ mov \e, 16(%r9)
+.endm
+
+	/* helper macros for precalc, which does precomputations */
+.macro precalc0 offset
+ vmovdqu \offset(%r10), %xmm0
+.endm
+
+.macro precalc1 offset
+ vinserti128 $1, \offset(%r13), %ymm0, %ymm0
+.endm
+
+.macro precalc2 yreg
+ vpshufb %ymm10, %ymm0, \yreg
+.endm
+
+.macro precalc4 yreg, k_offset
+ vpaddd \k_offset(%r8), \yreg, %ymm0
+.endm
+
+.macro precalc7 offset
+ vmovdqu %ymm0, (\offset)*2(%r14)
+.endm
+
+/*
+ * Message scheduling pre-compute for rounds 0-15
+ * r13 is a pointer to the even 64-byte block
+ * r10 is a pointer to the odd 64-byte block
+ * r14 is a pointer to the temp buffer
+ * xmm0 is used as a temp register
+ * yreg is clobbered as part of the computation
+ * offset chooses a 16 byte chunk within a block
+ * r8 is a pointer to the constants block
+ * k_offset chooses K constants relevant to this round
+ * xmm10 holds the swap mask
+ */
+.macro precalc00_15 offset, yreg
+ precalc0 \offset
+ precalc1 \offset
+ precalc2 \yreg
+ precalc4 \yreg, 0
+ precalc7 \offset
+.endm
+
+ /* helper macros for precalc16_31 */
+.macro precalc16 reg_sub16, reg_sub12, reg_sub4, reg
+ vpalignr $8, \reg_sub16, \reg_sub12, \reg // w[i - 14]
+ vpsrldq $4, \reg_sub4, %ymm0 // w[i - 3]
+.endm
+
+.macro precalc17 reg_sub16, reg_sub8, reg
+ vpxor \reg_sub8, \reg, \reg
+ vpxor \reg_sub16, %ymm0, %ymm0
+.endm
+
+.macro precalc18 reg
+ vpxor %ymm0, \reg, \reg
+ vpslldq $12, \reg, %ymm9
+.endm
+
+.macro precalc19 reg
+ vpslld $1, \reg, %ymm0
+ vpsrld $31, \reg, \reg
+ .endm
+
+.macro precalc20 reg
+ vpor \reg, %ymm0, %ymm0
+ vpslld $2, %ymm9, \reg
+.endm
+
+.macro precalc21 reg
+ vpsrld $30, %ymm9, %ymm9
+ vpxor \reg, %ymm0, %ymm0
+.endm
+
+.macro precalc23 reg, k_offset, offset
+ vpxor %ymm9, %ymm0, \reg
+ vpaddd \k_offset(%r8), \reg, %ymm0
+ vmovdqu %ymm0, (\offset)(%r14)
+.endm
+
+/*
+ * Message scheduling pre-compute for rounds 16-31
+ * calculating last 32 w[i] values in 8 XMM registers
+ * pre-calculate K+w[i] values and store to mem
+ * for later load by ALU add instruction.
+ * "brute force" vectorization for rounds 16-31 only
+ * due to w[i]->w[i-3] dependency.
+ * clobbers 5 input ymm registers REG_SUB*
+ * uses xmm0 and xmm9 as temp registers
+ * As always, r8 is a pointer to constants block
+ * and r14 is a pointer to temp buffer
+ */
+.macro precalc16_31 reg, reg_sub4, reg_sub8, reg_sub12, reg_sub16, k_offset, offset
+ precalc16 \reg_sub16, \reg_sub12, \reg_sub4, \reg
+ precalc17 \reg_sub16, \reg_sub8, \reg
+ precalc18 \reg
+ precalc19 \reg
+ precalc20 \reg
+ precalc21 \reg
+ precalc23 \reg, \k_offset, \offset
+.endm
+
+ /* helper macros for precalc_32_79 */
+.macro precalc32 reg_sub8, reg_sub4
+ vpalignr $8, \reg_sub8, \reg_sub4, %ymm0
+.endm
+
+.macro precalc33 reg_sub28, reg
+ vpxor \reg_sub28, \reg, \reg
+.endm
+
+.macro precalc34 reg_sub16
+ vpxor \reg_sub16, %ymm0, %ymm0
+.endm
+
+.macro precalc35 reg
+ vpxor %ymm0, \reg, \reg
+.endm
+
+.macro precalc36 reg
+ vpslld $2, \reg, %ymm0
+.endm
+
+.macro precalc37 reg
+ vpsrld $30, \reg, \reg
+ vpor \reg, %ymm0, \reg
+.endm
+
+.macro precalc39 reg, k_offset, offset
+ vpaddd \k_offset(%r8), \reg, %ymm0
+ vmovdqu %ymm0, \offset(%r14)
+.endm
+
+.macro precalc32_79 reg, reg_sub4, reg_sub8, reg_sub16, reg_sub28, k_offset, offset
+ precalc32 \reg_sub8, \reg_sub4
+ precalc33 \reg_sub28, \reg
+ precalc34 \reg_sub16
+ precalc35 \reg
+ precalc36 \reg
+ precalc37 \reg
+ precalc39 \reg, \k_offset, \offset
+.endm
+
+.macro precalc
+ precalc00_15 0x00, %ymm15
+ precalc00_15 0x10, %ymm14
+ precalc00_15 0x20, %ymm13
+ precalc00_15 0x30, %ymm12
+ precalc16_31 %ymm8, %ymm12, %ymm13, %ymm14, %ymm15, 0x00, 0x080
+ precalc16_31 %ymm7, %ymm8, %ymm12, %ymm13, %ymm14, 0x20, 0x0a0
+ precalc16_31 %ymm5, %ymm7, %ymm8, %ymm12, %ymm13, 0x20, 0x0c0
+ precalc16_31 %ymm3, %ymm5, %ymm7, %ymm8, %ymm12, 0x20, 0x0e0
+ precalc32_79 %ymm15, %ymm3, %ymm5, %ymm8, %ymm14, 0x20, 0x100
+ precalc32_79 %ymm14, %ymm15, %ymm3, %ymm7, %ymm13, 0x20, 0x120
+ precalc32_79 %ymm13, %ymm14, %ymm15, %ymm5, %ymm12, 0x40, 0x140
+ precalc32_79 %ymm12, %ymm13, %ymm14, %ymm3, %ymm8, 0x40, 0x160
+ precalc32_79 %ymm8, %ymm12, %ymm13, %ymm15, %ymm7, 0x40, 0x180
+ precalc32_79 %ymm7, %ymm8, %ymm12, %ymm14, %ymm5, 0x40, 0x1a0
+ precalc32_79 %ymm5, %ymm7, %ymm8, %ymm13, %ymm3, 0x40, 0x1c0
+ precalc32_79 %ymm3, %ymm5, %ymm7, %ymm12, %ymm15, 0x60, 0x1e0
+ precalc32_79 %ymm15, %ymm3, %ymm5, %ymm8, %ymm14, 0x60, 0x200
+ precalc32_79 %ymm14, %ymm15, %ymm3, %ymm7, %ymm13, 0x60, 0x220
+ precalc32_79 %ymm13, %ymm14, %ymm15, %ymm5, %ymm12, 0x60, 0x240
+ precalc32_79 %ymm12, %ymm13, %ymm14, %ymm3, %ymm8, 0x60, 0x260
+.endm
+
+/*
+ * Macros calculating individual rounds have general form
+ * calc_round_pre + precalc_round + calc_round_post
+ * calc_round_{pre,post} macros follow
+ */
+.macro calc_f1_pre offset, reg_a, reg_b, reg_c, reg_e
+ add \offset(%r15), \reg_e
+ andn \reg_c, \reg_a, %ebp
+ add \reg_b, \reg_e // add F from the previous round
+ rorx $0x1b, \reg_a, %r12d
+ rorx $2, \reg_a, \reg_b // for the next round
+.endm
+
+/*
+ * Calculate F for the next round
+ */
+.macro calc_f1_post reg_a, reg_b, reg_e
+ and \reg_b, \reg_a // b & c
+ xor %ebp, \reg_a // F1 = (b&c) ^ (~b&d)
+ add %r12d, \reg_e
+.endm
+
+/*
+ * Registers are cyclically rotated:
+ * edx -> eax -> edi -> esi -> ebx -> ecx
+ */
+.macro calc0
+ mov %esi, %ebx // precalculate first round
+ rorx $2, %esi, %esi
+ andn %eax, %ebx, %ebp
+ and %edi, %ebx
+ xor %ebp, %ebx
+ calc_f1_pre 0x0, %ecx, %ebx, %edi, %edx
+ precalc0 0x80
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc1
+ calc_f1_pre 0x4, %edx, %ecx, %esi, %eax
+ precalc1 0x80
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc2
+ calc_f1_pre 0x8, %eax, %edx, %ebx, %edi
+ precalc2 %ymm15
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc3
+ calc_f1_pre 0xc, %edi, %eax, %ecx, %esi
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc4
+ calc_f1_pre 0x20, %esi, %edi, %edx, %ebx
+ precalc4 %ymm15, 0x0
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc5
+ calc_f1_pre 0x24, %ebx, %esi, %eax, %ecx
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc6
+ calc_f1_pre 0x28, %ecx, %ebx, %edi, %edx
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc7
+ calc_f1_pre 0x2c, %edx, %ecx, %esi, %eax
+ precalc7 0x0
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc8
+ calc_f1_pre 0x40, %eax, %edx, %ebx, %edi
+ precalc0 0x90
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc9
+ calc_f1_pre 0x44, %edi, %eax, %ecx, %esi
+ precalc1 0x90
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc10
+ calc_f1_pre 0x48, %esi, %edi, %edx, %ebx
+ precalc2 %ymm14
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc11
+ calc_f1_pre 0x4c, %ebx, %esi, %eax, %ecx
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc12
+ calc_f1_pre 0x60, %ecx, %ebx, %edi, %edx
+ precalc4 %ymm14, 0
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc13
+ calc_f1_pre 0x64, %edx, %ecx, %esi, %eax
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc14
+ calc_f1_pre 0x68, %eax, %edx, %ebx, %edi
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc15
+ calc_f1_pre 0x6c, %edi, %eax, %ecx, %esi
+ precalc7 0x10
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc16
+ calc_f1_pre 0x80, %esi, %edi, %edx, %ebx
+ precalc0 0xa0
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc17
+ calc_f1_pre 0x84, %ebx, %esi, %eax, %ecx
+ precalc1 0xa0
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc18
+ calc_f1_pre 0x88, %ecx, %ebx, %edi, %edx
+ precalc2 %ymm13
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc_f2_pre offset, reg_a, reg_b, reg_e
+ add \offset(%r15), \reg_e
+ add \reg_b, \reg_e // add F from the previous round
+ rorx $0x1b, \reg_a, %r12d
+ rorx $2, \reg_a, \reg_b // for next round
+.endm
+
+.macro calc_f2_post reg_a, reg_b, reg_c, reg_e
+ xor \reg_b, \reg_a
+ add %r12d, \reg_e
+ xor \reg_c, \reg_a
+.endm
+
+.macro calc19
+ calc_f2_pre 0x8c, %edx, %ecx, %eax
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc20
+ calc_f2_pre 0xa0, %eax, %edx, %edi
+ precalc4 %ymm13, 0x0
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc21
+ calc_f2_pre 0xa4, %edi, %eax, %esi
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc22
+ calc_f2_pre 0xa8, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc23
+ calc_f2_pre 0xac, %ebx, %esi, %ecx
+ precalc7 0x20
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc24
+ calc_f2_pre 0xc0, %ecx, %ebx, %edx
+ precalc0 0xb0
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc25
+ calc_f2_pre 0xc4, %edx, %ecx, %eax
+ precalc1 0xb0
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc26
+ calc_f2_pre 0xc8, %eax, %edx, %edi
+ precalc2 %ymm12
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc27
+ calc_f2_pre 0xcc, %edi, %eax, %esi
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc28
+ calc_f2_pre 0xe0, %esi, %edi, %ebx
+ precalc4 %ymm12, 0x0
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc29
+ calc_f2_pre 0xe4, %ebx, %esi, %ecx
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc30
+ calc_f2_pre 0xe8, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc31
+ calc_f2_pre 0xec, %edx, %ecx, %eax
+ precalc7 0x30
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc32
+ calc_f2_pre 0x100, %eax, %edx, %edi
+ precalc16 %ymm15, %ymm14, %ymm12, %ymm8
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc33
+ calc_f2_pre 0x104, %edi, %eax, %esi
+ precalc17 %ymm15, %ymm13, %ymm8
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc34
+ calc_f2_pre 0x108, %esi, %edi, %ebx
+ precalc18 %ymm8
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc35
+ calc_f2_pre 0x10c, %ebx, %esi, %ecx
+ precalc19 %ymm8
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc36
+ calc_f2_pre 0x120, %ecx, %ebx, %edx
+ precalc20 %ymm8
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc37
+ calc_f2_pre 0x124, %edx, %ecx, %eax
+ precalc21 %ymm8
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc38
+ calc_f2_pre 0x128, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc_f3_pre offset, reg_e
+ add \offset(%r15), \reg_e
+.endm
+
+.macro calc_f3_post reg_a, reg_b, reg_c, reg_e, reg_tb
+ add \reg_tb, \reg_e // add F from the previous round
+ mov \reg_b, %ebp
+ or \reg_a, %ebp
+ rorx $0x1b, \reg_a, %r12d
+ rorx $2, \reg_a, \reg_tb
+ and \reg_c, %ebp // calculate F for the next round
+ and \reg_b, \reg_a
+ or %ebp, \reg_a
+ add %r12d, \reg_e
+.endm
+
+.macro calc39
+ calc_f3_pre 0x12c, %esi
+ precalc23 %ymm8, 0x0, 0x80
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc40
+ calc_f3_pre 0x140, %ebx
+ precalc16 %ymm14, %ymm13, %ymm8, %ymm7
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc41
+ calc_f3_pre 0x144, %ecx
+ precalc17 %ymm14, %ymm12, %ymm7
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc42
+ calc_f3_pre 0x148, %edx
+ precalc18 %ymm7
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc43
+ calc_f3_pre 0x14c, %eax
+ precalc19 %ymm7
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc44
+ calc_f3_pre 0x160, %edi
+ precalc20 %ymm7
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc45
+ calc_f3_pre 0x164, %esi
+ precalc21 %ymm7
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc46
+ calc_f3_pre 0x168, %ebx
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc47
+ calc_f3_pre 0x16c, %ecx
+ vpxor %ymm9, %ymm0, %ymm7
+ vpaddd 0x20(%r8), %ymm7, %ymm0
+ vmovdqu %ymm0, 0xa0(%r14)
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc48
+ calc_f3_pre 0x180, %edx
+ precalc16 %ymm13, %ymm12, %ymm7, %ymm5
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc49
+ calc_f3_pre 0x184, %eax
+ precalc17 %ymm13, %ymm8, %ymm5
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc50
+ calc_f3_pre 0x188, %edi
+ precalc18 %ymm5
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc51
+ calc_f3_pre 0x18c, %esi
+ precalc19 %ymm5
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc52
+ calc_f3_pre 0x1a0, %ebx
+ precalc20 %ymm5
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc53
+ calc_f3_pre 0x1a4, %ecx
+ precalc21 %ymm5
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc54
+ calc_f3_pre 0x1a8, %edx
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc55
+ calc_f3_pre 0x1ac, %eax
+ precalc23 %ymm5, 0x20, 0xc0
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc56
+ calc_f3_pre 0x1c0, %edi
+ precalc16 %ymm12, %ymm8, %ymm5, %ymm3
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc57
+ calc_f3_pre 0x1c4, %esi
+ precalc17 %ymm12, %ymm7, %ymm3
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc58
+ calc_f3_pre 0x1c8, %ebx
+ precalc18 %ymm3
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc59
+ calc_f2_pre 0x1cc, %ebx, %esi, %ecx
+ precalc19 %ymm3
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc60
+ calc_f2_pre 0x1e0, %ecx, %ebx, %edx
+ precalc20 %ymm3
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc61
+ calc_f2_pre 0x1e4, %edx, %ecx, %eax
+ precalc21 %ymm3
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc62
+ calc_f2_pre 0x1e8, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc63
+ calc_f2_pre 0x1ec, %edi, %eax, %esi
+ precalc23 %ymm3, 0x20, 0xe0
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc64
+ calc_f2_pre 0x200, %esi, %edi, %ebx
+ precalc32 %ymm5, %ymm3
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc65
+ calc_f2_pre 0x204, %ebx, %esi, %ecx
+ precalc33 %ymm14, %ymm15
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc66
+ calc_f2_pre 0x208, %ecx, %ebx, %edx
+ precalc34 %ymm8
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc67
+ calc_f2_pre 0x20c, %edx, %ecx, %eax
+ precalc35 %ymm15
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc68
+ calc_f2_pre 0x220, %eax, %edx, %edi
+ precalc36 %ymm15
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc69
+ calc_f2_pre 0x224, %edi, %eax, %esi
+ precalc37 %ymm15
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc70
+ calc_f2_pre 0x228, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc71
+ calc_f2_pre 0x22c, %ebx, %esi, %ecx
+ precalc39 %ymm15, 0x20, 0x100
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc72
+ calc_f2_pre 0x240, %ecx, %ebx, %edx
+ precalc32 %ymm3, %ymm15
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc73
+ calc_f2_pre 0x244, %edx, %ecx, %eax
+ precalc33 %ymm13, %ymm14
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc74
+ calc_f2_pre 0x248, %eax, %edx, %edi
+ precalc34 %ymm7
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc75
+ calc_f2_pre 0x24c, %edi, %eax, %esi
+ precalc35 %ymm14
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc76
+ calc_f2_pre 0x260, %esi, %edi, %ebx
+ precalc36 %ymm14
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc77
+ calc_f2_pre 0x264, %ebx, %esi, %ecx
+ precalc37 %ymm14
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc78
+ calc_f2_pre 0x268, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc79
+ add 0x26c(%r15), %eax
+ add %ecx, %eax
+ rorx $0x1b, %edx, %r12d
+ precalc39 %ymm14, 0x20, 0x120
+ add %r12d, %eax
+.endm
+
+/*
+ * Similar to calc0
+ */
+.macro calc80
+ mov %ecx, %edx // precalculate first round
+ rorx $2, %ecx, %ecx
+ andn %esi, %edx, %ebp
+ and %ebx, %edx
+ xor %ebp, %edx
+ calc_f1_pre 0x10, %eax, %edx, %ebx, %edi
+ precalc32 %ymm15, %ymm14
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc81
+ calc_f1_pre 0x14, %edi, %eax, %ecx, %esi
+ precalc33 %ymm12, %ymm13
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc82
+ calc_f1_pre 0x18, %esi, %edi, %edx, %ebx
+ precalc34 %ymm5
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc83
+ calc_f1_pre 0x1c, %ebx, %esi, %eax, %ecx
+ precalc35 %ymm13
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc84
+ calc_f1_pre 0x30, %ecx, %ebx, %edi, %edx
+ precalc36 %ymm13
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc85
+ calc_f1_pre 0x34, %edx, %ecx, %esi, %eax
+ precalc37 %ymm13
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc86
+ calc_f1_pre 0x38, %eax, %edx, %ebx, %edi
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc87
+ calc_f1_pre 0x3c, %edi, %eax, %ecx, %esi
+ precalc39 %ymm13, 0x40, 0x140
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc88
+ calc_f1_pre 0x50, %esi, %edi, %edx, %ebx
+ precalc32 %ymm14, %ymm13
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc89
+ calc_f1_pre 0x54, %ebx, %esi, %eax, %ecx
+ precalc33 %ymm8, %ymm12
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc90
+ calc_f1_pre 0x58, %ecx, %ebx, %edi, %edx
+ precalc34 %ymm3
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc91
+ calc_f1_pre 0x5c, %edx, %ecx, %esi, %eax
+ precalc35 %ymm12
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc92
+ calc_f1_pre 0x70, %eax, %edx, %ebx, %edi
+ precalc36 %ymm12
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc93
+ calc_f1_pre 0x74, %edi, %eax, %ecx, %esi
+ precalc37 %ymm12
+ calc_f1_post %edi, %edx, %esi
+.endm
+
+.macro calc94
+ calc_f1_pre 0x78, %esi, %edi, %edx, %ebx
+ calc_f1_post %esi, %eax, %ebx
+.endm
+
+.macro calc95
+ calc_f1_pre 0x7c, %ebx, %esi, %eax, %ecx
+ precalc39 %ymm12, 0x40, 0x160
+ calc_f1_post %ebx, %edi, %ecx
+.endm
+
+.macro calc96
+ calc_f1_pre 0x90, %ecx, %ebx, %edi, %edx
+ precalc32 %ymm13, %ymm12
+ calc_f1_post %ecx, %esi, %edx
+.endm
+
+.macro calc97
+ calc_f1_pre 0x94, %edx, %ecx, %esi, %eax
+ precalc33 %ymm7, %ymm8
+ calc_f1_post %edx, %ebx, %eax
+.endm
+
+.macro calc98
+ calc_f1_pre 0x98, %eax, %edx, %ebx, %edi
+ precalc34 %ymm15
+ calc_f1_post %eax, %ecx, %edi
+.endm
+
+.macro calc99
+ calc_f2_pre 0x9c, %edi, %eax, %esi
+ precalc35 %ymm8
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc100
+ calc_f2_pre 0xb0, %esi, %edi, %ebx
+ precalc36 %ymm8
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc101
+ calc_f2_pre 0xb4, %ebx, %esi, %ecx
+ precalc37 %ymm8
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc102
+ calc_f2_pre 0xb8, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc103
+ calc_f2_pre 0xbc, %edx, %ecx, %eax
+ precalc39 %ymm8, 0x40, 0x180
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc104
+ calc_f2_pre 0xd0, %eax, %edx, %edi
+ precalc32 %ymm12, %ymm8
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc105
+ calc_f2_pre 0xd4, %edi, %eax, %esi
+ precalc33 %ymm5, %ymm7
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc106
+ calc_f2_pre 0xd8, %esi, %edi, %ebx
+ precalc34 %ymm14
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc107
+ calc_f2_pre 0xdc, %ebx, %esi, %ecx
+ precalc35 %ymm7
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc108
+ calc_f2_pre 0xf0, %ecx, %ebx, %edx
+ precalc36 %ymm7
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc109
+ calc_f2_pre 0xf4, %edx, %ecx, %eax
+ precalc37 %ymm7
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc110
+ calc_f2_pre 0xf8, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc111
+ calc_f2_pre 0xfc, %edi, %eax, %esi
+ precalc39 %ymm7, 0x40, 0x1a0
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc112
+ calc_f2_pre 0x110, %esi, %edi, %ebx
+ precalc32 %ymm8, %ymm7
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc113
+ calc_f2_pre 0x114, %ebx, %esi, %ecx
+ precalc33 %ymm3, %ymm5
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc114
+ calc_f2_pre 0x118, %ecx, %ebx, %edx
+ precalc34 %ymm13
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc115
+ calc_f2_pre 0x11c, %edx, %ecx, %eax
+ precalc35 %ymm5
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc116
+ calc_f2_pre 0x130, %eax, %edx, %edi
+	precalc36 %ymm5
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc117
+ calc_f2_pre 0x134, %edi, %eax, %esi
+ precalc37 %ymm5
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc118
+ calc_f2_pre 0x138, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc119
+ calc_f3_pre 0x13c, %ecx
+ precalc39 %ymm5, 0x40, 0x1c0
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc120
+ calc_f3_pre 0x150, %edx
+ precalc32 %ymm7, %ymm5
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc121
+ calc_f3_pre 0x154, %eax
+ precalc33 %ymm15, %ymm3
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc122
+ calc_f3_pre 0x158, %edi
+ precalc34 %ymm12
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc123
+ calc_f3_pre 0x15c, %esi
+ precalc35 %ymm3
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc124
+ calc_f3_pre 0x170, %ebx
+ precalc36 %ymm3
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc125
+ calc_f3_pre 0x174, %ecx
+ precalc37 %ymm3
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc126
+ calc_f3_pre 0x178, %edx
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc127
+ calc_f3_pre 0x17c, %eax
+ precalc39 %ymm3, 0x60, 0x1e0
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc128
+ calc_f3_pre 0x190, %edi
+ precalc32 %ymm5, %ymm3
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc129
+ calc_f3_pre 0x194, %esi
+ precalc33 %ymm14, %ymm15
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc130
+ calc_f3_pre 0x198, %ebx
+ precalc34 %ymm8
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc131
+ calc_f3_pre 0x19c, %ecx
+ precalc35 %ymm15
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc132
+ calc_f3_pre 0x1b0, %edx
+ precalc36 %ymm15
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc133
+ calc_f3_pre 0x1b4, %eax
+ precalc37 %ymm15
+ calc_f3_post %edx, %ebx, %esi, %eax, %ecx
+.endm
+
+.macro calc134
+ calc_f3_pre 0x1b8, %edi
+ calc_f3_post %eax, %ecx, %ebx, %edi, %edx
+.endm
+
+.macro calc135
+ calc_f3_pre 0x1bc, %esi
+ precalc39 %ymm15, 0x60, 0x200
+ calc_f3_post %edi, %edx, %ecx, %esi, %eax
+.endm
+
+.macro calc136
+ calc_f3_pre 0x1d0, %ebx
+ precalc32 %ymm3, %ymm15
+ calc_f3_post %esi, %eax, %edx, %ebx, %edi
+.endm
+
+.macro calc137
+ calc_f3_pre 0x1d4, %ecx
+ precalc33 %ymm13, %ymm14
+ calc_f3_post %ebx, %edi, %eax, %ecx, %esi
+.endm
+
+.macro calc138
+ calc_f3_pre 0x1d8, %edx
+ precalc34 %ymm7
+ calc_f3_post %ecx, %esi, %edi, %edx, %ebx
+.endm
+
+.macro calc139
+	calc_f2_pre 0x1dc, %edx, %ecx, %eax
+ precalc35 %ymm14
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc140
+ calc_f2_pre 0x1f0, %eax, %edx, %edi
+ precalc36 %ymm14
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc141
+ calc_f2_pre 0x1f4, %edi, %eax, %esi
+ precalc37 %ymm14
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc142
+ calc_f2_pre 0x1f8, %esi, %edi, %ebx
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc143
+ calc_f2_pre 0x1fc, %ebx, %esi, %ecx
+ precalc39 %ymm14, 0x60, 0x220
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc144
+ calc_f2_pre 0x210, %ecx, %ebx, %edx
+ precalc32 %ymm15, %ymm14
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc145
+ calc_f2_pre 0x214, %edx, %ecx, %eax
+ precalc33 %ymm12, %ymm13
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc146
+ calc_f2_pre 0x218, %eax, %edx, %edi
+ precalc34 %ymm5
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc147
+ calc_f2_pre 0x21c, %edi, %eax, %esi
+ precalc35 %ymm13
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc148
+ calc_f2_pre 0x230, %esi, %edi, %ebx
+ precalc36 %ymm13
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc149
+ calc_f2_pre 0x234, %ebx, %esi, %ecx
+ precalc37 %ymm13
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc150
+ calc_f2_pre 0x238, %ecx, %ebx, %edx
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc151
+ calc_f2_pre 0x23c, %edx, %ecx, %eax
+ precalc39 %ymm13, 0x60, 0x240
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc152
+ calc_f2_pre 0x250, %eax, %edx, %edi
+ precalc32 %ymm14, %ymm13
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc153
+ calc_f2_pre 0x254, %edi, %eax, %esi
+ precalc33 %ymm8, %ymm12
+ calc_f2_post %edi, %edx, %ecx, %esi
+.endm
+
+.macro calc154
+ calc_f2_pre 0x258, %esi, %edi, %ebx
+ precalc34 %ymm3
+ calc_f2_post %esi, %eax, %edx, %ebx
+.endm
+
+.macro calc155
+ calc_f2_pre 0x25c, %ebx, %esi, %ecx
+ precalc35 %ymm12
+ calc_f2_post %ebx, %edi, %eax, %ecx
+.endm
+
+.macro calc156
+ calc_f2_pre 0x270, %ecx, %ebx, %edx
+ precalc36 %ymm12
+ calc_f2_post %ecx, %esi, %edi, %edx
+.endm
+
+.macro calc157
+ calc_f2_pre 0x274, %edx, %ecx, %eax
+ precalc37 %ymm12
+ calc_f2_post %edx, %ebx, %esi, %eax
+.endm
+
+.macro calc158
+ calc_f2_pre 0x278, %eax, %edx, %edi
+ calc_f2_post %eax, %ecx, %ebx, %edi
+.endm
+
+.macro calc159
+ add 0x27c(%r15), %esi
+ add %eax, %esi
+ rorx $0x1b, %edi, %r12d
+ precalc39 %ymm12, 0x60, 0x260
+ add %r12d, %esi
+.endm
+
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_avx2)
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ sub $1408+8, %rsp
+
+ and $~63, %rdx
+ lea k_xmm_ar(%rip), %r8
+ mov %rdi, %r9
+ mov %rsi, %r10
+ lea 64(%rsi), %r13
+ lea 64(%rsi, %rdx), %r11
+ cmp %r11, %r13
+ cmovae %r8, %r13
+ vmovdqu bswap_shufb_ctl(%rip), %ymm10
+
+ mov (%r9), %ecx
+ mov 4(%r9), %esi
+ mov 8(%r9), %edi
+ mov 12(%r9), %eax
+ mov 16(%r9), %edx
+ mov %rsp, %r14
+ lea 2*4*80+32(%rsp), %r15
+ precalc // precalc WK for first 2 blocks
+ xchg %r14, %r15
+
+ // this is unrolled
+.Loop: cmp %r8, %r10 // we use the value of R8 (set below)
+ // as a signal of the last block
+ jne .Lbegin
+ add $1408+8, %rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ vzeroupper
+ ret
+
+.Lbegin:
+ calc0
+ calc1
+ calc2
+ calc3
+ calc4
+ calc5
+ calc6
+ calc7
+ calc8
+ calc9
+ calc10
+ calc11
+ calc12
+ calc13
+ calc14
+ calc15
+ calc16
+ calc17
+ calc18
+ calc19
+ calc20
+ calc21
+ calc22
+ calc23
+ calc24
+ calc25
+ calc26
+ calc27
+ calc28
+ calc29
+ calc30
+ calc31
+ calc32
+ calc33
+ calc34
+ calc35
+ calc36
+ calc37
+ calc38
+ calc39
+ calc40
+ calc41
+ calc42
+ calc43
+ calc44
+ calc45
+ calc46
+ calc47
+ calc48
+ calc49
+ calc50
+ calc51
+ calc52
+ calc53
+ calc54
+ calc55
+ calc56
+ calc57
+ calc58
+ calc59
+
+ add $128, %r10 // move to the next even-64-byte block
+ cmp %r11, %r10 // is the current block the last one?
+ cmovae %r10, %r8 // signal the last iteration smartly
+
+ calc60
+ calc61
+ calc62
+ calc63
+ calc64
+ calc65
+ calc66
+ calc67
+ calc68
+ calc69
+ calc70
+ calc71
+ calc72
+ calc73
+ calc74
+ calc75
+ calc76
+ calc77
+ calc78
+ calc79
+
+ update_hash %eax, %edx, %ebx, %esi, %edi
+ cmp %r8, %r10 // is the current block the last one?
+ je .Loop
+ mov %edx, %ecx
+
+ calc80
+ calc81
+ calc82
+ calc83
+ calc84
+ calc85
+ calc86
+ calc87
+ calc88
+ calc89
+ calc90
+ calc91
+ calc92
+ calc93
+ calc94
+ calc95
+ calc96
+ calc97
+ calc98
+ calc99
+ calc100
+ calc101
+ calc102
+ calc103
+ calc104
+ calc105
+ calc106
+ calc107
+ calc108
+ calc109
+ calc110
+ calc111
+ calc112
+ calc113
+ calc114
+ calc115
+ calc116
+ calc117
+ calc118
+ calc119
+ calc120
+ calc121
+ calc122
+ calc123
+ calc124
+ calc125
+ calc126
+ calc127
+ calc128
+ calc129
+ calc130
+ calc131
+ calc132
+ calc133
+ calc134
+ calc135
+ calc136
+ calc137
+ calc138
+ calc139
+
+ add $128, %r13 // move to the next even-64-byte block
+ cmp %r11, %r13 // is the current block the last one?
+ cmovae %r8, %r10
+
+ calc140
+ calc141
+ calc142
+ calc143
+ calc144
+ calc145
+ calc146
+ calc147
+ calc148
+ calc149
+ calc150
+ calc151
+ calc152
+ calc153
+ calc154
+ calc155
+ calc156
+ calc157
+ calc158
+ calc159
+
+ update_hash %esi, %edi, %edx, %ecx, %ebx
+ mov %esi, %r12d // reset state for AVX2 reg permutation
+ mov %edi, %esi
+ mov %edx, %edi
+ mov %ebx, %edx
+ mov %ecx, %eax
+ mov %r12d, %ecx
+ xchg %r14, %r15
+ jmp .Loop
+END(_libmd_sha1block_avx2)
+
+ .section .rodata
+ .balign 32
+k_xmm_ar:
+ .fill 8, 4, 0x5a827999
+ .fill 8, 4, 0x6ed9eba1
+ .fill 8, 4, 0x8f1bbcdc
+ .fill 8, 4, 0xca62c1d6
+ .size k_xmm_ar, .-k_xmm_ar
+
+bswap_shufb_ctl:
+ .4byte 0x00010203
+ .4byte 0x04050607
+ .4byte 0x08090a0b
+ .4byte 0x0c0d0e0f
+ .4byte 0x00010203
+ .4byte 0x04050607
+ .4byte 0x08090a0b
+ .4byte 0x0c0d0e0f
+ .size bswap_shufb_ctl, .-bswap_shufb_ctl
+
+ /*
+ * SHA1 implementation using the Intel SHA extensions (SHANI).
+ *
+	 * Implemented according to the Intel white paper
+ *
+ * S. Gulley, V. Gopal, K. Yap, W. Feghali, J. Guilford,
+ * G. Wolrich: "Intel SHA Extensions: new instruction supporting
+ * the Secure Hash Algorithm on Intel® architecture processors",
+ * July 2013.
+ */
+ // sha1block(SHA1_CTX, buf, len)
+ENTRY(_libmd_sha1block_shani)
+ and $~63, %rdx // round length to block-size multiple
+ lea (%rsi, %rdx, 1), %rcx // end pointer
+ test %rdx, %rdx // nothing to do?
+ je 1f // if so, terminate immediately
+
+ movdqu (%rdi), %xmm6 // h0, h1, h2, h3
+ pxor %xmm7, %xmm7
+ pshufd $0x1b, %xmm6, %xmm6 // h3, h2, h1, h0
+ pinsrd $3, 16(%rdi), %xmm7 // h4 in the highest word of xmm7
+ movdqu shuf_mask(%rip), %xmm4
+
+ // main loop
+0: movdqa %xmm6, %xmm8 // stash ABCD
+ movdqa %xmm7, %xmm9 // stash E
+
+ // rounds 0--3
+ movdqu 0*16(%rsi), %xmm0 // load first message block
+ pshufb %xmm4, %xmm0 // and byte-swap
+ paddd %xmm0, %xmm7 // E += w[0]
+ movdqa %xmm6, %xmm5 // E' = A
+ sha1rnds4 $0, %xmm7, %xmm6 // perform rounds 0--3
+
+ // rounds 4--7
+ movdqu 1*16(%rsi), %xmm1
+ pshufb %xmm4, %xmm1
+ sha1nexte %xmm1, %xmm5
+ movdqa %xmm6, %xmm7
+ sha1rnds4 $0, %xmm5, %xmm6
+ sha1msg1 %xmm1, %xmm0
+
+ // rounds 8--11
+ movdqu 2*16(%rsi), %xmm2
+ pshufb %xmm4, %xmm2
+ sha1nexte %xmm2, %xmm7
+ movdqa %xmm6, %xmm5
+ sha1rnds4 $0, %xmm7, %xmm6
+ sha1msg1 %xmm2, %xmm1
+ pxor %xmm2, %xmm0
+
+.macro midround msg3, msg0, msg1, msg2, e1, e0, k
+ sha1nexte \msg3, \e1
+ movdqa %xmm6, \e0
+ sha1msg2 \msg3, \msg0
+ sha1rnds4 $\k, \e1, %xmm6
+ sha1msg1 \msg3, \msg2
+ pxor \msg3, \msg1
+.endm
+
+ movdqu 3*16(%rsi), %xmm3 // load third message block
+ pshufb %xmm4, %xmm3
+
+ add $4*16, %rsi
+
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 0 // 12--15
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 0 // 16--19
+ midround %xmm1, %xmm2, %xmm3, %xmm0, %xmm5, %xmm7, 1 // 20--23
+ midround %xmm2, %xmm3, %xmm0, %xmm1, %xmm7, %xmm5, 1 // 24--27
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 1 // 28--31
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 1 // 32--35
+ midround %xmm1, %xmm2, %xmm3, %xmm0, %xmm5, %xmm7, 1 // 36--39
+ midround %xmm2, %xmm3, %xmm0, %xmm1, %xmm7, %xmm5, 2 // 40--43
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 2 // 44--47
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 2 // 48--51
+ midround %xmm1, %xmm2, %xmm3, %xmm0, %xmm5, %xmm7, 2 // 52--55
+ midround %xmm2, %xmm3, %xmm0, %xmm1, %xmm7, %xmm5, 2 // 56--59
+ midround %xmm3, %xmm0, %xmm1, %xmm2, %xmm5, %xmm7, 3 // 60--63
+ midround %xmm0, %xmm1, %xmm2, %xmm3, %xmm7, %xmm5, 3 // 64--67
+
+ // rounds 68--71
+ sha1nexte %xmm1, %xmm5
+ movdqa %xmm6, %xmm7
+ sha1msg2 %xmm1, %xmm2
+ sha1rnds4 $3, %xmm5, %xmm6
+ pxor %xmm1, %xmm3
+
+ // rounds 72--75
+ sha1nexte %xmm2, %xmm7
+ movdqa %xmm6, %xmm5
+ sha1msg2 %xmm2, %xmm3
+ sha1rnds4 $3, %xmm7, %xmm6
+
+ // rounds 76--79
+ sha1nexte %xmm3, %xmm5
+ movdqa %xmm6, %xmm7
+ sha1rnds4 $3, %xmm5, %xmm6
+
+ sha1nexte %xmm9, %xmm7 // add saved E
+ paddd %xmm8, %xmm6 // add saved ABCD
+
+ cmp %rsi, %rcx // end reached?
+ jne 0b
+
+ pshufd $0x1b, %xmm6, %xmm6 // restore order of h0--h3
+ movdqu %xmm6, (%rdi) // write h0--h3
+ pextrd $3, %xmm7, 16(%rdi) // write h4
+1: ret
+END(_libmd_sha1block_shani)
+
+ .section .rodata
+ .balign 16
+shuf_mask:
+ .8byte 0x08090a0b0c0d0e0f
+ .8byte 0x0001020304050607
+ .size shuf_mask, .-shuf_mask
+
+ .section .note.GNU-stack,"",%progbits
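
For reference (illustrative, not part of the change): the AVX2 path's precalc machinery above stages W[i] + K for two interleaved 64-byte blocks in a temporary buffer so that the scalar rounds only ever do a single memory add per round. A plain C sketch of the values being produced follows; the vector code computes the same schedule for both blocks at once in ymm lanes.

#include <stdint.h>

static const uint32_t K[4] = { 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 };

static inline uint32_t rol32(uint32_t x, int n) { return ((x << n) | (x >> (32 - n))); }

/* Produce the 80 precomputed W[i] + K values for one already byte-swapped block. */
static void
precalc_wk(const uint32_t block[16], uint32_t wk[80])
{
	uint32_t w[80];
	int i;

	for (i = 0; i < 16; i++)
		w[i] = block[i];
	for (; i < 80; i++)
		w[i] = rol32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);
	for (i = 0; i < 80; i++)
		wk[i] = w[i] + K[i / 20];
}
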
diff --git a/lib/libmd/amd64/sha1dispatch.c b/lib/libmd/amd64/sha1dispatch.c
new file mode 100644
--- /dev/null
+++ b/lib/libmd/amd64/sha1dispatch.c
@@ -0,0 +1,77 @@
+/*-
+ * Copyright (c) 2016 The Go Authors. All rights reserved.
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * Adapted from Go's crypto/sha1/sha1block_amd64.go.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/specialreg.h>
+#include <sha.h>
+#include <x86/ifunc.h>
+
+extern void _libmd_sha1block_scalar(SHA1_CTX *, const void *, size_t);
+extern void _libmd_sha1block_avx2(SHA1_CTX *, const void *, size_t);
+extern void _libmd_sha1block_shani(SHA1_CTX *, const void *, size_t);
+static void sha1block_avx2_wrapper(SHA1_CTX *, const void *, size_t);
+
+#define AVX2_STDEXT_NEEDED \
+ (CPUID_STDEXT_BMI1 | CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2)
+
+DEFINE_UIFUNC(, void, sha1_block, (SHA1_CTX *, const void *, size_t))
+{
+ if (cpu_stdext_feature & CPUID_STDEXT_SHA)
+ return (_libmd_sha1block_shani);
+ if ((cpu_stdext_feature & AVX2_STDEXT_NEEDED) == AVX2_STDEXT_NEEDED)
+ return (sha1block_avx2_wrapper);
+ else
+ return (_libmd_sha1block_scalar);
+}
+
+static void
+sha1block_avx2_wrapper(SHA1_CTX *c, const void *data, size_t len)
+{
+ if (len >= 256) {
+ /*
+		 * sha1block_avx2 computes SHA1 for two blocks per iteration
+		 * and interleaves the precalculation for the next block,
+		 * so it may read up to 192 bytes past the end of p.
+		 * Adding bounds checks inside sha1block_avx2 would just
+		 * turn it into a copy of sha1block_scalar, so hand the
+		 * tail to the scalar routine instead.
+ */
+ size_t safe_len = len - 128;
+
+ if (safe_len % 128 != 0)
+ safe_len -= 64;
+
+ _libmd_sha1block_avx2(c, data, safe_len);
+ _libmd_sha1block_scalar(c, data + safe_len, len - safe_len);
+ } else
+ _libmd_sha1block_scalar(c, data, len);
+}
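To make the splitting arithmetic in sha1block_avx2_wrapper concrete, the small stand-alone program below (not part of the patch, and using a hypothetical split() helper) replays the safe_len computation for a few lengths that are already multiples of the 64-byte block size and prints how many bytes each routine would handle:

#include <stddef.h>
#include <stdio.h>

/* Hypothetical helper mirroring sha1block_avx2_wrapper's length split. */
static void
split(size_t len)
{
	size_t safe_len;

	if (len < 256) {
		printf("len=%4zu: scalar only\n", len);
		return;
	}

	safe_len = len - 128;
	if (safe_len % 128 != 0)
		safe_len -= 64;

	printf("len=%4zu: avx2=%zu, scalar=%zu\n", len, safe_len, len - safe_len);
}

int
main(void)
{
	split(192);	/* below the threshold: scalar only */
	split(256);	/* avx2 128, scalar 128 */
	split(320);	/* avx2 128, scalar 192 */
	split(384);	/* avx2 256, scalar 128 */
	return (0);
}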
diff --git a/lib/libmd/i386/sha.S b/lib/libmd/i386/sha.S
deleted file mode 100644
--- a/lib/libmd/i386/sha.S
+++ /dev/null
@@ -1,1951 +0,0 @@
-/* -*- Fundamental -*- Emacs' assembler mode hoses this file */
-#ifndef PIC
-/* Run the C pre-processor over this file with one of the following defined
- * ELF - elf object files,
- * OUT - a.out object files,
- * BSDI - BSDI style a.out object files
- * SOL - Solaris style elf
- */
-
-#define TYPE(a,b) .type a,b
-#define SIZE(a,b) .size a,b
-
-#if defined(OUT) || defined(BSDI)
-#define sha1_block_x86 _sha1_block_x86
-
-#endif
-
-#ifdef OUT
-#define OK 1
-#define ALIGN 4
-#endif
-
-#ifdef BSDI
-#define OK 1
-#define ALIGN 4
-#undef SIZE
-#undef TYPE
-#define SIZE(a,b)
-#define TYPE(a,b)
-#endif
-
-#if defined(ELF) || defined(SOL)
-#define OK 1
-#define ALIGN 4
-#endif
-
-#ifndef OK
-You need to define one of
-ELF - elf systems - linux-elf, NetBSD and DG-UX
-OUT - a.out systems - linux-a.out and FreeBSD
-SOL - solaris systems, which are elf with strange comment lines
-BSDI - a.out with a very primative version of as.
-#endif
-
-/* Let the Assembler begin :-) */
- /* Don't even think of reading this code */
- /* It was automatically generated by sha1-586.pl */
- /* Which is a perl program used to generate the x86 assember for */
- /* any of elf, a.out, BSDI,Win32, or Solaris */
- /* eric <eay@cryptsoft.com> */
-
- .file "sha1-586.s"
- .version "01.01"
-gcc2_compiled.:
-.text
- .p2align ALIGN
-.globl sha1_block_x86
- TYPE(sha1_block_x86,@function)
-sha1_block_x86:
- pushl %esi
- pushl %ebp
- movl 20(%esp), %eax
- movl 16(%esp), %esi
- addl %esi, %eax
- movl 12(%esp), %ebp
- pushl %ebx
- subl $64, %eax
- pushl %edi
- movl 4(%ebp), %ebx
- subl $72, %esp
- movl 12(%ebp), %edx
- movl 16(%ebp), %edi
- movl 8(%ebp), %ecx
- movl %eax, 68(%esp)
- /* First we need to setup the X array */
- movl (%esi), %eax
-.L000start:
- /* First, load the words onto the stack in network byte order */
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, (%esp)
- movl 4(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 4(%esp)
- movl 8(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 8(%esp)
- movl 12(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 12(%esp)
- movl 16(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 16(%esp)
- movl 20(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 20(%esp)
- movl 24(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 24(%esp)
- movl 28(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 28(%esp)
- movl 32(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 32(%esp)
- movl 36(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 36(%esp)
- movl 40(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 40(%esp)
- movl 44(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 44(%esp)
- movl 48(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 48(%esp)
- movl 52(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 52(%esp)
- movl 56(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 56(%esp)
- movl 60(%esi), %eax
-.byte 15
-.byte 200 /* bswapl %eax */
- movl %eax, 60(%esp)
- /* We now have the X array on the stack */
- /* starting at sp-4 */
- movl %esi, 64(%esp)
-
- /* Start processing */
- movl (%ebp), %eax
- /* 00_15 0 */
- movl %ecx, %esi
- movl %eax, %ebp
- xorl %edx, %esi
- roll $5, %ebp
- andl %ebx, %esi
- addl %edi, %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- movl (%esp), %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %edx, %esi
- leal 1518500249(%ebp,%edi,1),%ebp
- movl %ebx, %edi
- addl %ebp, %esi
- xorl %ecx, %edi
- movl %esi, %ebp
- andl %eax, %edi
- roll $5, %ebp
- addl %edx, %ebp
- movl 4(%esp), %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- xorl %ecx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %ebp, %edi
- /* 00_15 2 */
- movl %eax, %edx
- movl %edi, %ebp
- xorl %ebx, %edx
- roll $5, %ebp
- andl %esi, %edx
- addl %ecx, %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- movl 8(%esp), %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebx, %edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- movl %esi, %ecx
- addl %ebp, %edx
- xorl %eax, %ecx
- movl %edx, %ebp
- andl %edi, %ecx
- roll $5, %ebp
- addl %ebx, %ebp
- movl 12(%esp), %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- xorl %eax, %ecx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ebp, %ecx
- /* 00_15 4 */
- movl %edi, %ebx
- movl %ecx, %ebp
- xorl %esi, %ebx
- roll $5, %ebp
- andl %edx, %ebx
- addl %eax, %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- movl 16(%esp), %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %esi, %ebx
- leal 1518500249(%ebp,%eax,1),%ebp
- movl %edx, %eax
- addl %ebp, %ebx
- xorl %edi, %eax
- movl %ebx, %ebp
- andl %ecx, %eax
- roll $5, %ebp
- addl %esi, %ebp
- movl 20(%esp), %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- xorl %edi, %eax
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- leal 1518500249(%ebp,%esi,1),%ebp
- addl %ebp, %eax
- /* 00_15 6 */
- movl %ecx, %esi
- movl %eax, %ebp
- xorl %edx, %esi
- roll $5, %ebp
- andl %ebx, %esi
- addl %edi, %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- movl 24(%esp), %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %edx, %esi
- leal 1518500249(%ebp,%edi,1),%ebp
- movl %ebx, %edi
- addl %ebp, %esi
- xorl %ecx, %edi
- movl %esi, %ebp
- andl %eax, %edi
- roll $5, %ebp
- addl %edx, %ebp
- movl 28(%esp), %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- xorl %ecx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %ebp, %edi
- /* 00_15 8 */
- movl %eax, %edx
- movl %edi, %ebp
- xorl %ebx, %edx
- roll $5, %ebp
- andl %esi, %edx
- addl %ecx, %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- movl 32(%esp), %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebx, %edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- movl %esi, %ecx
- addl %ebp, %edx
- xorl %eax, %ecx
- movl %edx, %ebp
- andl %edi, %ecx
- roll $5, %ebp
- addl %ebx, %ebp
- movl 36(%esp), %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- xorl %eax, %ecx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ebp, %ecx
- /* 00_15 10 */
- movl %edi, %ebx
- movl %ecx, %ebp
- xorl %esi, %ebx
- roll $5, %ebp
- andl %edx, %ebx
- addl %eax, %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- movl 40(%esp), %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %esi, %ebx
- leal 1518500249(%ebp,%eax,1),%ebp
- movl %edx, %eax
- addl %ebp, %ebx
- xorl %edi, %eax
- movl %ebx, %ebp
- andl %ecx, %eax
- roll $5, %ebp
- addl %esi, %ebp
- movl 44(%esp), %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- xorl %edi, %eax
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- leal 1518500249(%ebp,%esi,1),%ebp
- addl %ebp, %eax
- /* 00_15 12 */
- movl %ecx, %esi
- movl %eax, %ebp
- xorl %edx, %esi
- roll $5, %ebp
- andl %ebx, %esi
- addl %edi, %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- movl 48(%esp), %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %edx, %esi
- leal 1518500249(%ebp,%edi,1),%ebp
- movl %ebx, %edi
- addl %ebp, %esi
- xorl %ecx, %edi
- movl %esi, %ebp
- andl %eax, %edi
- roll $5, %ebp
- addl %edx, %ebp
- movl 52(%esp), %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- xorl %ecx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- leal 1518500249(%ebp,%edx,1),%ebp
- addl %ebp, %edi
- /* 00_15 14 */
- movl %eax, %edx
- movl %edi, %ebp
- xorl %ebx, %edx
- roll $5, %ebp
- andl %esi, %edx
- addl %ecx, %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- movl 56(%esp), %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebx, %edx
- leal 1518500249(%ebp,%ecx,1),%ebp
- movl %esi, %ecx
- addl %ebp, %edx
- xorl %eax, %ecx
- movl %edx, %ebp
- andl %edi, %ecx
- roll $5, %ebp
- addl %ebx, %ebp
- movl 60(%esp), %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- xorl %eax, %ecx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- leal 1518500249(%ebp,%ebx,1),%ebp
- addl %ebp, %ecx
- /* 16_19 16 */
- nop
- movl (%esp), %ebp
- movl 8(%esp), %ebx
- xorl %ebp, %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 52(%esp), %ebp
- xorl %ebp, %ebx
- movl %edi, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %esi, %ebp
- movl %ebx, (%esp)
- andl %edx, %ebp
- leal 1518500249(%ebx,%eax,1),%ebx
- xorl %esi, %ebp
- movl %ecx, %eax
- addl %ebp, %ebx
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- movl 4(%esp), %eax
- movl 12(%esp), %ebp
- xorl %ebp, %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 56(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %edx, %ebp
- xorl %edi, %ebp
- movl %eax, 4(%esp)
- andl %ecx, %ebp
- leal 1518500249(%eax,%esi,1),%eax
- xorl %edi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 16_19 18 */
- movl 8(%esp), %ebp
- movl 16(%esp), %esi
- xorl %ebp, %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl 60(%esp), %ebp
- xorl %ebp, %esi
- movl %ecx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %edx, %ebp
- movl %esi, 8(%esp)
- andl %ebx, %ebp
- leal 1518500249(%esi,%edi,1),%esi
- xorl %edx, %ebp
- movl %eax, %edi
- addl %ebp, %esi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- movl 12(%esp), %edi
- movl 20(%esp), %ebp
- xorl %ebp, %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl (%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %ebx, %ebp
- xorl %ecx, %ebp
- movl %edi, 12(%esp)
- andl %eax, %ebp
- leal 1518500249(%edi,%edx,1),%edi
- xorl %ecx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 20_39 20 */
- movl 16(%esp), %edx
- movl 24(%esp), %ebp
- xorl %ebp, %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 4(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 16(%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 21 */
- movl 20(%esp), %ecx
- movl 28(%esp), %ebp
- xorl %ebp, %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 8(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 20(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 22 */
- movl 24(%esp), %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 12(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 24(%esp)
- xorl %esi, %ebp
- leal 1859775393(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 23 */
- movl 28(%esp), %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 16(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 28(%esp)
- xorl %edi, %ebp
- leal 1859775393(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 24 */
- movl 32(%esp), %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 20(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 32(%esp)
- xorl %edx, %ebp
- leal 1859775393(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 25 */
- movl 36(%esp), %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 24(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 36(%esp)
- xorl %ecx, %ebp
- leal 1859775393(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 26 */
- movl 40(%esp), %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 8(%esp), %ebp
- xorl %ebp, %edx
- movl 28(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 40(%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 27 */
- movl 44(%esp), %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 12(%esp), %ebp
- xorl %ebp, %ecx
- movl 32(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 44(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 28 */
- movl 48(%esp), %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 16(%esp), %ebp
- xorl %ebp, %ebx
- movl 36(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 48(%esp)
- xorl %esi, %ebp
- leal 1859775393(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 29 */
- movl 52(%esp), %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 20(%esp), %ebp
- xorl %ebp, %eax
- movl 40(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 52(%esp)
- xorl %edi, %ebp
- leal 1859775393(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 30 */
- movl 56(%esp), %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 24(%esp), %ebp
- xorl %ebp, %esi
- movl 44(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 56(%esp)
- xorl %edx, %ebp
- leal 1859775393(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 31 */
- movl 60(%esp), %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 28(%esp), %ebp
- xorl %ebp, %edi
- movl 48(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 60(%esp)
- xorl %ecx, %ebp
- leal 1859775393(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 32 */
- movl (%esp), %edx
- movl 8(%esp), %ebp
- xorl %ebp, %edx
- movl 32(%esp), %ebp
- xorl %ebp, %edx
- movl 52(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, (%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 33 */
- movl 4(%esp), %ecx
- movl 12(%esp), %ebp
- xorl %ebp, %ecx
- movl 36(%esp), %ebp
- xorl %ebp, %ecx
- movl 56(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 4(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 34 */
- movl 8(%esp), %ebx
- movl 16(%esp), %ebp
- xorl %ebp, %ebx
- movl 40(%esp), %ebp
- xorl %ebp, %ebx
- movl 60(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 8(%esp)
- xorl %esi, %ebp
- leal 1859775393(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 35 */
- movl 12(%esp), %eax
- movl 20(%esp), %ebp
- xorl %ebp, %eax
- movl 44(%esp), %ebp
- xorl %ebp, %eax
- movl (%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 12(%esp)
- xorl %edi, %ebp
- leal 1859775393(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 36 */
- movl 16(%esp), %esi
- movl 24(%esp), %ebp
- xorl %ebp, %esi
- movl 48(%esp), %ebp
- xorl %ebp, %esi
- movl 4(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 16(%esp)
- xorl %edx, %ebp
- leal 1859775393(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 37 */
- movl 20(%esp), %edi
- movl 28(%esp), %ebp
- xorl %ebp, %edi
- movl 52(%esp), %ebp
- xorl %ebp, %edi
- movl 8(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 20(%esp)
- xorl %ecx, %ebp
- leal 1859775393(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 38 */
- movl 24(%esp), %edx
- movl 32(%esp), %ebp
- xorl %ebp, %edx
- movl 56(%esp), %ebp
- xorl %ebp, %edx
- movl 12(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 24(%esp)
- xorl %ebx, %ebp
- leal 1859775393(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 39 */
- movl 28(%esp), %ecx
- movl 36(%esp), %ebp
- xorl %ebp, %ecx
- movl 60(%esp), %ebp
- xorl %ebp, %ecx
- movl 16(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 28(%esp)
- xorl %eax, %ebp
- leal 1859775393(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 40_59 40 */
- movl 32(%esp), %ebx
- movl 40(%esp), %ebp
- xorl %ebp, %ebx
- movl (%esp), %ebp
- xorl %ebp, %ebx
- movl 20(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 32(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 36(%esp), %eax
- addl %ebp, %ebx
- movl 44(%esp), %ebp
- xorl %ebp, %eax
- movl 4(%esp), %ebp
- xorl %ebp, %eax
- movl 24(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 36(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 41 */
- /* 40_59 42 */
- movl 40(%esp), %esi
- movl 48(%esp), %ebp
- xorl %ebp, %esi
- movl 8(%esp), %ebp
- xorl %ebp, %esi
- movl 28(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- orl %ecx, %ebp
- movl %esi, 40(%esp)
- andl %edx, %ebp
- leal 2400959708(%esi,%edi,1),%esi
- movl %ebx, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- andl %ecx, %edi
- orl %edi, %ebp
- movl %eax, %edi
- roll $5, %edi
- addl %edi, %ebp
- movl 44(%esp), %edi
- addl %ebp, %esi
- movl 52(%esp), %ebp
- xorl %ebp, %edi
- movl 12(%esp), %ebp
- xorl %ebp, %edi
- movl 32(%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %eax, %ebp
- movl %edi, 44(%esp)
- orl %ebx, %ebp
- leal 2400959708(%edi,%edx,1),%edi
- movl %eax, %edx
- andl %ecx, %ebp
- andl %ebx, %edx
- orl %edx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 40_59 43 */
- /* 40_59 44 */
- movl 48(%esp), %edx
- movl 56(%esp), %ebp
- xorl %ebp, %edx
- movl 16(%esp), %ebp
- xorl %ebp, %edx
- movl 36(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- orl %eax, %ebp
- movl %edx, 48(%esp)
- andl %ebx, %ebp
- leal 2400959708(%edx,%ecx,1),%edx
- movl %esi, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- andl %eax, %ecx
- orl %ecx, %ebp
- movl %edi, %ecx
- roll $5, %ecx
- addl %ecx, %ebp
- movl 52(%esp), %ecx
- addl %ebp, %edx
- movl 60(%esp), %ebp
- xorl %ebp, %ecx
- movl 20(%esp), %ebp
- xorl %ebp, %ecx
- movl 40(%esp), %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebp, %ecx
-.byte 209
-.byte 193 /* roll $1 %ecx */
- movl %edi, %ebp
- movl %ecx, 52(%esp)
- orl %esi, %ebp
- leal 2400959708(%ecx,%ebx,1),%ecx
- movl %edi, %ebx
- andl %eax, %ebp
- andl %esi, %ebx
- orl %ebx, %ebp
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ebp
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ecx
- /* 40_59 45 */
- /* 40_59 46 */
- movl 56(%esp), %ebx
- movl (%esp), %ebp
- xorl %ebp, %ebx
- movl 24(%esp), %ebp
- xorl %ebp, %ebx
- movl 44(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 56(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 60(%esp), %eax
- addl %ebp, %ebx
- movl 4(%esp), %ebp
- xorl %ebp, %eax
- movl 28(%esp), %ebp
- xorl %ebp, %eax
- movl 48(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 60(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 47 */
- /* 40_59 48 */
- movl (%esp), %esi
- movl 8(%esp), %ebp
- xorl %ebp, %esi
- movl 32(%esp), %ebp
- xorl %ebp, %esi
- movl 52(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- orl %ecx, %ebp
- movl %esi, (%esp)
- andl %edx, %ebp
- leal 2400959708(%esi,%edi,1),%esi
- movl %ebx, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- andl %ecx, %edi
- orl %edi, %ebp
- movl %eax, %edi
- roll $5, %edi
- addl %edi, %ebp
- movl 4(%esp), %edi
- addl %ebp, %esi
- movl 12(%esp), %ebp
- xorl %ebp, %edi
- movl 36(%esp), %ebp
- xorl %ebp, %edi
- movl 56(%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %eax, %ebp
- movl %edi, 4(%esp)
- orl %ebx, %ebp
- leal 2400959708(%edi,%edx,1),%edi
- movl %eax, %edx
- andl %ecx, %ebp
- andl %ebx, %edx
- orl %edx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 40_59 49 */
- /* 40_59 50 */
- movl 8(%esp), %edx
- movl 16(%esp), %ebp
- xorl %ebp, %edx
- movl 40(%esp), %ebp
- xorl %ebp, %edx
- movl 60(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- orl %eax, %ebp
- movl %edx, 8(%esp)
- andl %ebx, %ebp
- leal 2400959708(%edx,%ecx,1),%edx
- movl %esi, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- andl %eax, %ecx
- orl %ecx, %ebp
- movl %edi, %ecx
- roll $5, %ecx
- addl %ecx, %ebp
- movl 12(%esp), %ecx
- addl %ebp, %edx
- movl 20(%esp), %ebp
- xorl %ebp, %ecx
- movl 44(%esp), %ebp
- xorl %ebp, %ecx
- movl (%esp), %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebp, %ecx
-.byte 209
-.byte 193 /* roll $1 %ecx */
- movl %edi, %ebp
- movl %ecx, 12(%esp)
- orl %esi, %ebp
- leal 2400959708(%ecx,%ebx,1),%ecx
- movl %edi, %ebx
- andl %eax, %ebp
- andl %esi, %ebx
- orl %ebx, %ebp
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ebp
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ecx
- /* 40_59 51 */
- /* 40_59 52 */
- movl 16(%esp), %ebx
- movl 24(%esp), %ebp
- xorl %ebp, %ebx
- movl 48(%esp), %ebp
- xorl %ebp, %ebx
- movl 4(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 16(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 20(%esp), %eax
- addl %ebp, %ebx
- movl 28(%esp), %ebp
- xorl %ebp, %eax
- movl 52(%esp), %ebp
- xorl %ebp, %eax
- movl 8(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 20(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 53 */
- /* 40_59 54 */
- movl 24(%esp), %esi
- movl 32(%esp), %ebp
- xorl %ebp, %esi
- movl 56(%esp), %ebp
- xorl %ebp, %esi
- movl 12(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- orl %ecx, %ebp
- movl %esi, 24(%esp)
- andl %edx, %ebp
- leal 2400959708(%esi,%edi,1),%esi
- movl %ebx, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- andl %ecx, %edi
- orl %edi, %ebp
- movl %eax, %edi
- roll $5, %edi
- addl %edi, %ebp
- movl 28(%esp), %edi
- addl %ebp, %esi
- movl 36(%esp), %ebp
- xorl %ebp, %edi
- movl 60(%esp), %ebp
- xorl %ebp, %edi
- movl 16(%esp), %ebp
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- xorl %ebp, %edi
-.byte 209
-.byte 199 /* roll $1 %edi */
- movl %eax, %ebp
- movl %edi, 28(%esp)
- orl %ebx, %ebp
- leal 2400959708(%edi,%edx,1),%edi
- movl %eax, %edx
- andl %ecx, %ebp
- andl %ebx, %edx
- orl %edx, %ebp
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edi
- /* 40_59 55 */
- /* 40_59 56 */
- movl 32(%esp), %edx
- movl 40(%esp), %ebp
- xorl %ebp, %edx
- movl (%esp), %ebp
- xorl %ebp, %edx
- movl 20(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- orl %eax, %ebp
- movl %edx, 32(%esp)
- andl %ebx, %ebp
- leal 2400959708(%edx,%ecx,1),%edx
- movl %esi, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- andl %eax, %ecx
- orl %ecx, %ebp
- movl %edi, %ecx
- roll $5, %ecx
- addl %ecx, %ebp
- movl 36(%esp), %ecx
- addl %ebp, %edx
- movl 44(%esp), %ebp
- xorl %ebp, %ecx
- movl 4(%esp), %ebp
- xorl %ebp, %ecx
- movl 24(%esp), %ebp
-.byte 209
-.byte 206 /* rorl $1 %esi */
- xorl %ebp, %ecx
-.byte 209
-.byte 193 /* roll $1 %ecx */
- movl %edi, %ebp
- movl %ecx, 36(%esp)
- orl %esi, %ebp
- leal 2400959708(%ecx,%ebx,1),%ecx
- movl %edi, %ebx
- andl %eax, %ebp
- andl %esi, %ebx
- orl %ebx, %ebp
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ebp
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ecx
- /* 40_59 57 */
- /* 40_59 58 */
- movl 40(%esp), %ebx
- movl 48(%esp), %ebp
- xorl %ebp, %ebx
- movl 8(%esp), %ebp
- xorl %ebp, %ebx
- movl 28(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- orl %edi, %ebp
- movl %ebx, 40(%esp)
- andl %esi, %ebp
- leal 2400959708(%ebx,%eax,1),%ebx
- movl %edx, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- andl %edi, %eax
- orl %eax, %ebp
- movl %ecx, %eax
- roll $5, %eax
- addl %eax, %ebp
- movl 44(%esp), %eax
- addl %ebp, %ebx
- movl 52(%esp), %ebp
- xorl %ebp, %eax
- movl 12(%esp), %ebp
- xorl %ebp, %eax
- movl 32(%esp), %ebp
-.byte 209
-.byte 202 /* rorl $1 %edx */
- xorl %ebp, %eax
-.byte 209
-.byte 192 /* roll $1 %eax */
- movl %ecx, %ebp
- movl %eax, 44(%esp)
- orl %edx, %ebp
- leal 2400959708(%eax,%esi,1),%eax
- movl %ecx, %esi
- andl %edi, %ebp
- andl %edx, %esi
- orl %esi, %ebp
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %ebp
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %eax
- /* 40_59 59 */
- /* 20_39 60 */
- movl 48(%esp), %esi
- movl 56(%esp), %ebp
- xorl %ebp, %esi
- movl 16(%esp), %ebp
- xorl %ebp, %esi
- movl 36(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 48(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 61 */
- movl 52(%esp), %edi
- movl 60(%esp), %ebp
- xorl %ebp, %edi
- movl 20(%esp), %ebp
- xorl %ebp, %edi
- movl 40(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 52(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 62 */
- movl 56(%esp), %edx
- movl (%esp), %ebp
- xorl %ebp, %edx
- movl 24(%esp), %ebp
- xorl %ebp, %edx
- movl 44(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 56(%esp)
- xorl %ebx, %ebp
- leal 3395469782(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 63 */
- movl 60(%esp), %ecx
- movl 4(%esp), %ebp
- xorl %ebp, %ecx
- movl 28(%esp), %ebp
- xorl %ebp, %ecx
- movl 48(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 60(%esp)
- xorl %eax, %ebp
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 64 */
- movl (%esp), %ebx
- movl 8(%esp), %ebp
- xorl %ebp, %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 52(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, (%esp)
- xorl %esi, %ebp
- leal 3395469782(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 65 */
- movl 4(%esp), %eax
- movl 12(%esp), %ebp
- xorl %ebp, %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 56(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 4(%esp)
- xorl %edi, %ebp
- leal 3395469782(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 66 */
- movl 8(%esp), %esi
- movl 16(%esp), %ebp
- xorl %ebp, %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl 60(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 8(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 67 */
- movl 12(%esp), %edi
- movl 20(%esp), %ebp
- xorl %ebp, %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl (%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 12(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 68 */
- movl 16(%esp), %edx
- movl 24(%esp), %ebp
- xorl %ebp, %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 4(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 16(%esp)
- xorl %ebx, %ebp
- leal 3395469782(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 69 */
- movl 20(%esp), %ecx
- movl 28(%esp), %ebp
- xorl %ebp, %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 8(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 20(%esp)
- xorl %eax, %ebp
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 70 */
- movl 24(%esp), %ebx
- movl 32(%esp), %ebp
- xorl %ebp, %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 12(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 24(%esp)
- xorl %esi, %ebp
- leal 3395469782(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 71 */
- movl 28(%esp), %eax
- movl 36(%esp), %ebp
- xorl %ebp, %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 16(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 28(%esp)
- xorl %edi, %ebp
- leal 3395469782(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 72 */
- movl 32(%esp), %esi
- movl 40(%esp), %ebp
- xorl %ebp, %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 20(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 32(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 73 */
- movl 36(%esp), %edi
- movl 44(%esp), %ebp
- xorl %ebp, %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 24(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 36(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %ebp, %edx
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
- /* 20_39 74 */
- movl 40(%esp), %edx
- movl 48(%esp), %ebp
- xorl %ebp, %edx
- movl 8(%esp), %ebp
- xorl %ebp, %edx
- movl 28(%esp), %ebp
- xorl %ebp, %edx
- movl %esi, %ebp
-.byte 209
-.byte 194 /* roll $1 %edx */
- xorl %eax, %ebp
- movl %edx, 40(%esp)
- xorl %ebx, %ebp
- leal 3395469782(%edx,%ecx,1),%edx
- movl %edi, %ecx
- roll $5, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ebp, %ecx
-.byte 209
-.byte 206 /* rorl $1 %esi */
- addl %ecx, %edx
- /* 20_39 75 */
- movl 44(%esp), %ecx
- movl 52(%esp), %ebp
- xorl %ebp, %ecx
- movl 12(%esp), %ebp
- xorl %ebp, %ecx
- movl 32(%esp), %ebp
- xorl %ebp, %ecx
- movl %edi, %ebp
-.byte 209
-.byte 193 /* roll $1 %ecx */
- xorl %esi, %ebp
- movl %ecx, 44(%esp)
- xorl %eax, %ebp
- leal 3395469782(%ecx,%ebx,1),%ecx
- movl %edx, %ebx
- roll $5, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebp, %ebx
-.byte 209
-.byte 207 /* rorl $1 %edi */
- addl %ebx, %ecx
- /* 20_39 76 */
- movl 48(%esp), %ebx
- movl 56(%esp), %ebp
- xorl %ebp, %ebx
- movl 16(%esp), %ebp
- xorl %ebp, %ebx
- movl 36(%esp), %ebp
- xorl %ebp, %ebx
- movl %edx, %ebp
-.byte 209
-.byte 195 /* roll $1 %ebx */
- xorl %edi, %ebp
- movl %ebx, 48(%esp)
- xorl %esi, %ebp
- leal 3395469782(%ebx,%eax,1),%ebx
- movl %ecx, %eax
- roll $5, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %ebp, %eax
-.byte 209
-.byte 202 /* rorl $1 %edx */
- addl %eax, %ebx
- /* 20_39 77 */
- movl 52(%esp), %eax
- movl 60(%esp), %ebp
- xorl %ebp, %eax
- movl 20(%esp), %ebp
- xorl %ebp, %eax
- movl 40(%esp), %ebp
- xorl %ebp, %eax
- movl %ecx, %ebp
-.byte 209
-.byte 192 /* roll $1 %eax */
- xorl %edx, %ebp
- movl %eax, 52(%esp)
- xorl %edi, %ebp
- leal 3395469782(%eax,%esi,1),%eax
- movl %ebx, %esi
- roll $5, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %ebp, %esi
-.byte 209
-.byte 201 /* rorl $1 %ecx */
- addl %esi, %eax
- /* 20_39 78 */
- movl 56(%esp), %esi
- movl (%esp), %ebp
- xorl %ebp, %esi
- movl 24(%esp), %ebp
- xorl %ebp, %esi
- movl 44(%esp), %ebp
- xorl %ebp, %esi
- movl %ebx, %ebp
-.byte 209
-.byte 198 /* roll $1 %esi */
- xorl %ecx, %ebp
- movl %esi, 56(%esp)
- xorl %edx, %ebp
- leal 3395469782(%esi,%edi,1),%esi
- movl %eax, %edi
- roll $5, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %ebp, %edi
-.byte 209
-.byte 203 /* rorl $1 %ebx */
- addl %edi, %esi
- /* 20_39 79 */
- movl 60(%esp), %edi
- movl 4(%esp), %ebp
- xorl %ebp, %edi
- movl 28(%esp), %ebp
- xorl %ebp, %edi
- movl 48(%esp), %ebp
- xorl %ebp, %edi
- movl %eax, %ebp
-.byte 209
-.byte 199 /* roll $1 %edi */
- xorl %ebx, %ebp
- movl %edi, 60(%esp)
- xorl %ecx, %ebp
- leal 3395469782(%edi,%edx,1),%edi
- movl %esi, %edx
- roll $5, %edx
- addl %ebp, %edx
- movl 92(%esp), %ebp
-.byte 209
-.byte 200 /* rorl $1 %eax */
- addl %edx, %edi
-.byte 209
-.byte 200 /* rorl $1 %eax */
- /* End processing */
-
- movl 12(%ebp), %edx
- addl %ebx, %edx
- movl 4(%ebp), %ebx
- addl %esi, %ebx
- movl %eax, %esi
- movl (%ebp), %eax
- movl %edx, 12(%ebp)
- addl %edi, %eax
- movl 16(%ebp), %edi
- addl %ecx, %edi
- movl 8(%ebp), %ecx
- addl %esi, %ecx
- movl %eax, (%ebp)
- movl 64(%esp), %esi
- movl %ecx, 8(%ebp)
- addl $64, %esi
- movl 68(%esp), %eax
- movl %edi, 16(%ebp)
- cmpl %esi, %eax
- movl %ebx, 4(%ebp)
- jb .L001end
- movl (%esi), %eax
- jmp .L000start
-.L001end:
- addl $72, %esp
- popl %edi
- popl %ebx
- popl %ebp
- popl %esi
- ret
-.sha1_block_x86_end:
- SIZE(sha1_block_x86,.sha1_block_x86_end-sha1_block_x86)
-.ident "desasm.pl"
-#endif
diff --git a/lib/libmd/sha1c.c b/lib/libmd/sha1c.c
--- a/lib/libmd/sha1c.c
+++ b/lib/libmd/sha1c.c
@@ -1,476 +1,244 @@
-/* crypto/sha/sha1dgst.c */
-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
- * All rights reserved.
+/*-
+ * Copyright (c) 2009 The Go Authors. All rights reserved.
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * Adapted from Go's crypto/sha1/sha1.go.
*
- * This package is an SSL implementation written
- * by Eric Young (eay@cryptsoft.com).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young (eay@cryptsoft.com)"
- * The word 'cryptographic' can be left out if the routines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <sys/types.h>
-
-#include <stdio.h>
+#include <assert.h>
+#include <sha.h>
+#include <stdint.h>
#include <string.h>
+#include <strings.h>
+#include <sys/endian.h>
-#if 0
-#include <machine/ansi.h> /* we use the __ variants of bit-sized types */
+#ifdef SHA1_ASM
+extern void sha1_block(SHA1_CTX *, const void *, size_t);
+#else
+static void sha1_block(SHA1_CTX *, const void *, size_t);
#endif
-#include <machine/endian.h>
-#undef SHA_0
-#define SHA_1
-#include "sha.h"
-#include "sha_locl.h"
+#define INIT0 0x67452301
+#define INIT1 0xEFCDAB89
+#define INIT2 0x98BADCFE
+#define INIT3 0x10325476
+#define INIT4 0xC3D2E1F0
-/*
- * The assembly-language code is not position-independent, so don't
- * try to use it in a shared library.
- */
-#ifdef PIC
-#undef SHA1_ASM
-#endif
+#define K0 0x5A827999
+#define K1 0x6ED9EBA1
+#define K2 0x8F1BBCDC
+#define K3 0xCA62C1D6
-static char *SHA1_version="SHA1 part of SSLeay 0.9.0b 11-Oct-1998";
+void
+SHA1_Init(SHA1_CTX *c)
+{
+ c->h0 = INIT0;
+ c->h1 = INIT1;
+ c->h2 = INIT2;
+ c->h3 = INIT3;
+ c->h4 = INIT4;
+ c->Nl = 0;
+ c->Nh = 0;
+ c->num = 0;
+}
-/* Implemented from SHA-1 document - The Secure Hash Algorithm
- */
+void
+SHA1_Update(SHA1_CTX *c, const void *data, size_t len)
+{
+ uint64_t nn;
+ const char *p = data;
-#define INIT_DATA_h0 (unsigned long)0x67452301L
-#define INIT_DATA_h1 (unsigned long)0xefcdab89L
-#define INIT_DATA_h2 (unsigned long)0x98badcfeL
-#define INIT_DATA_h3 (unsigned long)0x10325476L
-#define INIT_DATA_h4 (unsigned long)0xc3d2e1f0L
-
-#define K_00_19 0x5a827999L
-#define K_20_39 0x6ed9eba1L
-#define K_40_59 0x8f1bbcdcL
-#define K_60_79 0xca62c1d6L
-
-#ifndef NOPROTO
-# ifdef SHA1_ASM
- void sha1_block_x86(SHA_CTX *c, const u_int32_t *p, int num);
-# define sha1_block sha1_block_x86
-# else
- void sha1_block(SHA_CTX *c, const u_int32_t *p, int num);
-# endif
-#else
-# ifdef SHA1_ASM
- void sha1_block_x86();
-# define sha1_block sha1_block_x86
-# else
- void sha1_block();
-# endif
-#endif
+ nn = (uint64_t)c->Nl | (uint64_t)c->Nh << 32;
+ nn += len;
+ c->Nl = (uint32_t)nn;
+ c->Nh = (uint32_t)(nn >> 32);
+ if (c->num > 0) {
+ size_t n = SHA_CBLOCK - c->num;
-#if BYTE_ORDER == LITTLE_ENDIAN && defined(SHA1_ASM)
-# define M_c2nl c2l
-# define M_p_c2nl p_c2l
-# define M_c2nl_p c2l_p
-# define M_p_c2nl_p p_c2l_p
-# define M_nl2c l2c
-#else
-# define M_c2nl c2nl
-# define M_p_c2nl p_c2nl
-# define M_c2nl_p c2nl_p
-# define M_p_c2nl_p p_c2nl_p
-# define M_nl2c nl2c
-#endif
+ if (n > len)
+ n = len;
+
+ memcpy((char *)c->data + c->num, p, n);
+ c->num += n;
+ if (c->num == SHA_CBLOCK) {
+ sha1_block(c, (void *)c->data, SHA_CBLOCK);
+ c->num = 0;
+ }
+
+ p += n;
+ len -= n;
+ }
+
+ if (len >= SHA_CBLOCK) {
+ size_t n = len & ~(size_t)(SHA_CBLOCK - 1);
+
+ sha1_block(c, p, n);
+ p += n;
+ len -= n;
+ }
-void SHA1_Init(SHA_CTX *c)
- {
- c->h0=INIT_DATA_h0;
- c->h1=INIT_DATA_h1;
- c->h2=INIT_DATA_h2;
- c->h3=INIT_DATA_h3;
- c->h4=INIT_DATA_h4;
- c->Nl=0;
- c->Nh=0;
- c->num=0;
+ if (len > 0) {
+ memcpy(c->data, p, len);
+ c->num = len;
}
+}
void
-SHA1_Update(SHA_CTX *c, const void *in, size_t len)
+SHA1_Final(unsigned char *md, SHA1_CTX *c)
{
- u_int32_t *p;
- int ew,ec,sw,sc;
- u_int32_t l;
- const unsigned char *data = in;
-
- if (len == 0) return;
-
- l=(c->Nl+(len<<3))&0xffffffffL;
- if (l < c->Nl) /* overflow */
- c->Nh++;
- c->Nh+=(len>>29);
- c->Nl=l;
-
- if (c->num != 0)
- {
- p=c->data;
- sw=c->num>>2;
- sc=c->num&0x03;
-
- if ((c->num+len) >= SHA_CBLOCK)
- {
- l= p[sw];
- M_p_c2nl(data,l,sc);
- p[sw++]=l;
- for (; sw<SHA_LBLOCK; sw++)
- {
- M_c2nl(data,l);
- p[sw]=l;
- }
- len-=(SHA_CBLOCK-c->num);
-
- sha1_block(c,p,64);
- c->num=0;
- /* drop through and do the rest */
- }
- else
- {
- c->num+=(int)len;
- if ((sc+len) < 4) /* ugly, add char's to a word */
- {
- l= p[sw];
- M_p_c2nl_p(data,l,sc,len);
- p[sw]=l;
- }
- else
- {
- ew=(c->num>>2);
- ec=(c->num&0x03);
- l= p[sw];
- M_p_c2nl(data,l,sc);
- p[sw++]=l;
- for (; sw < ew; sw++)
- { M_c2nl(data,l); p[sw]=l; }
- if (ec)
- {
- M_c2nl_p(data,l,ec);
- p[sw]=l;
- }
- }
- return;
- }
+ uint64_t len;
+ size_t t;
+ unsigned char tmp[SHA_CBLOCK + sizeof(uint64_t)] = {0x80, 0};
+
+ len = (uint64_t)c->Nl | (uint64_t)c->Nh << 32;
+ t = 64 + 56 - c->Nl % 64;
+ if (t > 64)
+ t -= 64;
+
+ /* length in bits */
+ len <<= 3;
+ be64enc(tmp + t, len);
+ SHA1_Update(c, tmp, t + 8);
+ assert(c->num == 0);
+
+ be32enc(md + 0, c->h0);
+ be32enc(md + 4, c->h1);
+ be32enc(md + 8, c->h2);
+ be32enc(md + 12, c->h3);
+ be32enc(md + 16, c->h4);
+
+ explicit_bzero(c, sizeof(*c));
+}
+
+#ifndef SHA1_ASM
+/* invariant: len is a multiple of SHA_CBLOCK */
+static void
+sha1_block(SHA1_CTX *c, const void *data, size_t len)
+{
+ uint32_t w[16];
+ uint32_t h0 = c->h0, h1 = c->h1, h2 = c->h2, h3 = c->h3, h4 = c->h4;
+ const char *p = data;
+
+ while (len >= SHA_CBLOCK) {
+ size_t i;
+ uint32_t a = h0, b = h1, c = h2, d = h3, e = h4;
+ uint32_t f, t, tmp;
+
+# pragma unroll
+ for (i = 0; i < 16; i++)
+ w[i] = be32dec(p + 4*i);
+
+# pragma unroll
+ for (i = 0; i < 16; i++) {
+ f = b & c | ~b & d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K0;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
- /* We can only do the following code for assember, the reason
- * being that the sha1_block 'C' version changes the values
- * in the 'data' array. The assember code avoids this and
- * copies it to a local array. I should be able to do this for
- * the C version as well....
- */
-#if 1
-#if BYTE_ORDER == BIG_ENDIAN || defined(SHA1_ASM)
- if ((((unsigned int)data)%sizeof(u_int32_t)) == 0)
- {
- sw=len/SHA_CBLOCK;
- if (sw)
- {
- sw*=SHA_CBLOCK;
- sha1_block(c,(u_int32_t *)data,sw);
- data+=sw;
- len-=sw;
- }
+
+# pragma unroll
+ for (; i < 20; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = b & c | ~b & d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K0;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
-#endif
-#endif
- /* we now can process the input data in blocks of SHA_CBLOCK
- * chars and save the leftovers to c->data. */
- p=c->data;
- while (len >= SHA_CBLOCK)
- {
-#if BYTE_ORDER == BIG_ENDIAN || BYTE_ORDER == LITTLE_ENDIAN
- if (p != (u_int32_t *)data)
- memcpy(p,data,SHA_CBLOCK);
- data+=SHA_CBLOCK;
-# if BYTE_ORDER == LITTLE_ENDIAN
-# ifndef SHA1_ASM /* Will not happen */
- for (sw=(SHA_LBLOCK/4); sw; sw--)
- {
- Endian_Reverse32(p[0]);
- Endian_Reverse32(p[1]);
- Endian_Reverse32(p[2]);
- Endian_Reverse32(p[3]);
- p+=4;
- }
- p=c->data;
-# endif
-# endif
-#else
- for (sw=(SHA_BLOCK/4); sw; sw--)
- {
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- M_c2nl(data,l); *(p++)=l;
- }
- p=c->data;
-#endif
- sha1_block(c,p,64);
- len-=SHA_CBLOCK;
+
+# pragma unroll
+ for (; i < 40; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = b ^ c ^ d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K1;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
- ec=(int)len;
- c->num=ec;
- ew=(ec>>2);
- ec&=0x03;
-
- for (sw=0; sw < ew; sw++)
- { M_c2nl(data,l); p[sw]=l; }
- M_c2nl_p(data,l,ec);
- p[sw]=l;
- }
-static void SHA1_Transform(SHA_CTX *c, unsigned char *b)
- {
- u_int32_t p[16];
-#if BYTE_ORDER != BIG_ENDIAN
- u_int32_t *q;
- int i;
-#endif
+# pragma unroll
+ for (; i < 60; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = (b | c) & d | b & c;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K2;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
+ }
-#if BYTE_ORDER == BIG_ENDIAN || BYTE_ORDER == LITTLE_ENDIAN
- memcpy(p,b,64);
-#if BYTE_ORDER == LITTLE_ENDIAN
- q=p;
- for (i=(SHA_LBLOCK/4); i; i--)
- {
- Endian_Reverse32(q[0]);
- Endian_Reverse32(q[1]);
- Endian_Reverse32(q[2]);
- Endian_Reverse32(q[3]);
- q+=4;
+# pragma unroll
+ for (; i < 80; i++) {
+ tmp = w[i - 3 & 0xf] ^ w[i - 8 & 0xf] ^ w[i - 14 & 0xf] ^ w[i & 0xf];
+ w[i & 0xf] = tmp << 1 | tmp >> 32 - 1;
+
+ f = b ^ c ^ d;
+ t = (a << 5 | a >> 32 - 5) + f + e + w[i & 0xf] + K3;
+ e = d;
+ d = c;
+ c = b << 30 | b >> 32 - 30;
+ b = a;
+ a = t;
}
-#endif
-#else
- q=p;
- for (i=(SHA_LBLOCK/4); i; i--)
- {
- u_int32_t l;
- c2nl(b,l); *(q++)=l;
- c2nl(b,l); *(q++)=l;
- c2nl(b,l); *(q++)=l;
- c2nl(b,l); *(q++)=l;
- }
-#endif
- sha1_block(c,p,64);
- }
-#ifndef SHA1_ASM
+ h0 += a;
+ h1 += b;
+ h2 += c;
+ h3 += d;
+ h4 += e;
-void
-sha1_block(SHA_CTX *c, const u_int32_t *W, int num)
-{
- u_int32_t A,B,C,D,E,T;
- u_int32_t X[16];
-
- A=c->h0;
- B=c->h1;
- C=c->h2;
- D=c->h3;
- E=c->h4;
-
- for (;;)
- {
- BODY_00_15( 0,A,B,C,D,E,T,W);
- BODY_00_15( 1,T,A,B,C,D,E,W);
- BODY_00_15( 2,E,T,A,B,C,D,W);
- BODY_00_15( 3,D,E,T,A,B,C,W);
- BODY_00_15( 4,C,D,E,T,A,B,W);
- BODY_00_15( 5,B,C,D,E,T,A,W);
- BODY_00_15( 6,A,B,C,D,E,T,W);
- BODY_00_15( 7,T,A,B,C,D,E,W);
- BODY_00_15( 8,E,T,A,B,C,D,W);
- BODY_00_15( 9,D,E,T,A,B,C,W);
- BODY_00_15(10,C,D,E,T,A,B,W);
- BODY_00_15(11,B,C,D,E,T,A,W);
- BODY_00_15(12,A,B,C,D,E,T,W);
- BODY_00_15(13,T,A,B,C,D,E,W);
- BODY_00_15(14,E,T,A,B,C,D,W);
- BODY_00_15(15,D,E,T,A,B,C,W);
- BODY_16_19(16,C,D,E,T,A,B,W,W,W,W);
- BODY_16_19(17,B,C,D,E,T,A,W,W,W,W);
- BODY_16_19(18,A,B,C,D,E,T,W,W,W,W);
- BODY_16_19(19,T,A,B,C,D,E,W,W,W,X);
-
- BODY_20_31(20,E,T,A,B,C,D,W,W,W,X);
- BODY_20_31(21,D,E,T,A,B,C,W,W,W,X);
- BODY_20_31(22,C,D,E,T,A,B,W,W,W,X);
- BODY_20_31(23,B,C,D,E,T,A,W,W,W,X);
- BODY_20_31(24,A,B,C,D,E,T,W,W,X,X);
- BODY_20_31(25,T,A,B,C,D,E,W,W,X,X);
- BODY_20_31(26,E,T,A,B,C,D,W,W,X,X);
- BODY_20_31(27,D,E,T,A,B,C,W,W,X,X);
- BODY_20_31(28,C,D,E,T,A,B,W,W,X,X);
- BODY_20_31(29,B,C,D,E,T,A,W,W,X,X);
- BODY_20_31(30,A,B,C,D,E,T,W,X,X,X);
- BODY_20_31(31,T,A,B,C,D,E,W,X,X,X);
- BODY_32_39(32,E,T,A,B,C,D,X);
- BODY_32_39(33,D,E,T,A,B,C,X);
- BODY_32_39(34,C,D,E,T,A,B,X);
- BODY_32_39(35,B,C,D,E,T,A,X);
- BODY_32_39(36,A,B,C,D,E,T,X);
- BODY_32_39(37,T,A,B,C,D,E,X);
- BODY_32_39(38,E,T,A,B,C,D,X);
- BODY_32_39(39,D,E,T,A,B,C,X);
-
- BODY_40_59(40,C,D,E,T,A,B,X);
- BODY_40_59(41,B,C,D,E,T,A,X);
- BODY_40_59(42,A,B,C,D,E,T,X);
- BODY_40_59(43,T,A,B,C,D,E,X);
- BODY_40_59(44,E,T,A,B,C,D,X);
- BODY_40_59(45,D,E,T,A,B,C,X);
- BODY_40_59(46,C,D,E,T,A,B,X);
- BODY_40_59(47,B,C,D,E,T,A,X);
- BODY_40_59(48,A,B,C,D,E,T,X);
- BODY_40_59(49,T,A,B,C,D,E,X);
- BODY_40_59(50,E,T,A,B,C,D,X);
- BODY_40_59(51,D,E,T,A,B,C,X);
- BODY_40_59(52,C,D,E,T,A,B,X);
- BODY_40_59(53,B,C,D,E,T,A,X);
- BODY_40_59(54,A,B,C,D,E,T,X);
- BODY_40_59(55,T,A,B,C,D,E,X);
- BODY_40_59(56,E,T,A,B,C,D,X);
- BODY_40_59(57,D,E,T,A,B,C,X);
- BODY_40_59(58,C,D,E,T,A,B,X);
- BODY_40_59(59,B,C,D,E,T,A,X);
-
- BODY_60_79(60,A,B,C,D,E,T,X);
- BODY_60_79(61,T,A,B,C,D,E,X);
- BODY_60_79(62,E,T,A,B,C,D,X);
- BODY_60_79(63,D,E,T,A,B,C,X);
- BODY_60_79(64,C,D,E,T,A,B,X);
- BODY_60_79(65,B,C,D,E,T,A,X);
- BODY_60_79(66,A,B,C,D,E,T,X);
- BODY_60_79(67,T,A,B,C,D,E,X);
- BODY_60_79(68,E,T,A,B,C,D,X);
- BODY_60_79(69,D,E,T,A,B,C,X);
- BODY_60_79(70,C,D,E,T,A,B,X);
- BODY_60_79(71,B,C,D,E,T,A,X);
- BODY_60_79(72,A,B,C,D,E,T,X);
- BODY_60_79(73,T,A,B,C,D,E,X);
- BODY_60_79(74,E,T,A,B,C,D,X);
- BODY_60_79(75,D,E,T,A,B,C,X);
- BODY_60_79(76,C,D,E,T,A,B,X);
- BODY_60_79(77,B,C,D,E,T,A,X);
- BODY_60_79(78,A,B,C,D,E,T,X);
- BODY_60_79(79,T,A,B,C,D,E,X);
-
- c->h0=(c->h0+E)&0xffffffffL;
- c->h1=(c->h1+T)&0xffffffffL;
- c->h2=(c->h2+A)&0xffffffffL;
- c->h3=(c->h3+B)&0xffffffffL;
- c->h4=(c->h4+C)&0xffffffffL;
-
- num-=64;
- if (num <= 0) break;
-
- A=c->h0;
- B=c->h1;
- C=c->h2;
- D=c->h3;
- E=c->h4;
-
- W+=16;
- }
+ p += SHA_CBLOCK;
+ len -= SHA_CBLOCK;
}
-#endif
-void SHA1_Final(unsigned char *md, SHA_CTX *c)
- {
- int i,j;
- u_int32_t l;
- u_int32_t *p;
- static unsigned char end[4]={0x80,0x00,0x00,0x00};
- unsigned char *cp=end;
-
- /* c->num should definitly have room for at least one more byte. */
- p=c->data;
- j=c->num;
- i=j>>2;
-#ifdef PURIFY
- if ((j&0x03) == 0) p[i]=0;
+ c->h0 = h0;
+ c->h1 = h1;
+ c->h2 = h2;
+ c->h3 = h3;
+ c->h4 = h4;
+}
#endif
- l=p[i];
- M_p_c2nl(cp,l,j&0x03);
- p[i]=l;
- i++;
- /* i is the next 'undefined word' */
- if (c->num >= SHA_LAST_BLOCK)
- {
- for (; i<SHA_LBLOCK; i++)
- p[i]=0;
- sha1_block(c,p,64);
- i=0;
- }
- for (; i<(SHA_LBLOCK-2); i++)
- p[i]=0;
- p[SHA_LBLOCK-2]=c->Nh;
- p[SHA_LBLOCK-1]=c->Nl;
-#if BYTE_ORDER == LITTLE_ENDIAN && defined(SHA1_ASM)
- Endian_Reverse32(p[SHA_LBLOCK-2]);
- Endian_Reverse32(p[SHA_LBLOCK-1]);
-#endif
- sha1_block(c,p,64);
- cp=md;
- l=c->h0; nl2c(l,cp);
- l=c->h1; nl2c(l,cp);
- l=c->h2; nl2c(l,cp);
- l=c->h3; nl2c(l,cp);
- l=c->h4; nl2c(l,cp);
-
- /* Clear the context state */
- explicit_bzero(&c, sizeof(c));
- }
#ifdef WEAK_REFS
/* When building libmd, provide weak references. Note: this is not
D45444: lib/libmd: rework and accelerate SHA1 implementation
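A minimal consumer of the reworked code, assuming FreeBSD's <sha.h> and linking against libmd (cc sha1test.c -lmd, where sha1test.c is only a placeholder name): the program below hashes the standard "abc" test vector through SHA1_Init/SHA1_Update/SHA1_Final, and on amd64 the ifunc resolver from sha1dispatch.c picks the scalar, AVX2, or SHA-NI block routine at load time without any change to this calling code.

#include <sha.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	SHA1_CTX ctx;
	unsigned char digest[SHA_DIGEST_LENGTH];
	const char msg[] = "abc";

	SHA1_Init(&ctx);
	SHA1_Update(&ctx, msg, strlen(msg));
	SHA1_Final(digest, &ctx);

	/* expected: a9993e364706816aba3e25717850c26c9cd0d89d */
	for (size_t i = 0; i < sizeof(digest); i++)
		printf("%02x", digest[i]);
	putchar('\n');

	return (0);
}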