diff --git a/lib/libc/Makefile b/lib/libc/Makefile --- a/lib/libc/Makefile +++ b/lib/libc/Makefile @@ -109,7 +109,7 @@ .include "${LIBC_SRCTOP}/inet/Makefile.inc" .include "${LIBC_SRCTOP}/isc/Makefile.inc" .include "${LIBC_SRCTOP}/locale/Makefile.inc" -.include "${LIBC_SRCTOP}/md/Makefile.inc" +.include "${SRCTOP}/lib/libmd/Makefile.md5.inc" .include "${LIBC_SRCTOP}/nameser/Makefile.inc" .include "${LIBC_SRCTOP}/net/Makefile.inc" .include "${LIBC_SRCTOP}/nls/Makefile.inc" diff --git a/lib/libc/md/Makefile.inc b/lib/libc/md/Makefile.inc deleted file mode 100644 --- a/lib/libc/md/Makefile.inc +++ /dev/null @@ -1,3 +0,0 @@ -.PATH: ${SRCTOP}/sys/kern - -SRCS+= md5c.c diff --git a/lib/libmd/Makefile b/lib/libmd/Makefile --- a/lib/libmd/Makefile +++ b/lib/libmd/Makefile @@ -6,7 +6,7 @@ PACKAGE= runtime LIB= md SHLIB_MAJOR= 7 -SRCS= md4c.c md5c.c md4hl.c md5hl.c \ +SRCS= md4c.c md4hl.c md5hl.c \ rmd160c.c rmd160hl.c \ sha0c.c sha0hl.c sha1c.c sha1hl.c \ sha224hl.c sha256c.c sha256hl.c \ @@ -150,6 +150,8 @@ .endif .endif # ${USE_ASM_SOURCES} != 0 +.include "Makefile.md5.inc" + md4hl.c: mdXhl.c (echo '#define LENGTH 16'; \ sed -e 's/mdX/md4/g' -e 's/MDX/MD4/g' ${.ALLSRC}) > ${.TARGET} diff --git a/lib/libmd/Makefile.md5.inc b/lib/libmd/Makefile.md5.inc new file mode 100644 --- /dev/null +++ b/lib/libmd/Makefile.md5.inc @@ -0,0 +1,17 @@ +# include the MD5 hash function into the build + +.PATH: ${SRCTOP}/sys/crypto/md5 + +SRCS+= md5c.c + +USE_ASM_SOURCES?= 1 + +.if ${USE_ASM_SOURCES} != 0 && !defined(BOOTSTRAPPING) && ${MK_MACHDEP_OPTIMIZATIONS} != no +.if exists(md5block_${MACHINE_ARCH}.S) +SRCS+= md5block_${MACHINE_ARCH}.S +CFLAGS.md5c.c+= -DMD5_ASM +.if exists(md5dispatch_${MACHINE_ARCH}.c) +SRCS+= md5dispatch_${MACHINE_ARCH}.c +.endif +.endif +.endif diff --git a/stand/libsa/Makefile b/stand/libsa/Makefile --- a/stand/libsa/Makefile +++ b/stand/libsa/Makefile @@ -189,9 +189,9 @@ .PATH: ${SYSDIR}/crypto/sha2 SRCS+= sha256c.c sha512c.c -# md5 from the kernel -.PATH: 
${SYSDIR}/kern -SRCS+= md5c.c +# md5 from the kernel, but avoid SIMD implementations +USE_ASM_SOURCES=0 +.include "${SRCTOP}/lib/libmd/Makefile.md5.inc" .if ${DO32:U0} == 0 MAN=libsa.3 diff --git a/sys/conf/Makefile.amd64 b/sys/conf/Makefile.amd64 --- a/sys/conf/Makefile.amd64 +++ b/sys/conf/Makefile.amd64 @@ -35,6 +35,8 @@ CFLAGS+= -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer .endif +CFLAGS.md5c.c+= -DMD5_ASM + %BEFORE_DEPEND %OBJS diff --git a/sys/conf/Makefile.arm64 b/sys/conf/Makefile.arm64 --- a/sys/conf/Makefile.arm64 +++ b/sys/conf/Makefile.arm64 @@ -60,6 +60,8 @@ CFLAGS += -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer .endif +CFLAGS.md5c.c+= -DMD5_ASM + %BEFORE_DEPEND %OBJS diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -697,6 +697,7 @@ compile-with "${NORMAL_C} -I$S/contrib/libsodium/src/libsodium/include -I$S/crypto/libsodium" crypto/des/des_ecb.c optional netsmb crypto/des/des_setkey.c optional netsmb +crypto/md5/md5c.c standard crypto/openssl/ossl.c optional ossl crypto/openssl/ossl_aes.c optional ossl crypto/openssl/ossl_chacha20.c optional ossl @@ -3871,7 +3872,6 @@ kern/link_elf.c standard kern/linker_if.m standard kern/md4c.c optional netsmb -kern/md5c.c standard kern/p1003_1b.c standard kern/posix4_mib.c standard kern/sched_4bsd.c optional sched_4bsd diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -98,6 +98,8 @@ cddl/dev/dtrace/amd64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" crypto/aesni/aeskeys_amd64.S optional aesni crypto/des/des_enc.c optional netsmb +crypto/md5/md5block_amd64.S standard +crypto/md5/md5dispatch_amd64.c standard crypto/openssl/amd64/aes-gcm-avx512.S optional ossl crypto/openssl/amd64/aesni-x86_64.S optional ossl crypto/openssl/amd64/aesni-gcm-x86_64.S optional ossl diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -171,6 +171,7 
@@ clean "ghashv8-armx.o" crypto/des/des_enc.c optional netsmb +crypto/md5/md5block_aarch64.S standard crypto/openssl/ossl_aarch64.c optional ossl crypto/openssl/aarch64/chacha-armv8.S optional ossl \ compile-with "${CC} -c ${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}" diff --git a/sys/crypto/md5/md5block_aarch64.S b/sys/crypto/md5/md5block_aarch64.S new file mode 100644 --- /dev/null +++ b/sys/crypto/md5/md5block_aarch64.S @@ -0,0 +1,213 @@ +/*- + * Copyright (c) 2024 Robert Clausecker + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +.macro round a, b, c, d, f, k, m, s + \f f, \b, \c, \d +.if 0x100000000 - \k > 0x00ffffff + movz k, #\k & 0xffff + movk k, #\k >> 16, lsl #16 + add k, k, \m // k[i] + m[g] +.elseif 0x100000000 - \k > 0x0000ffff + sub k, \m, #(0x100000000 - \k) & 0xfff000 + sub k, k, #(0x100000000 - \k) & 0xfff +.else + movz k, #0x100000000 - \k + sub k, \m, k +.endif + add \a, \a, k // k[i] + m[g] + a + add \a, \a, f // k[i] + m[g] + a + f + ror \a, \a, #32-\s + add \a, \a, \b +.endm + + /* f = b ? c : d */ +.macro f0 f, b, c, d + eor \f, \c, \d + and \f, \f, \b + eor \f, \f, \d +.endm + + /* + * special cased round 1 function + * f1 = d ? 
b : c = (d & b) + (~d & c) + */ +.macro round1 a, b, c, d, k, m, s + bic tmp, \c, \d // ~d & c +.if 0x100000000 - \k > 0x00ffffff + movz k, #\k & 0xffff + movk k, #\k >> 16, lsl #16 + add k, k, \m // k[i] + m[g] +.elseif 0x100000000 - \k > 0x0000ffff + sub k, \m, #(0x100000000 - \k) & 0xfff000 + sub k, k, #(0x100000000 - \k) & 0xfff +.else + movz k, #0x100000000 - \k + sub k, \m, k +.endif + add \a, \a, k // k[i] + m[g] + a + and f, \b, \d // d & b + add \a, \a, tmp // k[i] + m[g] + a + (~d & c) + add \a, \a, f // k[i] + m[g] + a + (~d & c) + (d & b) + ror \a, \a, #32-\s + add \a, \a, \b +.endm + + /* f = b ^ c ^ d */ +.macro f2 f, b, c, d + eor \f, \c, \d + eor \f, \f, \b +.endm + + /* f = c ^ (b | ~d) */ +.macro f3 f, b, c, d + orn \f, \b, \d + eor \f, \f, \c +.endm + + /* do 4 rounds */ +.macro rounds f, m0, m1, m2, m3, s0, s1, s2, s3, k0, k1, k2, k3 + round a, b, c, d, \f, \k0, \m0, \s0 + round d, a, b, c, \f, \k1, \m1, \s1 + round c, d, a, b, \f, \k2, \m2, \s2 + round b, c, d, a, \f, \k3, \m3, \s3 +.endm + + /* do 4 rounds with f0, f1, f2, f3 */ +.macro rounds0 m0, m1, m2, m3, k0, k1, k2, k3 + rounds f0, \m0, \m1, \m2, \m3, 7, 12, 17, 22, \k0, \k1, \k2, \k3 +.endm + +.macro rounds1 m0, m1, m2, m3, k0, k1, k2, k3 + round1 a, b, c, d, \k0, \m0, 5 + round1 d, a, b, c, \k1, \m1, 9 + round1 c, d, a, b, \k2, \m2, 14 + round1 b, c, d, a, \k3, \m3, 20 +.endm + +.macro rounds2 m0, m1, m2, m3, k0, k1, k2, k3 + rounds f2, \m0, \m1, \m2, \m3, 4, 11, 16, 23, \k0, \k1, \k2, \k3 +.endm + +.macro rounds3 m0, m1, m2, m3, k0, k1, k2, k3 + rounds f3, \m0, \m1, \m2, \m3, 6, 10, 15, 21, \k0, \k1, \k2, \k3 +.endm + + /* md5block(MD5_CTX, buf, len) */ +ENTRY(_libmd_md5block) +ctx .req x0 +buf .req x1 +len .req x2 +end .req x2 // aliases len +a .req w3 +b .req w4 +c .req w5 +d .req w6 +f .req w7 +tmp .req w8 +k .req w9 +m0 .req w10 +m1 .req w11 +m2 .req w12 +m3 .req w13 +m4 .req w14 +m5 .req w15 +m6 .req w16 +m7 .req w17 + // x18 is the platform register +m8 .req w19 +m9 .req w20 
+m10 .req w21 +m11 .req w22 +m12 .req w23 +m13 .req w24 +m14 .req w25 +m15 .req w26 + +a_ .req m0 +b_ .req m7 +c_ .req m14 +d_ .req m5 + + stp x19, x20, [sp, #-0x40]! + stp x21, x22, [sp, #0x10] + stp x23, x24, [sp, #0x20] + stp x25, x26, [sp, #0x30] + + bics len, len, #63 // length in blocks + add end, buf, len // end pointer + + beq .Lend // was len == 0 after BICS? + + ldp a, b, [ctx, #0] + ldp c, d, [ctx, #8] + + /* first eight rounds interleaved with data loads */ +.Lloop: ldp m0, m1, [buf, #0] + round a, b, c, d, f0, 0xd76aa478, m0, 7 + ldp m2, m3, [buf, #8] + round d, a, b, c, f0, 0xe8c7b756, m1, 12 + ldp m4, m5, [buf, #16] + round c, d, a, b, f0, 0x242070db, m2, 17 + ldp m6, m7, [buf, #24] + round b, c, d, a, f0, 0xc1bdceee, m3, 22 + + ldp m8, m9, [buf, #32] + round a, b, c, d, f0, 0xf57c0faf, m4, 7 + ldp m10, m11, [buf, #40] + round d, a, b, c, f0, 0x4787c62a, m5, 12 + ldp m12, m13, [buf, #48] + round c, d, a, b, f0, 0xa8304613, m6, 17 + ldp m14, m15, [buf, #56] + round b, c, d, a, f0, 0xfd469501, m7, 22 + + /* remaining rounds use the roundsX macros */ +// rounds0 m0, m1, m2, m3, 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee +// rounds0 m4, m5, m6, m7, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 + rounds0 m8, m9, m10, m11, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be + rounds0 m12, m13, m14, m15, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 + + rounds1 m1, m6, m11, m0, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa + rounds1 m5, m10, m15, m4, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 + rounds1 m9, m14, m3, m8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed + rounds1 m13, m2, m7, m12, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a + + rounds2 m5, m8, m11, m14, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c + rounds2 m1, m4, m7, m10, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 + rounds2 m13, m0, m3, m6, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 + rounds2 m9, m12, m15, m2, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 + + rounds3 m0, 
m7, m14, m5, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 + rounds3 m12, m3, m10, m1, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 + rounds3 m8, m15, m6, m13, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 + rounds3 m4, m11, m2, m9, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 + + ldp a_, b_, [ctx, #0] + ldp c_, d_, [ctx, #8] + add a, a, a_ + add b, b, b_ + add c, c, c_ + add d, d, d_ + stp a, b, [ctx, #0] + stp c, d, [ctx, #8] + + add buf, buf, #64 + cmp buf, end + bne .Lloop + +.Lend: ldp x25, x26, [sp, #0x30] + ldp x23, x24, [sp, #0x20] + ldp x21, x22, [sp, #0x10] + ldp x19, x20, [sp], #0x40 + + ret +END(_libmd_md5block) + +GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL) + + .section .note.GNU-stack,"",%progbits diff --git a/sys/crypto/md5/md5block_amd64.S b/sys/crypto/md5/md5block_amd64.S new file mode 100644 --- /dev/null +++ b/sys/crypto/md5/md5block_amd64.S @@ -0,0 +1,388 @@ +/*- + * Copyright (c) 2024 Robert Clausecker + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include + + // md5block(MD5_CTX, buf, len) +ENTRY(_libmd_md5block_baseline) +.macro round a, b, c, d, f, k, m, s + \f %ebp, \b, \c, \d + add $\k, \a // a + k[i] + add ((\m)%16*4)(%rsi), \a // a + k[i] + m[g] + add %ebp, \a // a + k[i] + m[g] + f + rol $\s, \a + add \b, \a +.endm + + // f = b ? c : d +.macro f0 f, b, c, d + mov \c, \f + xor \d, \f + and \b, \f + xor \d, \f +.endm + + // f = d ? 
b : c +.macro f1 f, b, c, d + mov \c, \f + xor \b, \f + and \d, \f + xor \c, \f +.endm + + // f = b ^ c ^ d +.macro f2 f, b, c, d + mov \c, \f + xor \d, \f + xor \b, \f +.endm + + // f = c ^ (b | ~d) +.macro f3 f, b, c, d + mov $-1, \f + xor \d, \f + or \b, \f + xor \c, \f +.endm + + // do 4 rounds +.macro rounds f, p, q, s0, s1, s2, s3, k0, k1, k2, k3 + round %eax, %ebx, %ecx, %edx, \f, \k0, \p*0+\q, \s0 + round %edx, %eax, %ebx, %ecx, \f, \k1, \p*1+\q, \s1 + round %ecx, %edx, %eax, %ebx, \f, \k2, \p*2+\q, \s2 + round %ebx, %ecx, %edx, %eax, \f, \k3, \p*3+\q, \s3 +.endm + + // do 4 rounds with f0, f1, f2, f3 +.macro rounds0 i, k0, k1, k2, k3 + rounds f0, 1, \i, 7, 12, 17, 22, \k0, \k1, \k2, \k3 +.endm + +.macro rounds1 i, k0, k1, k2, k3 + rounds f1, 5, 5*\i+1, 5, 9, 14, 20, \k0, \k1, \k2, \k3 +.endm + +.macro rounds2 i, k0, k1, k2, k3 + rounds f2, 3, 3*\i+5, 4, 11, 16, 23, \k0, \k1, \k2, \k3 +.endm + +.macro rounds3 i, k0, k1, k2, k3 + rounds f3, 7, 7*\i, 6, 10, 15, 21, \k0, \k1, \k2, \k3 +.endm + + push %rbx + push %rbp + push %r12 + + and $~63, %rdx // length in blocks + lea (%rsi, %rdx, 1), %r12 // end pointer + + mov (%rdi), %eax // a + mov 4(%rdi), %ebx // b + mov 8(%rdi), %ecx // c + mov 12(%rdi), %edx // d + + cmp %rsi, %r12 // any data to process? 
+ je .Lend + + .balign 16 +.Lloop: mov %eax, %r8d + mov %ebx, %r9d + mov %ecx, %r10d + mov %edx, %r11d + + rounds0 0, 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee + rounds0 4, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 + rounds0 8, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be + rounds0 12, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 + + rounds1 16, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa + rounds1 20, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 + rounds1 24, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed + rounds1 28, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a + + rounds2 32, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c + rounds2 36, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 + rounds2 40, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 + rounds2 44, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 + + rounds3 48, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 + rounds3 52, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 + rounds3 56, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 + rounds3 60, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 + + add %r8d, %eax + add %r9d, %ebx + add %r10d, %ecx + add %r11d, %edx + + add $64, %rsi + cmp %rsi, %r12 + jne .Lloop + + mov %eax, (%rdi) + mov %ebx, 4(%rdi) + mov %ecx, 8(%rdi) + mov %edx, 12(%rdi) + +.Lend: pop %r12 + pop %rbp + pop %rbx + ret +END(_libmd_md5block_baseline) + + /* + * An implementation leveraging the ANDN instruction + * from BMI1 to shorten some dependency chains. + */ +ENTRY(_libmd_md5block_bmi1) + // special-cased round 1 + // f1 = d ? 
b : c = (d & b) + (~d & c) +.macro round1 a, b, c, d, k, m, s + andn \c, \d, %edi // ~d & c + add $\k, \a // a + k[i] + mov \d, %ebp + add ((\m)%16*4)(%rsi), \a // a + k[i] + m[g] + and \b, %ebp // d & b + add %edi, \a // a + k[i] + m[g] + (~d & c) + add %ebp, \a // a + k[i] + m[g] + (~d & c) + (d & b) + rol $\s, \a + add \b, \a +.endm + + // special-cased round 3 + // f3 = c ^ (b | ~d) = ~(c ^ ~b & d) = -1 - (c ^ ~b & d) +.macro round3 a, b, c, d, k, m, s + andn \d, \b, %ebp + add $\k - 1, \a // a + k[i] - 1 + add ((\m)%16*4)(%rsi), \a // a + k[i] + m[g] + xor \c, %ebp + sub %ebp, \a // a + k[i] + m[g] + f + rol $\s, \a + add \b, \a +.endm + + .purgem rounds1 +.macro rounds1 i, k0, k1, k2, k3 + round1 %eax, %ebx, %ecx, %edx, \k0, 5*\i+ 1, 5 + round1 %edx, %eax, %ebx, %ecx, \k1, 5*\i+ 6, 9 + round1 %ecx, %edx, %eax, %ebx, \k2, 5*\i+11, 14 + round1 %ebx, %ecx, %edx, %eax, \k3, 5*\i+16, 20 +.endm + + .purgem rounds3 +.macro rounds3 i, k0, k1, k2, k3 + round3 %eax, %ebx, %ecx, %edx, \k0, 7*\i+ 0, 6 + round3 %edx, %eax, %ebx, %ecx, \k1, 7*\i+ 7, 10 + round3 %ecx, %edx, %eax, %ebx, \k2, 7*\i+14, 15 + round3 %ebx, %ecx, %edx, %eax, \k3, 7*\i+21, 21 +.endm + + push %rbx + push %rbp + push %r12 + + and $~63, %rdx // length in blocks + lea (%rsi, %rdx, 1), %r12 // end pointer + + mov (%rdi), %eax // a + mov 4(%rdi), %ebx // b + mov 8(%rdi), %ecx // c + mov 12(%rdi), %edx // d + + cmp %rsi, %r12 // any data to process? 
+ je 0f + + push %rdi + + .balign 16 +1: mov %eax, %r8d + mov %ebx, %r9d + mov %ecx, %r10d + mov %edx, %r11d + + rounds0 0, 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee + rounds0 4, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 + rounds0 8, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be + rounds0 12, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 + + rounds1 16, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa + rounds1 20, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 + rounds1 24, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed + rounds1 28, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a + + rounds2 32, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c + rounds2 36, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 + rounds2 40, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 + rounds2 44, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 + + rounds3 48, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 + rounds3 52, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 + rounds3 56, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 + rounds3 60, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 + + add %r8d, %eax + add %r9d, %ebx + add %r10d, %ecx + add %r11d, %edx + + add $64, %rsi + cmp %rsi, %r12 + jne 1b + + pop %rdi + mov %eax, (%rdi) + mov %ebx, 4(%rdi) + mov %ecx, 8(%rdi) + mov %edx, 12(%rdi) + +0: pop %r12 + pop %rbp + pop %rbx + ret +END(_libmd_md5block_bmi1) + +#ifndef _KERNEL + /* + * An implementation leveraging AVX-512 for its VPTERNLOGD + * instruction. We're using only XMM registers here, + * avoiding costly thermal licensing. 
+ */ +ENTRY(_libmd_md5block_avx512) +.macro vround a, b, c, d, f, i, m, mi, s + vmovdqa \d, %xmm4 + vpternlogd $\f, \b, \c, %xmm4 + vpaddd 4*(\i)(%rax){1to4}, \m, %xmm5 // m[g] + k[i] +.if \mi != 0 + vpshufd $0x55 * \mi, %xmm5, %xmm5 // broadcast to each dword +.endif + vpaddd %xmm5, \a, \a // a + k[i] + m[g] + vpaddd %xmm4, \a, \a // a + k[i] + m[g] + f + vprold $\s, \a, \a + vpaddd \b, \a, \a +.endm + +.macro vrounds f, i, m0, i0, m1, i1, m2, i2, m3, i3, s0, s1, s2, s3 + vround %xmm0, %xmm1, %xmm2, %xmm3, \f, \i+0, \m0, \i0, \s0 + vround %xmm3, %xmm0, %xmm1, %xmm2, \f, \i+1, \m1, \i1, \s1 + vround %xmm2, %xmm3, %xmm0, %xmm1, \f, \i+2, \m2, \i2, \s2 + vround %xmm1, %xmm2, %xmm3, %xmm0, \f, \i+3, \m3, \i3, \s3 +.endm + +/* + * d c b f0 f1 f2 f3 + * 0 0 0 0 0 0 1 + * 0 0 1 0 0 1 1 + * 0 1 0 0 1 1 0 + * 0 1 1 1 1 0 0 + * 1 0 0 1 0 1 0 + * 1 0 1 0 1 0 1 + * 1 1 0 1 0 0 1 + * 1 1 1 1 1 1 0 + */ +.macro vrounds0 i, m + vrounds 0xd8, \i, \m, 0, \m, 1, \m, 2, \m, 3, 7, 12, 17, 22 +.endm + +.macro vrounds1 i, m0, i0, m1, i1, m2, i2, m3, i3 + vrounds 0xac, \i, \m0, \i0, \m1, \i1, \m2, \i2, \m3, \i3, 5, 9, 14, 20 +.endm + +.macro vrounds2 i, m0, i0, m1, i1, m2, i2, m3, i3 + vrounds 0x96, \i, \m0, \i0, \m1, \i1, \m2, \i2, \m3, \i3, 4, 11, 16, 23 +.endm + +.macro vrounds3 i, m0, i0, m1, i1, m2, i2, m3, i3 + vrounds 0x63, \i, \m0, \i0, \m1, \i1, \m2, \i2, \m3, \i3, 6, 10, 15, 21 +.endm + + and $~63, %rdx // length in blocks + add %rsi, %rdx // end pointer + + vmovd (%rdi), %xmm0 // a + vmovd 4(%rdi), %xmm1 // b + vmovd 8(%rdi), %xmm2 // c + vmovd 12(%rdi), %xmm3 // d + + lea keys(%rip), %rax + + cmp %rsi, %rdx // any data to process? 
+ je 0f + + .balign 16 +1: vmovdqu 0*4(%rsi), %xmm8 // message words + vmovdqu 4*4(%rsi), %xmm9 + vmovdqu 8*4(%rsi), %xmm10 + vmovdqu 12*4(%rsi), %xmm11 + + vmovdqa %xmm0, %xmm12 // stash old state variables + vmovdqa %xmm1, %xmm13 + vmovdqa %xmm2, %xmm14 + vmovdqa %xmm3, %xmm15 + + vrounds0 0, %xmm8 + vrounds0 4, %xmm9 + vrounds0 8, %xmm10 + vrounds0 12, %xmm11 + + vrounds1 16, %xmm8, 1, %xmm9, 2, %xmm10, 3, %xmm8, 0 + vrounds1 20, %xmm9, 1, %xmm10, 2, %xmm11, 3, %xmm9, 0 + vrounds1 24, %xmm10, 1, %xmm11, 2, %xmm8, 3, %xmm10, 0 + vrounds1 28, %xmm11, 1, %xmm8, 2, %xmm9, 3, %xmm11, 0 + + vrounds2 32, %xmm9, 1, %xmm10, 0, %xmm10, 3, %xmm11, 2 + vrounds2 36, %xmm8, 1, %xmm9, 0, %xmm9, 3, %xmm10, 2 + vrounds2 40, %xmm11, 1, %xmm8, 0, %xmm8, 3, %xmm9, 2 + vrounds2 44, %xmm10, 1, %xmm11, 0, %xmm11, 3, %xmm8, 2 + + vrounds3 48, %xmm8, 0, %xmm9, 3, %xmm11, 2, %xmm9, 1 + vrounds3 52, %xmm11, 0, %xmm8, 3, %xmm10, 2, %xmm8, 1 + vrounds3 56, %xmm10, 0, %xmm11, 3, %xmm9, 2, %xmm11, 1 + vrounds3 60, %xmm9, 0, %xmm10, 3, %xmm8, 2, %xmm10, 1 + + vpaddd %xmm12, %xmm0, %xmm0 + vpaddd %xmm13, %xmm1, %xmm1 + vpaddd %xmm14, %xmm2, %xmm2 + vpaddd %xmm15, %xmm3, %xmm3 + + add $64, %rsi + cmp %rsi, %rdx + jne 1b + + vmovd %xmm0, (%rdi) + vmovd %xmm1, 4(%rdi) + vmovd %xmm2, 8(%rdi) + vmovd %xmm3, 12(%rdi) + +0: ret +END(_libmd_md5block_avx512) + + // round keys, for use in md5block_avx512 + .section .rodata + .balign 16 +keys: .4byte 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee + .4byte 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 + .4byte 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be + .4byte 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 + + .4byte 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa + .4byte 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 + .4byte 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed + .4byte 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a + + .4byte 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c + .4byte 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 + .4byte 
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 + .4byte 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 + + .4byte 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 + .4byte 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 + .4byte 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 + .4byte 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 + .size keys, .-keys +#endif /* !defined(_KERNEL) */ + + .section .note.GNU-stack,"",%progbits diff --git a/sys/crypto/md5/md5c.c b/sys/crypto/md5/md5c.c new file mode 100644 --- /dev/null +++ b/sys/crypto/md5/md5c.c @@ -0,0 +1,234 @@ +/*- + * Copyright (c) 2024 Robert Clausecker + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +#ifdef _KERNEL +#include +#include +#include +#define assert(expr) MPASS(expr) +#else +#include +#include +#include +#include +#endif /* defined(_KERNEL) */ + +#define md5block _libmd_md5block +#ifdef MD5_ASM +extern void md5block(MD5_CTX *, const void *, size_t); +#else +static void md5block(MD5_CTX *, const void *, size_t); +#endif + +void +MD5Init(MD5_CTX *ctx) +{ + ctx->state[0] = 0x67452301; + ctx->state[1] = 0xefcdab89; + ctx->state[2] = 0x98badcfe; + ctx->state[3] = 0x10325476; + + ctx->count[0] = 0; + ctx->count[1] = 0; +} + +void +MD5Update(MD5_CTX *ctx, const void *data, unsigned int len) +{ + uint64_t nn; + const char *p = data; + unsigned num; + + num = ctx->count[0] % MD5_BLOCK_LENGTH; + nn = (uint64_t)ctx->count[0] | (uint64_t)ctx->count[1] << 32; + nn += len; + ctx->count[0] = (uint32_t)nn; + ctx->count[1] = (uint32_t)(nn >> 32); + + if (num > 0) { + unsigned int n = MD5_BLOCK_LENGTH - num; + + if (n > len) + n = len; + + memcpy((char *)ctx->buffer + num, p, n); + num += n; + if (num == MD5_BLOCK_LENGTH) + md5block(ctx, (void *)ctx->buffer, MD5_BLOCK_LENGTH); + + p += n; + len -= n; + } + + if (len >= MD5_BLOCK_LENGTH) { + unsigned n = len & ~(unsigned)(MD5_BLOCK_LENGTH - 1); + + md5block(ctx, p, n); + p += n; + len -= n; + } + + if (len > 0) + memcpy((void 
*)ctx->buffer, p, len); +} + +static void +MD5Pad(MD5_CTX *ctx) +{ + uint64_t len; + unsigned t; + unsigned char tmp[MD5_BLOCK_LENGTH + sizeof(uint64_t)] = {0x80, 0}; + + len = (uint64_t)ctx->count[0] | (uint64_t)ctx->count[1] << 32; + t = 64 + 56 - ctx->count[0] % 64; + if (t > 64) + t -= 64; + + /* length in bits */ + len <<= 3; + le64enc(tmp + t, len); + MD5Update(ctx, tmp, t + 8); + assert(ctx->count[0] % MD5_BLOCK_LENGTH == 0); +} + +void +MD5Final(unsigned char md[16], MD5_CTX *ctx) +{ + MD5Pad(ctx); + + le32enc(md + 0, ctx->state[0]); + le32enc(md + 4, ctx->state[1]); + le32enc(md + 8, ctx->state[2]); + le32enc(md + 12, ctx->state[3]); + + explicit_bzero(ctx, sizeof(*ctx)); /* wipe the whole context */ +} + +#ifndef MD5_ASM +static const uint32_t K[64] = { + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, + 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, + 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, + 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, + 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, + 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, + 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, + 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, +}; + +static inline uint32_t +rol32(uint32_t a, int b) +{ + return (a << b | a >> (32 - b)); +} + +static void +md5block(MD5_CTX *ctx, const void *data, size_t len) +{ + uint32_t m[16], a0, b0, c0, d0; + const char *p = data; + + a0 = ctx->state[0]; + b0 = ctx->state[1]; + c0 = ctx->state[2]; + d0 = ctx->state[3]; + + while (len >= MD5_BLOCK_LENGTH) { + size_t i; + uint32_t a = a0, b = b0, c = c0, d = d0, f, tmp; + +# pragma unroll + for (i = 0; i < 
16; i++) + m[i] = le32dec(p + 4*i); + +# pragma unroll + for (i = 0; i < 16; i++) { + const int s[] = { 7, 12, 17, 22 }; + + f = d ^ (b & (c ^ d)); + tmp = d; + d = c; + c = b; + b += rol32(a + f + K[i] + m[i], s[i % 4]); + a = tmp; + } + +# pragma unroll + for (; i < 32; i++) { + const int s[] = { 5, 9, 14, 20 }; + + f = c ^ (d & (b ^ c)); + tmp = d; + d = c; + c = b; + b += rol32(a + f + K[i] + m[(5*i + 1) % 16], s[i % 4]); + a = tmp; + } + +# pragma unroll + for (; i < 48; i++) { + const int s[] = { 4, 11, 16, 23 }; + + f = b ^ c ^ d; + tmp = d; + d = c; + c = b; + b += rol32(a + f + K[i] + m[(3*i + 5) % 16], s[i % 4]); + a = tmp; + } + +# pragma unroll + for (; i < 64; i++) { + const int s[] = { 6, 10, 15, 21 }; + + f = c ^ (b | ~d); + tmp = d; + d = c; + c = b; + b += rol32(a + f + K[i] + m[7*i % 16], s[i % 4]); + a = tmp; + } + + a0 += a; + b0 += b; + c0 += c; + d0 += d; + + p += MD5_BLOCK_LENGTH; + len -= MD5_BLOCK_LENGTH; + } + + ctx->state[0] = a0; + ctx->state[1] = b0; + ctx->state[2] = c0; + ctx->state[3] = d0; +} +#endif /* !defined(MD5_ASM) */ + +#ifdef WEAK_REFS +/* When building libmd, provide weak references. Note: this is not + activated in the context of compiling these sources for internal + use in libcrypt. 
+ */ +#undef MD5Init +__weak_reference(_libmd_MD5Init, MD5Init); +#undef MD5Update +__weak_reference(_libmd_MD5Update, MD5Update); +#undef MD5Final +__weak_reference(_libmd_MD5Final, MD5Final); +#endif diff --git a/sys/crypto/md5/md5dispatch_amd64.c b/sys/crypto/md5/md5dispatch_amd64.c new file mode 100644 --- /dev/null +++ b/sys/crypto/md5/md5dispatch_amd64.c @@ -0,0 +1,33 @@ +/*- + * Copyright (c) 2024 Robert Clausecker + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include + +extern void _libmd_md5block_baseline(MD5_CTX *, const void *, size_t); +extern void _libmd_md5block_bmi1(MD5_CTX *, const void *, size_t); +extern void _libmd_md5block_avx512(MD5_CTX *, const void *, size_t); + +DEFINE_UIFUNC(, void, _libmd_md5block, (MD5_CTX *, const void *, size_t)) +{ + /* + * AVX-512 would need to be turned on first in the kernel + * and that's too expensive; the BMI1 kernel is plenty fast + * and doesn't require any special registers to run. + */ +#ifndef _KERNEL + if ((cpu_stdext_feature & (CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512VL)) + == (CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512VL)) + return (_libmd_md5block_avx512); +#endif + + if (cpu_stdext_feature & CPUID_STDEXT_BMI1) + return (_libmd_md5block_bmi1); + else + return (_libmd_md5block_baseline); +} diff --git a/sys/kern/md5c.c b/sys/kern/md5c.c deleted file mode 100644 --- a/sys/kern/md5c.c +++ /dev/null @@ -1,341 +0,0 @@ -/*- - * SPDX-License-Identifier: RSA-MD - * - * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm - * - * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All - * rights reserved. - * - * License to copy and use this software is granted provided that it - * is identified as the "RSA Data Security, Inc. MD5 Message-Digest - * Algorithm" in all material mentioning or referencing this software - * or this function. 
- * - * License is also granted to make and use derivative works provided - * that such works are identified as "derived from the RSA Data - * Security, Inc. MD5 Message-Digest Algorithm" in all material - * mentioning or referencing the derived work. - * - * RSA Data Security, Inc. makes no representations concerning either - * the merchantability of this software or the suitability of this - * software for any particular purpose. It is provided "as is" - * without express or implied warranty of any kind. - * - * These notices must be retained in any copies of any part of this - * documentation and/or software. - * - * This code is the same as the code published by RSA Inc. It has been - * edited for clarity and style only. - */ - -#include - -#ifdef _KERNEL -#include -#else -#include -#endif - -#include -#include -#include - -static void MD5Transform(uint32_t [4], const unsigned char [64]); - -#if (BYTE_ORDER == LITTLE_ENDIAN) -#define Encode memcpy -#define Decode memcpy -#else - -/* - * Encodes input (uint32_t) into output (unsigned char). Assumes len is - * a multiple of 4. - */ - -static void -Encode (unsigned char *output, uint32_t *input, unsigned int len) -{ - unsigned int i; - uint32_t ip; - - for (i = 0; i < len / 4; i++) { - ip = input[i]; - *output++ = ip; - *output++ = ip >> 8; - *output++ = ip >> 16; - *output++ = ip >> 24; - } -} - -/* - * Decodes input (unsigned char) into output (uint32_t). Assumes len is - * a multiple of 4. 
- */ - -static void -Decode (uint32_t *output, const unsigned char *input, unsigned int len) -{ - unsigned int i; - - for (i = 0; i < len; i += 4) { - *output++ = input[i] | (input[i+1] << 8) | (input[i+2] << 16) | - (input[i+3] << 24); - } -} -#endif - -static unsigned char PADDING[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* F, G, H and I are basic MD5 functions. */ -#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) -#define G(x, y, z) (((x) & (z)) | ((y) & (~z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) -#define I(x, y, z) ((y) ^ ((x) | (~z))) - -/* ROTATE_LEFT rotates x left n bits. */ -#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) - -/* - * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. - * Rotation is separate from addition to prevent recomputation. - */ -#define FF(a, b, c, d, x, s, ac) { \ - (a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } -#define GG(a, b, c, d, x, s, ac) { \ - (a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } -#define HH(a, b, c, d, x, s, ac) { \ - (a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } -#define II(a, b, c, d, x, s, ac) { \ - (a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } - -/* MD5 initialization. Begins an MD5 operation, writing a new context. */ - -void -MD5Init(MD5_CTX *context) -{ - - context->count[0] = context->count[1] = 0; - - /* Load magic initialization constants. */ - context->state[0] = 0x67452301; - context->state[1] = 0xefcdab89; - context->state[2] = 0x98badcfe; - context->state[3] = 0x10325476; -} - -/* - * MD5 block update operation. 
Continues an MD5 message-digest - * operation, processing another message block, and updating the - * context. - */ - -void -MD5Update(MD5_CTX *context, const void *in, unsigned int inputLen) -{ - unsigned int i, index, partLen; - const unsigned char *input = in; - - /* Compute number of bytes mod 64 */ - index = (unsigned int)((context->count[0] >> 3) & 0x3F); - - /* Update number of bits */ - if ((context->count[0] += ((uint32_t)inputLen << 3)) - < ((uint32_t)inputLen << 3)) - context->count[1]++; - context->count[1] += ((uint32_t)inputLen >> 29); - - partLen = 64 - index; - - /* Transform as many times as possible. */ - if (inputLen >= partLen) { - memcpy((void *)&context->buffer[index], (const void *)input, - partLen); - MD5Transform (context->state, context->buffer); - - for (i = partLen; i + 63 < inputLen; i += 64) - MD5Transform (context->state, &input[i]); - - index = 0; - } - else - i = 0; - - /* Buffer remaining input */ - memcpy ((void *)&context->buffer[index], (const void *)&input[i], - inputLen-i); -} - -/* - * MD5 padding. Adds padding followed by original length. - */ - -static void -MD5Pad(MD5_CTX *context) -{ - unsigned char bits[8]; - unsigned int index, padLen; - - /* Save number of bits */ - Encode (bits, context->count, 8); - - /* Pad out to 56 mod 64. */ - index = (unsigned int)((context->count[0] >> 3) & 0x3f); - padLen = (index < 56) ? (56 - index) : (120 - index); - MD5Update (context, PADDING, padLen); - - /* Append length (before padding) */ - MD5Update (context, bits, 8); -} - -/* - * MD5 finalization. Ends an MD5 message-digest operation, writing the - * the message digest and zeroizing the context. - */ - -void -MD5Final(unsigned char digest[static MD5_DIGEST_LENGTH], MD5_CTX *context) -{ - /* Do padding. */ - MD5Pad (context); - - /* Store state in digest */ - Encode (digest, context->state, MD5_DIGEST_LENGTH); - - /* Zeroize sensitive information. */ - explicit_bzero (context, sizeof (*context)); -} - -/* MD5 basic transformation. 
Transforms state based on block. */ - -static void -MD5Transform(uint32_t state[4], const unsigned char block[64]) -{ - uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16]; - - Decode (x, block, 64); - - /* Round 1 */ -#define S11 7 -#define S12 12 -#define S13 17 -#define S14 22 - FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ - FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ - FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ - FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ - FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ - FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ - FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ - FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ - FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ - FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ - FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ - FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ - FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ - FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ - FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ - FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ - - /* Round 2 */ -#define S21 5 -#define S22 9 -#define S23 14 -#define S24 20 - GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ - GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ - GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ - GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ - GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ - GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ - GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ - GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ - GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ - GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ - GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ - GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ - GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ - GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ - GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ - GG (b, c, d, a, 
x[12], S24, 0x8d2a4c8a); /* 32 */ - - /* Round 3 */ -#define S31 4 -#define S32 11 -#define S33 16 -#define S34 23 - HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ - HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ - HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ - HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ - HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ - HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ - HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ - HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ - HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ - HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ - HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ - HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ - HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ - HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ - HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ - HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ - - /* Round 4 */ -#define S41 6 -#define S42 10 -#define S43 15 -#define S44 21 - II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ - II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ - II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ - II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ - II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ - II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ - II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ - II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ - II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ - II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ - II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ - II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ - II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ - II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ - II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ - II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ - - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; - - /* Zeroize sensitive information. 
*/ - memset ((void *)x, 0, sizeof (x)); -} - -#ifdef WEAK_REFS -/* When building libmd, provide weak references. Note: this is not - activated in the context of compiling these sources for internal - use in libcrypt. - */ -#undef MD5Init -__weak_reference(_libmd_MD5Init, MD5Init); -#undef MD5Update -__weak_reference(_libmd_MD5Update, MD5Update); -#undef MD5Final -__weak_reference(_libmd_MD5Final, MD5Final); -#endif