diff --git a/module/icp/algs/sha2/sha256_impl.c b/module/icp/algs/sha2/sha256_impl.c
index 01ce5cbd814c..0f24319511d7 100644
--- a/module/icp/algs/sha2/sha256_impl.c
+++ b/module/icp/algs/sha2/sha256_impl.c
@@ -1,309 +1,313 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2022 Tino Reichardt
  */
 
 #include
 #include
 #include
 #include
 #include
 #include
 
 #define	TF(E, N) \
 	extern void ASMABI E(uint32_t s[8], const void *, size_t); \
 	static inline void N(uint32_t s[8], const void *d, size_t b) { \
 	kfpu_begin(); E(s, d, b); kfpu_end(); \
 }
 
 /* some implementation is always okay */
 static inline boolean_t sha2_is_supported(void)
 {
 	return (B_TRUE);
 }
 
 #if defined(__x86_64)
 
 /* Users of ASMABI requires all calls to be from wrappers */
 extern void ASMABI zfs_sha256_transform_x64(uint32_t s[8], const void *, size_t);
 
 static inline void tf_sha256_transform_x64(uint32_t s[8], const void *d, size_t b)
 {
 	zfs_sha256_transform_x64(s, d, b);
 }
 
 const sha256_ops_t sha256_x64_impl = {
 	.is_supported = sha2_is_supported,
 	.transform = tf_sha256_transform_x64,
 	.name = "x64"
 };
 
 #if defined(HAVE_SSSE3)
 static boolean_t sha2_have_ssse3(void)
 {
 	return (kfpu_allowed() && zfs_ssse3_available());
 }
 
 TF(zfs_sha256_transform_ssse3, tf_sha256_ssse3);
 const sha256_ops_t sha256_ssse3_impl = {
 	.is_supported = sha2_have_ssse3,
 	.transform = tf_sha256_ssse3,
 	.name = "ssse3"
 };
 #endif
 
 #if defined(HAVE_AVX)
 static boolean_t sha2_have_avx(void)
 {
 	return (kfpu_allowed() && zfs_avx_available());
 }
 
 TF(zfs_sha256_transform_avx, tf_sha256_avx);
 const sha256_ops_t sha256_avx_impl = {
 	.is_supported = sha2_have_avx,
 	.transform = tf_sha256_avx,
 	.name = "avx"
 };
 #endif
 
 #if defined(HAVE_AVX2)
 static boolean_t sha2_have_avx2(void)
 {
 	return (kfpu_allowed() && zfs_avx2_available());
 }
 
 TF(zfs_sha256_transform_avx2, tf_sha256_avx2);
 const sha256_ops_t sha256_avx2_impl = {
 	.is_supported = sha2_have_avx2,
 	.transform = tf_sha256_avx2,
 	.name = "avx2"
 };
 #endif
 
 #if defined(HAVE_SSE4_1)
 static boolean_t sha2_have_shani(void)
 {
 	return (kfpu_allowed() && zfs_sse4_1_available() && \
 	    zfs_shani_available());
 }
 
 TF(zfs_sha256_transform_shani, tf_sha256_shani);
 const sha256_ops_t sha256_shani_impl = {
 	.is_supported = sha2_have_shani,
 	.transform = tf_sha256_shani,
 	.name = "shani"
 };
 #endif
 
-#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH > 6)
+#elif defined(__aarch64__) || defined(__arm__)
+extern void zfs_sha256_block_armv7(uint32_t s[8], const void *, size_t);
+const sha256_ops_t sha256_armv7_impl = {
+	.is_supported = sha2_is_supported,
+	.transform = zfs_sha256_block_armv7,
+	.name = "armv7"
+};
+
+#if __ARM_ARCH > 6
 static boolean_t sha256_have_neon(void)
 {
 	return (kfpu_allowed() && zfs_neon_available());
 }
 
 static boolean_t sha256_have_armv8ce(void)
 {
 	return (kfpu_allowed() && zfs_sha256_available());
 }
 
-extern void zfs_sha256_block_armv7(uint32_t s[8], const void *, size_t);
-const sha256_ops_t sha256_armv7_impl = {
-	.is_supported = sha2_is_supported,
-	.transform = zfs_sha256_block_armv7,
-	.name = "armv7"
-};
-
 TF(zfs_sha256_block_neon, tf_sha256_neon);
 const sha256_ops_t sha256_neon_impl = {
 	.is_supported = sha256_have_neon,
 	.transform = tf_sha256_neon,
 	.name = "neon"
 };
 
 TF(zfs_sha256_block_armv8, tf_sha256_armv8ce);
 const sha256_ops_t sha256_armv8_impl = {
 	.is_supported = sha256_have_armv8ce,
 	.transform = tf_sha256_armv8ce,
 	.name = "armv8-ce"
 };
+#endif
 
 #elif defined(__PPC64__)
 
 static boolean_t sha256_have_isa207(void)
 {
 	return (kfpu_allowed() && zfs_isa207_available());
 }
 
 TF(zfs_sha256_ppc, tf_sha256_ppc);
 const sha256_ops_t sha256_ppc_impl = {
 	.is_supported = sha2_is_supported,
 	.transform = tf_sha256_ppc,
 	.name = "ppc"
 };
 
 TF(zfs_sha256_power8, tf_sha256_power8);
 const sha256_ops_t sha256_power8_impl = {
 	.is_supported = sha256_have_isa207,
 	.transform = tf_sha256_power8,
 	.name = "power8"
 };
 #endif /* __PPC64__ */
 
 /* the two generic ones */
 extern const sha256_ops_t sha256_generic_impl;
 
 /* array with all sha256 implementations */
 static const sha256_ops_t *const sha256_impls[] = {
 	&sha256_generic_impl,
 #if defined(__x86_64)
 	&sha256_x64_impl,
 #endif
 #if defined(__x86_64) && defined(HAVE_SSSE3)
 	&sha256_ssse3_impl,
 #endif
 #if defined(__x86_64) && defined(HAVE_AVX)
 	&sha256_avx_impl,
 #endif
 #if defined(__x86_64) && defined(HAVE_AVX2)
 	&sha256_avx2_impl,
 #endif
 #if defined(__x86_64) && defined(HAVE_SSE4_1)
 	&sha256_shani_impl,
 #endif
-#if defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH > 6)
+#if defined(__aarch64__) || defined(__arm__)
 	&sha256_armv7_impl,
+#if __ARM_ARCH > 6
 	&sha256_neon_impl,
 	&sha256_armv8_impl,
 #endif
+#endif
 #if defined(__PPC64__)
 	&sha256_ppc_impl,
 	&sha256_power8_impl,
 #endif /* __PPC64__ */
 };
 
 /* use the generic implementation functions */
 #define	IMPL_NAME		"sha256"
 #define	IMPL_OPS_T		sha256_ops_t
 #define	IMPL_ARRAY		sha256_impls
 #define	IMPL_GET_OPS		sha256_get_ops
 #define	ZFS_IMPL_OPS		zfs_sha256_ops
 #include
 
 #ifdef _KERNEL
 
 #define	IMPL_FMT(impl, i)	(((impl) == (i)) ?
"[%s] " : "%s ") #if defined(__linux__) static int sha256_param_get(char *buffer, zfs_kernel_param_t *unused) { const uint32_t impl = IMPL_READ(generic_impl_chosen); char *fmt; int cnt = 0; /* cycling */ fmt = IMPL_FMT(impl, IMPL_CYCLE); cnt += sprintf(buffer + cnt, fmt, "cycle"); /* list fastest */ fmt = IMPL_FMT(impl, IMPL_FASTEST); cnt += sprintf(buffer + cnt, fmt, "fastest"); /* list all supported implementations */ generic_impl_init(); for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { fmt = IMPL_FMT(impl, i); cnt += sprintf(buffer + cnt, fmt, generic_supp_impls[i]->name); } return (cnt); } static int sha256_param_set(const char *val, zfs_kernel_param_t *unused) { (void) unused; return (generic_impl_setname(val)); } #elif defined(__FreeBSD__) #include static int sha256_param(ZFS_MODULE_PARAM_ARGS) { int err; generic_impl_init(); if (req->newptr == NULL) { const uint32_t impl = IMPL_READ(generic_impl_chosen); const int init_buflen = 64; const char *fmt; struct sbuf *s; s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req); /* cycling */ fmt = IMPL_FMT(impl, IMPL_CYCLE); (void) sbuf_printf(s, fmt, "cycle"); /* list fastest */ fmt = IMPL_FMT(impl, IMPL_FASTEST); (void) sbuf_printf(s, fmt, "fastest"); /* list all supported implementations */ for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { fmt = IMPL_FMT(impl, i); (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name); } err = sbuf_finish(s); sbuf_delete(s); return (err); } char buf[16]; err = sysctl_handle_string(oidp, buf, sizeof (buf), req); if (err) { return (err); } return (-generic_impl_setname(buf)); } #endif #undef IMPL_FMT ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, sha256_impl, sha256_param_set, sha256_param_get, ZMOD_RW, \ "Select SHA256 implementation."); #endif #undef TF diff --git a/module/icp/algs/sha2/sha512_impl.c b/module/icp/algs/sha2/sha512_impl.c index 27b35a639a54..6291fbd77e36 100644 --- a/module/icp/algs/sha2/sha512_impl.c +++ b/module/icp/algs/sha2/sha512_impl.c @@ -1,285 +1,282 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2022 Tino Reichardt */ #include #include #include #include #include #include #define TF(E, N) \ extern void ASMABI E(uint64_t s[8], const void *, size_t); \ static inline void N(uint64_t s[8], const void *d, size_t b) { \ kfpu_begin(); E(s, d, b); kfpu_end(); \ } /* some implementation is always okay */ static inline boolean_t sha2_is_supported(void) { return (B_TRUE); } #if defined(__x86_64) /* Users of ASMABI requires all calls to be from wrappers */ extern void ASMABI zfs_sha512_transform_x64(uint64_t s[8], const void *, size_t); static inline void tf_sha512_transform_x64(uint64_t s[8], const void *d, size_t b) { zfs_sha512_transform_x64(s, d, b); } const sha512_ops_t sha512_x64_impl = { .is_supported = sha2_is_supported, .transform = tf_sha512_transform_x64, .name = "x64" }; #if defined(HAVE_AVX) static boolean_t sha2_have_avx(void) { return (kfpu_allowed() && zfs_avx_available()); } TF(zfs_sha512_transform_avx, tf_sha512_avx); const sha512_ops_t sha512_avx_impl = { .is_supported = sha2_have_avx, .transform = tf_sha512_avx, .name = "avx" }; #endif #if defined(HAVE_AVX2) static boolean_t sha2_have_avx2(void) { return (kfpu_allowed() && zfs_avx2_available()); } TF(zfs_sha512_transform_avx2, tf_sha512_avx2); const sha512_ops_t sha512_avx2_impl = { .is_supported = sha2_have_avx2, .transform = tf_sha512_avx2, .name = "avx2" }; #endif -#elif defined(__aarch64__) +#elif defined(__aarch64__) || defined(__arm__) extern void zfs_sha512_block_armv7(uint64_t s[8], const void *, size_t); const sha512_ops_t sha512_armv7_impl = { .is_supported = sha2_is_supported, .transform = zfs_sha512_block_armv7, .name = "armv7" }; +#if defined(__aarch64__) static boolean_t sha512_have_armv8ce(void) { return (kfpu_allowed() && zfs_sha512_available()); } TF(zfs_sha512_block_armv8, tf_sha512_armv8ce); const sha512_ops_t sha512_armv8_impl = { .is_supported = sha512_have_armv8ce, .transform = tf_sha512_armv8ce, .name = "armv8-ce" }; +#endif -#elif defined(__arm__) && __ARM_ARCH > 6 -extern void zfs_sha512_block_armv7(uint64_t s[8], const void *, size_t); -const sha512_ops_t sha512_armv7_impl = { - .is_supported = sha2_is_supported, - .transform = zfs_sha512_block_armv7, - .name = "armv7" -}; - +#if defined(__arm__) && __ARM_ARCH > 6 static boolean_t sha512_have_neon(void) { return (kfpu_allowed() && zfs_neon_available()); } TF(zfs_sha512_block_neon, tf_sha512_neon); const sha512_ops_t sha512_neon_impl = { .is_supported = sha512_have_neon, .transform = tf_sha512_neon, .name = "neon" }; +#endif #elif defined(__PPC64__) TF(zfs_sha512_ppc, tf_sha512_ppc); const sha512_ops_t sha512_ppc_impl = { .is_supported = sha2_is_supported, .transform = tf_sha512_ppc, .name = "ppc" }; static boolean_t sha512_have_isa207(void) { return (kfpu_allowed() && zfs_isa207_available()); } TF(zfs_sha512_power8, tf_sha512_power8); const sha512_ops_t sha512_power8_impl = { .is_supported = sha512_have_isa207, .transform = tf_sha512_power8, .name = "power8" }; #endif /* __PPC64__ */ /* the two generic ones */ extern const sha512_ops_t sha512_generic_impl; /* array with all sha512 implementations */ static const sha512_ops_t *const sha512_impls[] = { &sha512_generic_impl, #if defined(__x86_64) &sha512_x64_impl, #endif #if defined(__x86_64) && defined(HAVE_AVX) &sha512_avx_impl, #endif #if defined(__x86_64) 
&& defined(HAVE_AVX2) &sha512_avx2_impl, #endif -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(__arm__) &sha512_armv7_impl, +#if defined(__aarch64__) &sha512_armv8_impl, #endif #if defined(__arm__) && __ARM_ARCH > 6 - &sha512_armv7_impl, &sha512_neon_impl, #endif +#endif #if defined(__PPC64__) &sha512_ppc_impl, &sha512_power8_impl, #endif /* __PPC64__ */ }; /* use the generic implementation functions */ #define IMPL_NAME "sha512" #define IMPL_OPS_T sha512_ops_t #define IMPL_ARRAY sha512_impls #define IMPL_GET_OPS sha512_get_ops #define ZFS_IMPL_OPS zfs_sha512_ops #include #ifdef _KERNEL #define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ") #if defined(__linux__) static int sha512_param_get(char *buffer, zfs_kernel_param_t *unused) { const uint32_t impl = IMPL_READ(generic_impl_chosen); char *fmt; int cnt = 0; /* cycling */ fmt = IMPL_FMT(impl, IMPL_CYCLE); cnt += sprintf(buffer + cnt, fmt, "cycle"); /* list fastest */ fmt = IMPL_FMT(impl, IMPL_FASTEST); cnt += sprintf(buffer + cnt, fmt, "fastest"); /* list all supported implementations */ generic_impl_init(); for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { fmt = IMPL_FMT(impl, i); cnt += sprintf(buffer + cnt, fmt, generic_supp_impls[i]->name); } return (cnt); } static int sha512_param_set(const char *val, zfs_kernel_param_t *unused) { (void) unused; return (generic_impl_setname(val)); } #elif defined(__FreeBSD__) #include static int sha512_param(ZFS_MODULE_PARAM_ARGS) { int err; generic_impl_init(); if (req->newptr == NULL) { const uint32_t impl = IMPL_READ(generic_impl_chosen); const int init_buflen = 64; const char *fmt; struct sbuf *s; s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req); /* cycling */ fmt = IMPL_FMT(impl, IMPL_CYCLE); (void) sbuf_printf(s, fmt, "cycle"); /* list fastest */ fmt = IMPL_FMT(impl, IMPL_FASTEST); (void) sbuf_printf(s, fmt, "fastest"); /* list all supported implementations */ for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { fmt = IMPL_FMT(impl, i); (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name); } err = sbuf_finish(s); sbuf_delete(s); return (err); } /* we got module parameter */ char buf[16]; err = sysctl_handle_string(oidp, buf, sizeof (buf), req); if (err) { return (err); } return (-generic_impl_setname(buf)); } #endif #undef IMPL_FMT ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, sha512_impl, sha512_param_set, sha512_param_get, ZMOD_RW, \ "Select SHA512 implementation."); #endif #undef TF diff --git a/module/icp/asm-arm/sha2/sha256-armv7.S b/module/icp/asm-arm/sha2/sha256-armv7.S index 190dbabc5ecb..3ae66626df31 100644 --- a/module/icp/asm-arm/sha2/sha256-armv7.S +++ b/module/icp/asm-arm/sha2/sha256-armv7.S @@ -1,2776 +1,2774 @@ /* * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ /* * Portions Copyright (c) 2022 Tino Reichardt * - modified assembly to fit into OpenZFS */ #if defined(__arm__) #ifndef __ARM_ARCH # define __ARM_ARCH__ 7 #else # define __ARM_ARCH__ __ARM_ARCH #endif #if defined(__thumb2__) .syntax unified .thumb #else .code 32 #endif .text .type K256,%object .align 5 K256: .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 .word 0 @ terminator .align 5 .globl zfs_sha256_block_armv7 .type zfs_sha256_block_armv7,%function zfs_sha256_block_armv7: .Lzfs_sha256_block_armv7: #if __ARM_ARCH__<7 && !defined(__thumb2__) sub r3,pc,#8 @ zfs_sha256_block_armv7 #else adr r3,.Lzfs_sha256_block_armv7 #endif add r2,r1,r2,lsl#6 @ len to point at the end of inp stmdb sp!,{r0,r1,r2,r4-r11,lr} ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} sub r14,r3,#256+32 @ K256 sub sp,sp,#16*4 @ alloca(X[16]) .Loop: # if __ARM_ARCH__>=7 ldr r2,[r1],#4 # else ldrb r2,[r1,#3] # endif eor r3,r5,r6 @ magic eor r12,r12,r12 #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 0 # if 0==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r8,r8,ror#5 add r4,r4,r12 @ h+=Maj(a,b,c) from the past eor r0,r0,r8,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 0 add r4,r4,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r12,lsl#8 ldrb r12,[r1],#4 orr r2,r2,r0,lsl#16 # if 0==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r8,r8,ror#5 orr r2,r2,r12,lsl#24 eor r0,r0,r8,ror#19 @ Sigma1(e) #endif ldr r12,[r14],#4 @ *K256++ add r11,r11,r2 @ h+=X[i] str r2,[sp,#0*4] eor r2,r9,r10 add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) eor r0,r4,r4,ror#11 add r11,r11,r2 @ h+=Ch(e,f,g) #if 0==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? 
#endif #if 0<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r4,r5 @ a^b, b^c in next round #else ldr r2,[sp,#2*4] @ from future BODY_16_xx eor r12,r4,r5 @ a^b, b^c in next round ldr r1,[sp,#15*4] @ from future BODY_16_xx #endif eor r0,r0,r4,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r7,r7,r11 @ d+=h eor r3,r3,r5 @ Maj(a,b,c) add r11,r11,r0,ror#2 @ h+=Sigma0(a) @ add r11,r11,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 1 # if 1==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r7,r7,ror#5 add r11,r11,r3 @ h+=Maj(a,b,c) from the past eor r0,r0,r7,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 1 add r11,r11,r3 @ h+=Maj(a,b,c) from the past ldrb r3,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r3,lsl#8 ldrb r3,[r1],#4 orr r2,r2,r0,lsl#16 # if 1==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r7,r7,ror#5 orr r2,r2,r3,lsl#24 eor r0,r0,r7,ror#19 @ Sigma1(e) #endif ldr r3,[r14],#4 @ *K256++ add r10,r10,r2 @ h+=X[i] str r2,[sp,#1*4] eor r2,r8,r9 add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) eor r0,r11,r11,ror#11 add r10,r10,r2 @ h+=Ch(e,f,g) #if 1==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 1<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r11,r4 @ a^b, b^c in next round #else ldr r2,[sp,#3*4] @ from future BODY_16_xx eor r3,r11,r4 @ a^b, b^c in next round ldr r1,[sp,#0*4] @ from future BODY_16_xx #endif eor r0,r0,r11,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r6,r6,r10 @ d+=h eor r12,r12,r4 @ Maj(a,b,c) add r10,r10,r0,ror#2 @ h+=Sigma0(a) @ add r10,r10,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 2 # if 2==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r6,r6,ror#5 add r10,r10,r12 @ h+=Maj(a,b,c) from the past eor r0,r0,r6,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 2 add r10,r10,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r12,lsl#8 ldrb r12,[r1],#4 orr r2,r2,r0,lsl#16 # if 2==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r6,r6,ror#5 orr r2,r2,r12,lsl#24 eor r0,r0,r6,ror#19 @ Sigma1(e) #endif ldr r12,[r14],#4 @ *K256++ add r9,r9,r2 @ h+=X[i] str r2,[sp,#2*4] eor r2,r7,r8 add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) eor r0,r10,r10,ror#11 add r9,r9,r2 @ h+=Ch(e,f,g) #if 2==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? 
#endif #if 2<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r10,r11 @ a^b, b^c in next round #else ldr r2,[sp,#4*4] @ from future BODY_16_xx eor r12,r10,r11 @ a^b, b^c in next round ldr r1,[sp,#1*4] @ from future BODY_16_xx #endif eor r0,r0,r10,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r5,r5,r9 @ d+=h eor r3,r3,r11 @ Maj(a,b,c) add r9,r9,r0,ror#2 @ h+=Sigma0(a) @ add r9,r9,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 3 # if 3==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r5,r5,ror#5 add r9,r9,r3 @ h+=Maj(a,b,c) from the past eor r0,r0,r5,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 3 add r9,r9,r3 @ h+=Maj(a,b,c) from the past ldrb r3,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r3,lsl#8 ldrb r3,[r1],#4 orr r2,r2,r0,lsl#16 # if 3==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r5,r5,ror#5 orr r2,r2,r3,lsl#24 eor r0,r0,r5,ror#19 @ Sigma1(e) #endif ldr r3,[r14],#4 @ *K256++ add r8,r8,r2 @ h+=X[i] str r2,[sp,#3*4] eor r2,r6,r7 add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) eor r0,r9,r9,ror#11 add r8,r8,r2 @ h+=Ch(e,f,g) #if 3==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 3<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r9,r10 @ a^b, b^c in next round #else ldr r2,[sp,#5*4] @ from future BODY_16_xx eor r3,r9,r10 @ a^b, b^c in next round ldr r1,[sp,#2*4] @ from future BODY_16_xx #endif eor r0,r0,r9,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r4,r4,r8 @ d+=h eor r12,r12,r10 @ Maj(a,b,c) add r8,r8,r0,ror#2 @ h+=Sigma0(a) @ add r8,r8,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 4 # if 4==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r4,r4,ror#5 add r8,r8,r12 @ h+=Maj(a,b,c) from the past eor r0,r0,r4,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 4 add r8,r8,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r12,lsl#8 ldrb r12,[r1],#4 orr r2,r2,r0,lsl#16 # if 4==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r4,r4,ror#5 orr r2,r2,r12,lsl#24 eor r0,r0,r4,ror#19 @ Sigma1(e) #endif ldr r12,[r14],#4 @ *K256++ add r7,r7,r2 @ h+=X[i] str r2,[sp,#4*4] eor r2,r5,r6 add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) eor r0,r8,r8,ror#11 add r7,r7,r2 @ h+=Ch(e,f,g) #if 4==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? 
#endif #if 4<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r8,r9 @ a^b, b^c in next round #else ldr r2,[sp,#6*4] @ from future BODY_16_xx eor r12,r8,r9 @ a^b, b^c in next round ldr r1,[sp,#3*4] @ from future BODY_16_xx #endif eor r0,r0,r8,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r11,r11,r7 @ d+=h eor r3,r3,r9 @ Maj(a,b,c) add r7,r7,r0,ror#2 @ h+=Sigma0(a) @ add r7,r7,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 5 # if 5==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r11,r11,ror#5 add r7,r7,r3 @ h+=Maj(a,b,c) from the past eor r0,r0,r11,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 5 add r7,r7,r3 @ h+=Maj(a,b,c) from the past ldrb r3,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r3,lsl#8 ldrb r3,[r1],#4 orr r2,r2,r0,lsl#16 # if 5==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r11,r11,ror#5 orr r2,r2,r3,lsl#24 eor r0,r0,r11,ror#19 @ Sigma1(e) #endif ldr r3,[r14],#4 @ *K256++ add r6,r6,r2 @ h+=X[i] str r2,[sp,#5*4] eor r2,r4,r5 add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) eor r0,r7,r7,ror#11 add r6,r6,r2 @ h+=Ch(e,f,g) #if 5==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 5<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r7,r8 @ a^b, b^c in next round #else ldr r2,[sp,#7*4] @ from future BODY_16_xx eor r3,r7,r8 @ a^b, b^c in next round ldr r1,[sp,#4*4] @ from future BODY_16_xx #endif eor r0,r0,r7,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r10,r10,r6 @ d+=h eor r12,r12,r8 @ Maj(a,b,c) add r6,r6,r0,ror#2 @ h+=Sigma0(a) @ add r6,r6,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 6 # if 6==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r10,r10,ror#5 add r6,r6,r12 @ h+=Maj(a,b,c) from the past eor r0,r0,r10,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 6 add r6,r6,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r12,lsl#8 ldrb r12,[r1],#4 orr r2,r2,r0,lsl#16 # if 6==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r10,r10,ror#5 orr r2,r2,r12,lsl#24 eor r0,r0,r10,ror#19 @ Sigma1(e) #endif ldr r12,[r14],#4 @ *K256++ add r5,r5,r2 @ h+=X[i] str r2,[sp,#6*4] eor r2,r11,r4 add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) eor r0,r6,r6,ror#11 add r5,r5,r2 @ h+=Ch(e,f,g) #if 6==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? 
#endif #if 6<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r6,r7 @ a^b, b^c in next round #else ldr r2,[sp,#8*4] @ from future BODY_16_xx eor r12,r6,r7 @ a^b, b^c in next round ldr r1,[sp,#5*4] @ from future BODY_16_xx #endif eor r0,r0,r6,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r9,r9,r5 @ d+=h eor r3,r3,r7 @ Maj(a,b,c) add r5,r5,r0,ror#2 @ h+=Sigma0(a) @ add r5,r5,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 7 # if 7==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r9,r9,ror#5 add r5,r5,r3 @ h+=Maj(a,b,c) from the past eor r0,r0,r9,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 7 add r5,r5,r3 @ h+=Maj(a,b,c) from the past ldrb r3,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r3,lsl#8 ldrb r3,[r1],#4 orr r2,r2,r0,lsl#16 # if 7==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r9,r9,ror#5 orr r2,r2,r3,lsl#24 eor r0,r0,r9,ror#19 @ Sigma1(e) #endif ldr r3,[r14],#4 @ *K256++ add r4,r4,r2 @ h+=X[i] str r2,[sp,#7*4] eor r2,r10,r11 add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) eor r0,r5,r5,ror#11 add r4,r4,r2 @ h+=Ch(e,f,g) #if 7==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 7<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r5,r6 @ a^b, b^c in next round #else ldr r2,[sp,#9*4] @ from future BODY_16_xx eor r3,r5,r6 @ a^b, b^c in next round ldr r1,[sp,#6*4] @ from future BODY_16_xx #endif eor r0,r0,r5,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r8,r8,r4 @ d+=h eor r12,r12,r6 @ Maj(a,b,c) add r4,r4,r0,ror#2 @ h+=Sigma0(a) @ add r4,r4,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 8 # if 8==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r8,r8,ror#5 add r4,r4,r12 @ h+=Maj(a,b,c) from the past eor r0,r0,r8,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 8 add r4,r4,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r12,lsl#8 ldrb r12,[r1],#4 orr r2,r2,r0,lsl#16 # if 8==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r8,r8,ror#5 orr r2,r2,r12,lsl#24 eor r0,r0,r8,ror#19 @ Sigma1(e) #endif ldr r12,[r14],#4 @ *K256++ add r11,r11,r2 @ h+=X[i] str r2,[sp,#8*4] eor r2,r9,r10 add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) eor r0,r4,r4,ror#11 add r11,r11,r2 @ h+=Ch(e,f,g) #if 8==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? 
#endif #if 8<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r4,r5 @ a^b, b^c in next round #else ldr r2,[sp,#10*4] @ from future BODY_16_xx eor r12,r4,r5 @ a^b, b^c in next round ldr r1,[sp,#7*4] @ from future BODY_16_xx #endif eor r0,r0,r4,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r7,r7,r11 @ d+=h eor r3,r3,r5 @ Maj(a,b,c) add r11,r11,r0,ror#2 @ h+=Sigma0(a) @ add r11,r11,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 9 # if 9==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r7,r7,ror#5 add r11,r11,r3 @ h+=Maj(a,b,c) from the past eor r0,r0,r7,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 9 add r11,r11,r3 @ h+=Maj(a,b,c) from the past ldrb r3,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r3,lsl#8 ldrb r3,[r1],#4 orr r2,r2,r0,lsl#16 # if 9==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r7,r7,ror#5 orr r2,r2,r3,lsl#24 eor r0,r0,r7,ror#19 @ Sigma1(e) #endif ldr r3,[r14],#4 @ *K256++ add r10,r10,r2 @ h+=X[i] str r2,[sp,#9*4] eor r2,r8,r9 add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) eor r0,r11,r11,ror#11 add r10,r10,r2 @ h+=Ch(e,f,g) #if 9==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 9<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r11,r4 @ a^b, b^c in next round #else ldr r2,[sp,#11*4] @ from future BODY_16_xx eor r3,r11,r4 @ a^b, b^c in next round ldr r1,[sp,#8*4] @ from future BODY_16_xx #endif eor r0,r0,r11,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r6,r6,r10 @ d+=h eor r12,r12,r4 @ Maj(a,b,c) add r10,r10,r0,ror#2 @ h+=Sigma0(a) @ add r10,r10,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 10 # if 10==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r6,r6,ror#5 add r10,r10,r12 @ h+=Maj(a,b,c) from the past eor r0,r0,r6,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 10 add r10,r10,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r12,lsl#8 ldrb r12,[r1],#4 orr r2,r2,r0,lsl#16 # if 10==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r6,r6,ror#5 orr r2,r2,r12,lsl#24 eor r0,r0,r6,ror#19 @ Sigma1(e) #endif ldr r12,[r14],#4 @ *K256++ add r9,r9,r2 @ h+=X[i] str r2,[sp,#10*4] eor r2,r7,r8 add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) eor r0,r10,r10,ror#11 add r9,r9,r2 @ h+=Ch(e,f,g) #if 10==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? 
#endif #if 10<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r10,r11 @ a^b, b^c in next round #else ldr r2,[sp,#12*4] @ from future BODY_16_xx eor r12,r10,r11 @ a^b, b^c in next round ldr r1,[sp,#9*4] @ from future BODY_16_xx #endif eor r0,r0,r10,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r5,r5,r9 @ d+=h eor r3,r3,r11 @ Maj(a,b,c) add r9,r9,r0,ror#2 @ h+=Sigma0(a) @ add r9,r9,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 11 # if 11==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r5,r5,ror#5 add r9,r9,r3 @ h+=Maj(a,b,c) from the past eor r0,r0,r5,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 11 add r9,r9,r3 @ h+=Maj(a,b,c) from the past ldrb r3,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r3,lsl#8 ldrb r3,[r1],#4 orr r2,r2,r0,lsl#16 # if 11==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r5,r5,ror#5 orr r2,r2,r3,lsl#24 eor r0,r0,r5,ror#19 @ Sigma1(e) #endif ldr r3,[r14],#4 @ *K256++ add r8,r8,r2 @ h+=X[i] str r2,[sp,#11*4] eor r2,r6,r7 add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) eor r0,r9,r9,ror#11 add r8,r8,r2 @ h+=Ch(e,f,g) #if 11==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 11<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r9,r10 @ a^b, b^c in next round #else ldr r2,[sp,#13*4] @ from future BODY_16_xx eor r3,r9,r10 @ a^b, b^c in next round ldr r1,[sp,#10*4] @ from future BODY_16_xx #endif eor r0,r0,r9,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r4,r4,r8 @ d+=h eor r12,r12,r10 @ Maj(a,b,c) add r8,r8,r0,ror#2 @ h+=Sigma0(a) @ add r8,r8,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 12 # if 12==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r4,r4,ror#5 add r8,r8,r12 @ h+=Maj(a,b,c) from the past eor r0,r0,r4,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 12 add r8,r8,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r12,lsl#8 ldrb r12,[r1],#4 orr r2,r2,r0,lsl#16 # if 12==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r4,r4,ror#5 orr r2,r2,r12,lsl#24 eor r0,r0,r4,ror#19 @ Sigma1(e) #endif ldr r12,[r14],#4 @ *K256++ add r7,r7,r2 @ h+=X[i] str r2,[sp,#12*4] eor r2,r5,r6 add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) eor r0,r8,r8,ror#11 add r7,r7,r2 @ h+=Ch(e,f,g) #if 12==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? 
#endif #if 12<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r8,r9 @ a^b, b^c in next round #else ldr r2,[sp,#14*4] @ from future BODY_16_xx eor r12,r8,r9 @ a^b, b^c in next round ldr r1,[sp,#11*4] @ from future BODY_16_xx #endif eor r0,r0,r8,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r11,r11,r7 @ d+=h eor r3,r3,r9 @ Maj(a,b,c) add r7,r7,r0,ror#2 @ h+=Sigma0(a) @ add r7,r7,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 13 # if 13==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r11,r11,ror#5 add r7,r7,r3 @ h+=Maj(a,b,c) from the past eor r0,r0,r11,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 13 add r7,r7,r3 @ h+=Maj(a,b,c) from the past ldrb r3,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r3,lsl#8 ldrb r3,[r1],#4 orr r2,r2,r0,lsl#16 # if 13==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r11,r11,ror#5 orr r2,r2,r3,lsl#24 eor r0,r0,r11,ror#19 @ Sigma1(e) #endif ldr r3,[r14],#4 @ *K256++ add r6,r6,r2 @ h+=X[i] str r2,[sp,#13*4] eor r2,r4,r5 add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) eor r0,r7,r7,ror#11 add r6,r6,r2 @ h+=Ch(e,f,g) #if 13==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 13<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r7,r8 @ a^b, b^c in next round #else ldr r2,[sp,#15*4] @ from future BODY_16_xx eor r3,r7,r8 @ a^b, b^c in next round ldr r1,[sp,#12*4] @ from future BODY_16_xx #endif eor r0,r0,r7,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r10,r10,r6 @ d+=h eor r12,r12,r8 @ Maj(a,b,c) add r6,r6,r0,ror#2 @ h+=Sigma0(a) @ add r6,r6,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 14 # if 14==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r10,r10,ror#5 add r6,r6,r12 @ h+=Maj(a,b,c) from the past eor r0,r0,r10,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 14 add r6,r6,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r12,lsl#8 ldrb r12,[r1],#4 orr r2,r2,r0,lsl#16 # if 14==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r10,r10,ror#5 orr r2,r2,r12,lsl#24 eor r0,r0,r10,ror#19 @ Sigma1(e) #endif ldr r12,[r14],#4 @ *K256++ add r5,r5,r2 @ h+=X[i] str r2,[sp,#14*4] eor r2,r11,r4 add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) eor r0,r6,r6,ror#11 add r5,r5,r2 @ h+=Ch(e,f,g) #if 14==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? 
#endif #if 14<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r6,r7 @ a^b, b^c in next round #else ldr r2,[sp,#0*4] @ from future BODY_16_xx eor r12,r6,r7 @ a^b, b^c in next round ldr r1,[sp,#13*4] @ from future BODY_16_xx #endif eor r0,r0,r6,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r9,r9,r5 @ d+=h eor r3,r3,r7 @ Maj(a,b,c) add r5,r5,r0,ror#2 @ h+=Sigma0(a) @ add r5,r5,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 @ ldr r2,[r1],#4 @ 15 # if 15==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r9,r9,ror#5 add r5,r5,r3 @ h+=Maj(a,b,c) from the past eor r0,r0,r9,ror#19 @ Sigma1(e) # ifndef __ARMEB__ rev r2,r2 # endif #else @ ldrb r2,[r1,#3] @ 15 add r5,r5,r3 @ h+=Maj(a,b,c) from the past ldrb r3,[r1,#2] ldrb r0,[r1,#1] orr r2,r2,r3,lsl#8 ldrb r3,[r1],#4 orr r2,r2,r0,lsl#16 # if 15==15 str r1,[sp,#17*4] @ make room for r1 # endif eor r0,r9,r9,ror#5 orr r2,r2,r3,lsl#24 eor r0,r0,r9,ror#19 @ Sigma1(e) #endif ldr r3,[r14],#4 @ *K256++ add r4,r4,r2 @ h+=X[i] str r2,[sp,#15*4] eor r2,r10,r11 add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) eor r0,r5,r5,ror#11 add r4,r4,r2 @ h+=Ch(e,f,g) #if 15==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 15<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r5,r6 @ a^b, b^c in next round #else ldr r2,[sp,#1*4] @ from future BODY_16_xx eor r3,r5,r6 @ a^b, b^c in next round ldr r1,[sp,#14*4] @ from future BODY_16_xx #endif eor r0,r0,r5,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r8,r8,r4 @ d+=h eor r12,r12,r6 @ Maj(a,b,c) add r4,r4,r0,ror#2 @ h+=Sigma0(a) @ add r4,r4,r12 @ h+=Maj(a,b,c) .Lrounds_16_xx: @ ldr r2,[sp,#1*4] @ 16 @ ldr r1,[sp,#14*4] mov r0,r2,ror#7 add r4,r4,r12 @ h+=Maj(a,b,c) from the past mov r12,r1,ror#17 eor r0,r0,r2,ror#18 eor r12,r12,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#0*4] eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#9*4] add r12,r12,r0 eor r0,r8,r8,ror#5 @ from BODY_00_15 add r2,r2,r12 eor r0,r0,r8,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ add r11,r11,r2 @ h+=X[i] str r2,[sp,#0*4] eor r2,r9,r10 add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) eor r0,r4,r4,ror#11 add r11,r11,r2 @ h+=Ch(e,f,g) #if 16==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? #endif #if 16<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r4,r5 @ a^b, b^c in next round #else ldr r2,[sp,#2*4] @ from future BODY_16_xx eor r12,r4,r5 @ a^b, b^c in next round ldr r1,[sp,#15*4] @ from future BODY_16_xx #endif eor r0,r0,r4,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r7,r7,r11 @ d+=h eor r3,r3,r5 @ Maj(a,b,c) add r11,r11,r0,ror#2 @ h+=Sigma0(a) @ add r11,r11,r3 @ h+=Maj(a,b,c) @ ldr r2,[sp,#2*4] @ 17 @ ldr r1,[sp,#15*4] mov r0,r2,ror#7 add r11,r11,r3 @ h+=Maj(a,b,c) from the past mov r3,r1,ror#17 eor r0,r0,r2,ror#18 eor r3,r3,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#1*4] eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#10*4] add r3,r3,r0 eor r0,r7,r7,ror#5 @ from BODY_00_15 add r2,r2,r3 eor r0,r0,r7,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r3,[r14],#4 @ *K256++ add r10,r10,r2 @ h+=X[i] str r2,[sp,#1*4] eor r2,r8,r9 add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) eor r0,r11,r11,ror#11 add r10,r10,r2 @ h+=Ch(e,f,g) #if 17==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? 
#endif #if 17<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r11,r4 @ a^b, b^c in next round #else ldr r2,[sp,#3*4] @ from future BODY_16_xx eor r3,r11,r4 @ a^b, b^c in next round ldr r1,[sp,#0*4] @ from future BODY_16_xx #endif eor r0,r0,r11,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r6,r6,r10 @ d+=h eor r12,r12,r4 @ Maj(a,b,c) add r10,r10,r0,ror#2 @ h+=Sigma0(a) @ add r10,r10,r12 @ h+=Maj(a,b,c) @ ldr r2,[sp,#3*4] @ 18 @ ldr r1,[sp,#0*4] mov r0,r2,ror#7 add r10,r10,r12 @ h+=Maj(a,b,c) from the past mov r12,r1,ror#17 eor r0,r0,r2,ror#18 eor r12,r12,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#2*4] eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#11*4] add r12,r12,r0 eor r0,r6,r6,ror#5 @ from BODY_00_15 add r2,r2,r12 eor r0,r0,r6,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ add r9,r9,r2 @ h+=X[i] str r2,[sp,#2*4] eor r2,r7,r8 add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) eor r0,r10,r10,ror#11 add r9,r9,r2 @ h+=Ch(e,f,g) #if 18==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? #endif #if 18<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r10,r11 @ a^b, b^c in next round #else ldr r2,[sp,#4*4] @ from future BODY_16_xx eor r12,r10,r11 @ a^b, b^c in next round ldr r1,[sp,#1*4] @ from future BODY_16_xx #endif eor r0,r0,r10,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r5,r5,r9 @ d+=h eor r3,r3,r11 @ Maj(a,b,c) add r9,r9,r0,ror#2 @ h+=Sigma0(a) @ add r9,r9,r3 @ h+=Maj(a,b,c) @ ldr r2,[sp,#4*4] @ 19 @ ldr r1,[sp,#1*4] mov r0,r2,ror#7 add r9,r9,r3 @ h+=Maj(a,b,c) from the past mov r3,r1,ror#17 eor r0,r0,r2,ror#18 eor r3,r3,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#3*4] eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#12*4] add r3,r3,r0 eor r0,r5,r5,ror#5 @ from BODY_00_15 add r2,r2,r3 eor r0,r0,r5,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r3,[r14],#4 @ *K256++ add r8,r8,r2 @ h+=X[i] str r2,[sp,#3*4] eor r2,r6,r7 add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) eor r0,r9,r9,ror#11 add r8,r8,r2 @ h+=Ch(e,f,g) #if 19==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 19<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r9,r10 @ a^b, b^c in next round #else ldr r2,[sp,#5*4] @ from future BODY_16_xx eor r3,r9,r10 @ a^b, b^c in next round ldr r1,[sp,#2*4] @ from future BODY_16_xx #endif eor r0,r0,r9,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r4,r4,r8 @ d+=h eor r12,r12,r10 @ Maj(a,b,c) add r8,r8,r0,ror#2 @ h+=Sigma0(a) @ add r8,r8,r12 @ h+=Maj(a,b,c) @ ldr r2,[sp,#5*4] @ 20 @ ldr r1,[sp,#2*4] mov r0,r2,ror#7 add r8,r8,r12 @ h+=Maj(a,b,c) from the past mov r12,r1,ror#17 eor r0,r0,r2,ror#18 eor r12,r12,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#4*4] eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#13*4] add r12,r12,r0 eor r0,r4,r4,ror#5 @ from BODY_00_15 add r2,r2,r12 eor r0,r0,r4,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ add r7,r7,r2 @ h+=X[i] str r2,[sp,#4*4] eor r2,r5,r6 add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) eor r0,r8,r8,ror#11 add r7,r7,r2 @ h+=Ch(e,f,g) #if 20==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? 
#endif #if 20<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r8,r9 @ a^b, b^c in next round #else ldr r2,[sp,#6*4] @ from future BODY_16_xx eor r12,r8,r9 @ a^b, b^c in next round ldr r1,[sp,#3*4] @ from future BODY_16_xx #endif eor r0,r0,r8,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r11,r11,r7 @ d+=h eor r3,r3,r9 @ Maj(a,b,c) add r7,r7,r0,ror#2 @ h+=Sigma0(a) @ add r7,r7,r3 @ h+=Maj(a,b,c) @ ldr r2,[sp,#6*4] @ 21 @ ldr r1,[sp,#3*4] mov r0,r2,ror#7 add r7,r7,r3 @ h+=Maj(a,b,c) from the past mov r3,r1,ror#17 eor r0,r0,r2,ror#18 eor r3,r3,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#5*4] eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#14*4] add r3,r3,r0 eor r0,r11,r11,ror#5 @ from BODY_00_15 add r2,r2,r3 eor r0,r0,r11,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r3,[r14],#4 @ *K256++ add r6,r6,r2 @ h+=X[i] str r2,[sp,#5*4] eor r2,r4,r5 add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) eor r0,r7,r7,ror#11 add r6,r6,r2 @ h+=Ch(e,f,g) #if 21==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 21<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r7,r8 @ a^b, b^c in next round #else ldr r2,[sp,#7*4] @ from future BODY_16_xx eor r3,r7,r8 @ a^b, b^c in next round ldr r1,[sp,#4*4] @ from future BODY_16_xx #endif eor r0,r0,r7,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r10,r10,r6 @ d+=h eor r12,r12,r8 @ Maj(a,b,c) add r6,r6,r0,ror#2 @ h+=Sigma0(a) @ add r6,r6,r12 @ h+=Maj(a,b,c) @ ldr r2,[sp,#7*4] @ 22 @ ldr r1,[sp,#4*4] mov r0,r2,ror#7 add r6,r6,r12 @ h+=Maj(a,b,c) from the past mov r12,r1,ror#17 eor r0,r0,r2,ror#18 eor r12,r12,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#6*4] eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#15*4] add r12,r12,r0 eor r0,r10,r10,ror#5 @ from BODY_00_15 add r2,r2,r12 eor r0,r0,r10,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ add r5,r5,r2 @ h+=X[i] str r2,[sp,#6*4] eor r2,r11,r4 add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) eor r0,r6,r6,ror#11 add r5,r5,r2 @ h+=Ch(e,f,g) #if 22==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? #endif #if 22<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r6,r7 @ a^b, b^c in next round #else ldr r2,[sp,#8*4] @ from future BODY_16_xx eor r12,r6,r7 @ a^b, b^c in next round ldr r1,[sp,#5*4] @ from future BODY_16_xx #endif eor r0,r0,r6,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r9,r9,r5 @ d+=h eor r3,r3,r7 @ Maj(a,b,c) add r5,r5,r0,ror#2 @ h+=Sigma0(a) @ add r5,r5,r3 @ h+=Maj(a,b,c) @ ldr r2,[sp,#8*4] @ 23 @ ldr r1,[sp,#5*4] mov r0,r2,ror#7 add r5,r5,r3 @ h+=Maj(a,b,c) from the past mov r3,r1,ror#17 eor r0,r0,r2,ror#18 eor r3,r3,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#7*4] eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#0*4] add r3,r3,r0 eor r0,r9,r9,ror#5 @ from BODY_00_15 add r2,r2,r3 eor r0,r0,r9,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r3,[r14],#4 @ *K256++ add r4,r4,r2 @ h+=X[i] str r2,[sp,#7*4] eor r2,r10,r11 add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) eor r0,r5,r5,ror#11 add r4,r4,r2 @ h+=Ch(e,f,g) #if 23==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? 
#endif #if 23<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r5,r6 @ a^b, b^c in next round #else ldr r2,[sp,#9*4] @ from future BODY_16_xx eor r3,r5,r6 @ a^b, b^c in next round ldr r1,[sp,#6*4] @ from future BODY_16_xx #endif eor r0,r0,r5,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r8,r8,r4 @ d+=h eor r12,r12,r6 @ Maj(a,b,c) add r4,r4,r0,ror#2 @ h+=Sigma0(a) @ add r4,r4,r12 @ h+=Maj(a,b,c) @ ldr r2,[sp,#9*4] @ 24 @ ldr r1,[sp,#6*4] mov r0,r2,ror#7 add r4,r4,r12 @ h+=Maj(a,b,c) from the past mov r12,r1,ror#17 eor r0,r0,r2,ror#18 eor r12,r12,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#8*4] eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#1*4] add r12,r12,r0 eor r0,r8,r8,ror#5 @ from BODY_00_15 add r2,r2,r12 eor r0,r0,r8,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ add r11,r11,r2 @ h+=X[i] str r2,[sp,#8*4] eor r2,r9,r10 add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) eor r0,r4,r4,ror#11 add r11,r11,r2 @ h+=Ch(e,f,g) #if 24==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? #endif #if 24<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r4,r5 @ a^b, b^c in next round #else ldr r2,[sp,#10*4] @ from future BODY_16_xx eor r12,r4,r5 @ a^b, b^c in next round ldr r1,[sp,#7*4] @ from future BODY_16_xx #endif eor r0,r0,r4,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r7,r7,r11 @ d+=h eor r3,r3,r5 @ Maj(a,b,c) add r11,r11,r0,ror#2 @ h+=Sigma0(a) @ add r11,r11,r3 @ h+=Maj(a,b,c) @ ldr r2,[sp,#10*4] @ 25 @ ldr r1,[sp,#7*4] mov r0,r2,ror#7 add r11,r11,r3 @ h+=Maj(a,b,c) from the past mov r3,r1,ror#17 eor r0,r0,r2,ror#18 eor r3,r3,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#9*4] eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#2*4] add r3,r3,r0 eor r0,r7,r7,ror#5 @ from BODY_00_15 add r2,r2,r3 eor r0,r0,r7,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r3,[r14],#4 @ *K256++ add r10,r10,r2 @ h+=X[i] str r2,[sp,#9*4] eor r2,r8,r9 add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) eor r0,r11,r11,ror#11 add r10,r10,r2 @ h+=Ch(e,f,g) #if 25==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 25<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r11,r4 @ a^b, b^c in next round #else ldr r2,[sp,#11*4] @ from future BODY_16_xx eor r3,r11,r4 @ a^b, b^c in next round ldr r1,[sp,#8*4] @ from future BODY_16_xx #endif eor r0,r0,r11,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r6,r6,r10 @ d+=h eor r12,r12,r4 @ Maj(a,b,c) add r10,r10,r0,ror#2 @ h+=Sigma0(a) @ add r10,r10,r12 @ h+=Maj(a,b,c) @ ldr r2,[sp,#11*4] @ 26 @ ldr r1,[sp,#8*4] mov r0,r2,ror#7 add r10,r10,r12 @ h+=Maj(a,b,c) from the past mov r12,r1,ror#17 eor r0,r0,r2,ror#18 eor r12,r12,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#10*4] eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#3*4] add r12,r12,r0 eor r0,r6,r6,ror#5 @ from BODY_00_15 add r2,r2,r12 eor r0,r0,r6,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ add r9,r9,r2 @ h+=X[i] str r2,[sp,#10*4] eor r2,r7,r8 add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) eor r0,r10,r10,ror#11 add r9,r9,r2 @ h+=Ch(e,f,g) #if 26==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? 
#endif #if 26<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r10,r11 @ a^b, b^c in next round #else ldr r2,[sp,#12*4] @ from future BODY_16_xx eor r12,r10,r11 @ a^b, b^c in next round ldr r1,[sp,#9*4] @ from future BODY_16_xx #endif eor r0,r0,r10,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r5,r5,r9 @ d+=h eor r3,r3,r11 @ Maj(a,b,c) add r9,r9,r0,ror#2 @ h+=Sigma0(a) @ add r9,r9,r3 @ h+=Maj(a,b,c) @ ldr r2,[sp,#12*4] @ 27 @ ldr r1,[sp,#9*4] mov r0,r2,ror#7 add r9,r9,r3 @ h+=Maj(a,b,c) from the past mov r3,r1,ror#17 eor r0,r0,r2,ror#18 eor r3,r3,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#11*4] eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#4*4] add r3,r3,r0 eor r0,r5,r5,ror#5 @ from BODY_00_15 add r2,r2,r3 eor r0,r0,r5,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r3,[r14],#4 @ *K256++ add r8,r8,r2 @ h+=X[i] str r2,[sp,#11*4] eor r2,r6,r7 add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) eor r0,r9,r9,ror#11 add r8,r8,r2 @ h+=Ch(e,f,g) #if 27==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? #endif #if 27<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r9,r10 @ a^b, b^c in next round #else ldr r2,[sp,#13*4] @ from future BODY_16_xx eor r3,r9,r10 @ a^b, b^c in next round ldr r1,[sp,#10*4] @ from future BODY_16_xx #endif eor r0,r0,r9,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r4,r4,r8 @ d+=h eor r12,r12,r10 @ Maj(a,b,c) add r8,r8,r0,ror#2 @ h+=Sigma0(a) @ add r8,r8,r12 @ h+=Maj(a,b,c) @ ldr r2,[sp,#13*4] @ 28 @ ldr r1,[sp,#10*4] mov r0,r2,ror#7 add r8,r8,r12 @ h+=Maj(a,b,c) from the past mov r12,r1,ror#17 eor r0,r0,r2,ror#18 eor r12,r12,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#12*4] eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#5*4] add r12,r12,r0 eor r0,r4,r4,ror#5 @ from BODY_00_15 add r2,r2,r12 eor r0,r0,r4,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ add r7,r7,r2 @ h+=X[i] str r2,[sp,#12*4] eor r2,r5,r6 add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) eor r0,r8,r8,ror#11 add r7,r7,r2 @ h+=Ch(e,f,g) #if 28==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? #endif #if 28<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r8,r9 @ a^b, b^c in next round #else ldr r2,[sp,#14*4] @ from future BODY_16_xx eor r12,r8,r9 @ a^b, b^c in next round ldr r1,[sp,#11*4] @ from future BODY_16_xx #endif eor r0,r0,r8,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r11,r11,r7 @ d+=h eor r3,r3,r9 @ Maj(a,b,c) add r7,r7,r0,ror#2 @ h+=Sigma0(a) @ add r7,r7,r3 @ h+=Maj(a,b,c) @ ldr r2,[sp,#14*4] @ 29 @ ldr r1,[sp,#11*4] mov r0,r2,ror#7 add r7,r7,r3 @ h+=Maj(a,b,c) from the past mov r3,r1,ror#17 eor r0,r0,r2,ror#18 eor r3,r3,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#13*4] eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#6*4] add r3,r3,r0 eor r0,r11,r11,ror#5 @ from BODY_00_15 add r2,r2,r3 eor r0,r0,r11,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r3,[r14],#4 @ *K256++ add r6,r6,r2 @ h+=X[i] str r2,[sp,#13*4] eor r2,r4,r5 add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) eor r0,r7,r7,ror#11 add r6,r6,r2 @ h+=Ch(e,f,g) #if 29==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? 
#endif #if 29<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r7,r8 @ a^b, b^c in next round #else ldr r2,[sp,#15*4] @ from future BODY_16_xx eor r3,r7,r8 @ a^b, b^c in next round ldr r1,[sp,#12*4] @ from future BODY_16_xx #endif eor r0,r0,r7,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r10,r10,r6 @ d+=h eor r12,r12,r8 @ Maj(a,b,c) add r6,r6,r0,ror#2 @ h+=Sigma0(a) @ add r6,r6,r12 @ h+=Maj(a,b,c) @ ldr r2,[sp,#15*4] @ 30 @ ldr r1,[sp,#12*4] mov r0,r2,ror#7 add r6,r6,r12 @ h+=Maj(a,b,c) from the past mov r12,r1,ror#17 eor r0,r0,r2,ror#18 eor r12,r12,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#14*4] eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#7*4] add r12,r12,r0 eor r0,r10,r10,ror#5 @ from BODY_00_15 add r2,r2,r12 eor r0,r0,r10,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ add r5,r5,r2 @ h+=X[i] str r2,[sp,#14*4] eor r2,r11,r4 add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) eor r0,r6,r6,ror#11 add r5,r5,r2 @ h+=Ch(e,f,g) #if 30==31 and r12,r12,#0xff cmp r12,#0xf2 @ done? #endif #if 30<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r12,r6,r7 @ a^b, b^c in next round #else ldr r2,[sp,#0*4] @ from future BODY_16_xx eor r12,r6,r7 @ a^b, b^c in next round ldr r1,[sp,#13*4] @ from future BODY_16_xx #endif eor r0,r0,r6,ror#20 @ Sigma0(a) and r3,r3,r12 @ (b^c)&=(a^b) add r9,r9,r5 @ d+=h eor r3,r3,r7 @ Maj(a,b,c) add r5,r5,r0,ror#2 @ h+=Sigma0(a) @ add r5,r5,r3 @ h+=Maj(a,b,c) @ ldr r2,[sp,#0*4] @ 31 @ ldr r1,[sp,#13*4] mov r0,r2,ror#7 add r5,r5,r3 @ h+=Maj(a,b,c) from the past mov r3,r1,ror#17 eor r0,r0,r2,ror#18 eor r3,r3,r1,ror#19 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ldr r2,[sp,#15*4] eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ldr r1,[sp,#8*4] add r3,r3,r0 eor r0,r9,r9,ror#5 @ from BODY_00_15 add r2,r2,r3 eor r0,r0,r9,ror#19 @ Sigma1(e) add r2,r2,r1 @ X[i] ldr r3,[r14],#4 @ *K256++ add r4,r4,r2 @ h+=X[i] str r2,[sp,#15*4] eor r2,r10,r11 add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) eor r0,r5,r5,ror#11 add r4,r4,r2 @ h+=Ch(e,f,g) #if 31==31 and r3,r3,#0xff cmp r3,#0xf2 @ done? 
#endif #if 31<15 # if __ARM_ARCH__>=7 ldr r2,[r1],#4 @ prefetch # else ldrb r2,[r1,#3] # endif eor r3,r5,r6 @ a^b, b^c in next round #else ldr r2,[sp,#1*4] @ from future BODY_16_xx eor r3,r5,r6 @ a^b, b^c in next round ldr r1,[sp,#14*4] @ from future BODY_16_xx #endif eor r0,r0,r5,ror#20 @ Sigma0(a) and r12,r12,r3 @ (b^c)&=(a^b) add r8,r8,r4 @ d+=h eor r12,r12,r6 @ Maj(a,b,c) add r4,r4,r0,ror#2 @ h+=Sigma0(a) @ add r4,r4,r12 @ h+=Maj(a,b,c) #ifdef __thumb2__ ite eq @ Thumb2 thing, sanity check in ARM #endif ldreq r3,[sp,#16*4] @ pull ctx bne .Lrounds_16_xx add r4,r4,r12 @ h+=Maj(a,b,c) from the past ldr r0,[r3,#0] ldr r2,[r3,#4] ldr r12,[r3,#8] add r4,r4,r0 ldr r0,[r3,#12] add r5,r5,r2 ldr r2,[r3,#16] add r6,r6,r12 ldr r12,[r3,#20] add r7,r7,r0 ldr r0,[r3,#24] add r8,r8,r2 ldr r2,[r3,#28] add r9,r9,r12 ldr r1,[sp,#17*4] @ pull inp ldr r12,[sp,#18*4] @ pull inp+len add r10,r10,r0 add r11,r11,r2 stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} cmp r1,r12 sub r14,r14,#256 @ rewind Ktbl bne .Loop add sp,sp,#19*4 @ destroy frame #if __ARM_ARCH__>=5 ldmia sp!,{r4-r11,pc} #else ldmia sp!,{r4-r11,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif .size zfs_sha256_block_armv7,.-zfs_sha256_block_armv7 +#if __ARM_ARCH__ >= 7 .arch armv7-a .fpu neon .globl zfs_sha256_block_neon .type zfs_sha256_block_neon,%function .align 5 .skip 16 zfs_sha256_block_neon: .LNEON: stmdb sp!,{r4-r12,lr} sub r11,sp,#16*4+16 -#if __ARM_ARCH__ >=7 adr r14,K256 -#else - ldr r14,=K256 -#endif bic r11,r11,#15 @ align for 128-bit stores mov r12,sp mov sp,r11 @ alloca add r2,r1,r2,lsl#6 @ len to point at the end of inp vld1.8 {q0},[r1]! vld1.8 {q1},[r1]! vld1.8 {q2},[r1]! vld1.8 {q3},[r1]! vld1.32 {q8},[r14,:128]! vld1.32 {q9},[r14,:128]! vld1.32 {q10},[r14,:128]! vld1.32 {q11},[r14,:128]! vrev32.8 q0,q0 @ yes, even on str r0,[sp,#64] vrev32.8 q1,q1 @ big-endian str r1,[sp,#68] mov r1,sp vrev32.8 q2,q2 str r2,[sp,#72] vrev32.8 q3,q3 str r12,[sp,#76] @ save original sp vadd.i32 q8,q8,q0 vadd.i32 q9,q9,q1 vst1.32 {q8},[r1,:128]! vadd.i32 q10,q10,q2 vst1.32 {q9},[r1,:128]! vadd.i32 q11,q11,q3 vst1.32 {q10},[r1,:128]! vst1.32 {q11},[r1,:128]! 
ldmia r0,{r4-r11} sub r1,r1,#64 ldr r2,[sp,#0] eor r12,r12,r12 eor r3,r5,r6 b .L_00_48 .align 4 .L_00_48: vext.8 q8,q0,q1,#4 add r11,r11,r2 eor r2,r9,r10 eor r0,r8,r8,ror#5 vext.8 q9,q2,q3,#4 add r4,r4,r12 and r2,r2,r8 eor r12,r0,r8,ror#19 vshr.u32 q10,q8,#7 eor r0,r4,r4,ror#11 eor r2,r2,r10 vadd.i32 q0,q0,q9 add r11,r11,r12,ror#6 eor r12,r4,r5 vshr.u32 q9,q8,#3 eor r0,r0,r4,ror#20 add r11,r11,r2 vsli.32 q10,q8,#25 ldr r2,[sp,#4] and r3,r3,r12 vshr.u32 q11,q8,#18 add r7,r7,r11 add r11,r11,r0,ror#2 eor r3,r3,r5 veor q9,q9,q10 add r10,r10,r2 vsli.32 q11,q8,#14 eor r2,r8,r9 eor r0,r7,r7,ror#5 vshr.u32 d24,d7,#17 add r11,r11,r3 and r2,r2,r7 veor q9,q9,q11 eor r3,r0,r7,ror#19 eor r0,r11,r11,ror#11 vsli.32 d24,d7,#15 eor r2,r2,r9 add r10,r10,r3,ror#6 vshr.u32 d25,d7,#10 eor r3,r11,r4 eor r0,r0,r11,ror#20 vadd.i32 q0,q0,q9 add r10,r10,r2 ldr r2,[sp,#8] veor d25,d25,d24 and r12,r12,r3 add r6,r6,r10 vshr.u32 d24,d7,#19 add r10,r10,r0,ror#2 eor r12,r12,r4 vsli.32 d24,d7,#13 add r9,r9,r2 eor r2,r7,r8 veor d25,d25,d24 eor r0,r6,r6,ror#5 add r10,r10,r12 vadd.i32 d0,d0,d25 and r2,r2,r6 eor r12,r0,r6,ror#19 vshr.u32 d24,d0,#17 eor r0,r10,r10,ror#11 eor r2,r2,r8 vsli.32 d24,d0,#15 add r9,r9,r12,ror#6 eor r12,r10,r11 vshr.u32 d25,d0,#10 eor r0,r0,r10,ror#20 add r9,r9,r2 veor d25,d25,d24 ldr r2,[sp,#12] and r3,r3,r12 vshr.u32 d24,d0,#19 add r5,r5,r9 add r9,r9,r0,ror#2 eor r3,r3,r11 vld1.32 {q8},[r14,:128]! add r8,r8,r2 vsli.32 d24,d0,#13 eor r2,r6,r7 eor r0,r5,r5,ror#5 veor d25,d25,d24 add r9,r9,r3 and r2,r2,r5 vadd.i32 d1,d1,d25 eor r3,r0,r5,ror#19 eor r0,r9,r9,ror#11 vadd.i32 q8,q8,q0 eor r2,r2,r7 add r8,r8,r3,ror#6 eor r3,r9,r10 eor r0,r0,r9,ror#20 add r8,r8,r2 ldr r2,[sp,#16] and r12,r12,r3 add r4,r4,r8 vst1.32 {q8},[r1,:128]! add r8,r8,r0,ror#2 eor r12,r12,r10 vext.8 q8,q1,q2,#4 add r7,r7,r2 eor r2,r5,r6 eor r0,r4,r4,ror#5 vext.8 q9,q3,q0,#4 add r8,r8,r12 and r2,r2,r4 eor r12,r0,r4,ror#19 vshr.u32 q10,q8,#7 eor r0,r8,r8,ror#11 eor r2,r2,r6 vadd.i32 q1,q1,q9 add r7,r7,r12,ror#6 eor r12,r8,r9 vshr.u32 q9,q8,#3 eor r0,r0,r8,ror#20 add r7,r7,r2 vsli.32 q10,q8,#25 ldr r2,[sp,#20] and r3,r3,r12 vshr.u32 q11,q8,#18 add r11,r11,r7 add r7,r7,r0,ror#2 eor r3,r3,r9 veor q9,q9,q10 add r6,r6,r2 vsli.32 q11,q8,#14 eor r2,r4,r5 eor r0,r11,r11,ror#5 vshr.u32 d24,d1,#17 add r7,r7,r3 and r2,r2,r11 veor q9,q9,q11 eor r3,r0,r11,ror#19 eor r0,r7,r7,ror#11 vsli.32 d24,d1,#15 eor r2,r2,r5 add r6,r6,r3,ror#6 vshr.u32 d25,d1,#10 eor r3,r7,r8 eor r0,r0,r7,ror#20 vadd.i32 q1,q1,q9 add r6,r6,r2 ldr r2,[sp,#24] veor d25,d25,d24 and r12,r12,r3 add r10,r10,r6 vshr.u32 d24,d1,#19 add r6,r6,r0,ror#2 eor r12,r12,r8 vsli.32 d24,d1,#13 add r5,r5,r2 eor r2,r11,r4 veor d25,d25,d24 eor r0,r10,r10,ror#5 add r6,r6,r12 vadd.i32 d2,d2,d25 and r2,r2,r10 eor r12,r0,r10,ror#19 vshr.u32 d24,d2,#17 eor r0,r6,r6,ror#11 eor r2,r2,r4 vsli.32 d24,d2,#15 add r5,r5,r12,ror#6 eor r12,r6,r7 vshr.u32 d25,d2,#10 eor r0,r0,r6,ror#20 add r5,r5,r2 veor d25,d25,d24 ldr r2,[sp,#28] and r3,r3,r12 vshr.u32 d24,d2,#19 add r9,r9,r5 add r5,r5,r0,ror#2 eor r3,r3,r7 vld1.32 {q8},[r14,:128]! add r4,r4,r2 vsli.32 d24,d2,#13 eor r2,r10,r11 eor r0,r9,r9,ror#5 veor d25,d25,d24 add r5,r5,r3 and r2,r2,r9 vadd.i32 d3,d3,d25 eor r3,r0,r9,ror#19 eor r0,r5,r5,ror#11 vadd.i32 q8,q8,q1 eor r2,r2,r11 add r4,r4,r3,ror#6 eor r3,r5,r6 eor r0,r0,r5,ror#20 add r4,r4,r2 ldr r2,[sp,#32] and r12,r12,r3 add r8,r8,r4 vst1.32 {q8},[r1,:128]! 
add r4,r4,r0,ror#2 eor r12,r12,r6 vext.8 q8,q2,q3,#4 add r11,r11,r2 eor r2,r9,r10 eor r0,r8,r8,ror#5 vext.8 q9,q0,q1,#4 add r4,r4,r12 and r2,r2,r8 eor r12,r0,r8,ror#19 vshr.u32 q10,q8,#7 eor r0,r4,r4,ror#11 eor r2,r2,r10 vadd.i32 q2,q2,q9 add r11,r11,r12,ror#6 eor r12,r4,r5 vshr.u32 q9,q8,#3 eor r0,r0,r4,ror#20 add r11,r11,r2 vsli.32 q10,q8,#25 ldr r2,[sp,#36] and r3,r3,r12 vshr.u32 q11,q8,#18 add r7,r7,r11 add r11,r11,r0,ror#2 eor r3,r3,r5 veor q9,q9,q10 add r10,r10,r2 vsli.32 q11,q8,#14 eor r2,r8,r9 eor r0,r7,r7,ror#5 vshr.u32 d24,d3,#17 add r11,r11,r3 and r2,r2,r7 veor q9,q9,q11 eor r3,r0,r7,ror#19 eor r0,r11,r11,ror#11 vsli.32 d24,d3,#15 eor r2,r2,r9 add r10,r10,r3,ror#6 vshr.u32 d25,d3,#10 eor r3,r11,r4 eor r0,r0,r11,ror#20 vadd.i32 q2,q2,q9 add r10,r10,r2 ldr r2,[sp,#40] veor d25,d25,d24 and r12,r12,r3 add r6,r6,r10 vshr.u32 d24,d3,#19 add r10,r10,r0,ror#2 eor r12,r12,r4 vsli.32 d24,d3,#13 add r9,r9,r2 eor r2,r7,r8 veor d25,d25,d24 eor r0,r6,r6,ror#5 add r10,r10,r12 vadd.i32 d4,d4,d25 and r2,r2,r6 eor r12,r0,r6,ror#19 vshr.u32 d24,d4,#17 eor r0,r10,r10,ror#11 eor r2,r2,r8 vsli.32 d24,d4,#15 add r9,r9,r12,ror#6 eor r12,r10,r11 vshr.u32 d25,d4,#10 eor r0,r0,r10,ror#20 add r9,r9,r2 veor d25,d25,d24 ldr r2,[sp,#44] and r3,r3,r12 vshr.u32 d24,d4,#19 add r5,r5,r9 add r9,r9,r0,ror#2 eor r3,r3,r11 vld1.32 {q8},[r14,:128]! add r8,r8,r2 vsli.32 d24,d4,#13 eor r2,r6,r7 eor r0,r5,r5,ror#5 veor d25,d25,d24 add r9,r9,r3 and r2,r2,r5 vadd.i32 d5,d5,d25 eor r3,r0,r5,ror#19 eor r0,r9,r9,ror#11 vadd.i32 q8,q8,q2 eor r2,r2,r7 add r8,r8,r3,ror#6 eor r3,r9,r10 eor r0,r0,r9,ror#20 add r8,r8,r2 ldr r2,[sp,#48] and r12,r12,r3 add r4,r4,r8 vst1.32 {q8},[r1,:128]! add r8,r8,r0,ror#2 eor r12,r12,r10 vext.8 q8,q3,q0,#4 add r7,r7,r2 eor r2,r5,r6 eor r0,r4,r4,ror#5 vext.8 q9,q1,q2,#4 add r8,r8,r12 and r2,r2,r4 eor r12,r0,r4,ror#19 vshr.u32 q10,q8,#7 eor r0,r8,r8,ror#11 eor r2,r2,r6 vadd.i32 q3,q3,q9 add r7,r7,r12,ror#6 eor r12,r8,r9 vshr.u32 q9,q8,#3 eor r0,r0,r8,ror#20 add r7,r7,r2 vsli.32 q10,q8,#25 ldr r2,[sp,#52] and r3,r3,r12 vshr.u32 q11,q8,#18 add r11,r11,r7 add r7,r7,r0,ror#2 eor r3,r3,r9 veor q9,q9,q10 add r6,r6,r2 vsli.32 q11,q8,#14 eor r2,r4,r5 eor r0,r11,r11,ror#5 vshr.u32 d24,d5,#17 add r7,r7,r3 and r2,r2,r11 veor q9,q9,q11 eor r3,r0,r11,ror#19 eor r0,r7,r7,ror#11 vsli.32 d24,d5,#15 eor r2,r2,r5 add r6,r6,r3,ror#6 vshr.u32 d25,d5,#10 eor r3,r7,r8 eor r0,r0,r7,ror#20 vadd.i32 q3,q3,q9 add r6,r6,r2 ldr r2,[sp,#56] veor d25,d25,d24 and r12,r12,r3 add r10,r10,r6 vshr.u32 d24,d5,#19 add r6,r6,r0,ror#2 eor r12,r12,r8 vsli.32 d24,d5,#13 add r5,r5,r2 eor r2,r11,r4 veor d25,d25,d24 eor r0,r10,r10,ror#5 add r6,r6,r12 vadd.i32 d6,d6,d25 and r2,r2,r10 eor r12,r0,r10,ror#19 vshr.u32 d24,d6,#17 eor r0,r6,r6,ror#11 eor r2,r2,r4 vsli.32 d24,d6,#15 add r5,r5,r12,ror#6 eor r12,r6,r7 vshr.u32 d25,d6,#10 eor r0,r0,r6,ror#20 add r5,r5,r2 veor d25,d25,d24 ldr r2,[sp,#60] and r3,r3,r12 vshr.u32 d24,d6,#19 add r9,r9,r5 add r5,r5,r0,ror#2 eor r3,r3,r7 vld1.32 {q8},[r14,:128]! add r4,r4,r2 vsli.32 d24,d6,#13 eor r2,r10,r11 eor r0,r9,r9,ror#5 veor d25,d25,d24 add r5,r5,r3 and r2,r2,r9 vadd.i32 d7,d7,d25 eor r3,r0,r9,ror#19 eor r0,r5,r5,ror#11 vadd.i32 q8,q8,q3 eor r2,r2,r11 add r4,r4,r3,ror#6 eor r3,r5,r6 eor r0,r0,r5,ror#20 add r4,r4,r2 ldr r2,[r14] and r12,r12,r3 add r8,r8,r4 vst1.32 {q8},[r1,:128]! 
add r4,r4,r0,ror#2 eor r12,r12,r6 teq r2,#0 @ check for K256 terminator ldr r2,[sp,#0] sub r1,r1,#64 bne .L_00_48 ldr r1,[sp,#68] ldr r0,[sp,#72] sub r14,r14,#256 @ rewind r14 teq r1,r0 it eq subeq r1,r1,#64 @ avoid SEGV vld1.8 {q0},[r1]! @ load next input block vld1.8 {q1},[r1]! vld1.8 {q2},[r1]! vld1.8 {q3},[r1]! it ne strne r1,[sp,#68] mov r1,sp add r11,r11,r2 eor r2,r9,r10 eor r0,r8,r8,ror#5 add r4,r4,r12 vld1.32 {q8},[r14,:128]! and r2,r2,r8 eor r12,r0,r8,ror#19 eor r0,r4,r4,ror#11 eor r2,r2,r10 vrev32.8 q0,q0 add r11,r11,r12,ror#6 eor r12,r4,r5 eor r0,r0,r4,ror#20 add r11,r11,r2 vadd.i32 q8,q8,q0 ldr r2,[sp,#4] and r3,r3,r12 add r7,r7,r11 add r11,r11,r0,ror#2 eor r3,r3,r5 add r10,r10,r2 eor r2,r8,r9 eor r0,r7,r7,ror#5 add r11,r11,r3 and r2,r2,r7 eor r3,r0,r7,ror#19 eor r0,r11,r11,ror#11 eor r2,r2,r9 add r10,r10,r3,ror#6 eor r3,r11,r4 eor r0,r0,r11,ror#20 add r10,r10,r2 ldr r2,[sp,#8] and r12,r12,r3 add r6,r6,r10 add r10,r10,r0,ror#2 eor r12,r12,r4 add r9,r9,r2 eor r2,r7,r8 eor r0,r6,r6,ror#5 add r10,r10,r12 and r2,r2,r6 eor r12,r0,r6,ror#19 eor r0,r10,r10,ror#11 eor r2,r2,r8 add r9,r9,r12,ror#6 eor r12,r10,r11 eor r0,r0,r10,ror#20 add r9,r9,r2 ldr r2,[sp,#12] and r3,r3,r12 add r5,r5,r9 add r9,r9,r0,ror#2 eor r3,r3,r11 add r8,r8,r2 eor r2,r6,r7 eor r0,r5,r5,ror#5 add r9,r9,r3 and r2,r2,r5 eor r3,r0,r5,ror#19 eor r0,r9,r9,ror#11 eor r2,r2,r7 add r8,r8,r3,ror#6 eor r3,r9,r10 eor r0,r0,r9,ror#20 add r8,r8,r2 ldr r2,[sp,#16] and r12,r12,r3 add r4,r4,r8 add r8,r8,r0,ror#2 eor r12,r12,r10 vst1.32 {q8},[r1,:128]! add r7,r7,r2 eor r2,r5,r6 eor r0,r4,r4,ror#5 add r8,r8,r12 vld1.32 {q8},[r14,:128]! and r2,r2,r4 eor r12,r0,r4,ror#19 eor r0,r8,r8,ror#11 eor r2,r2,r6 vrev32.8 q1,q1 add r7,r7,r12,ror#6 eor r12,r8,r9 eor r0,r0,r8,ror#20 add r7,r7,r2 vadd.i32 q8,q8,q1 ldr r2,[sp,#20] and r3,r3,r12 add r11,r11,r7 add r7,r7,r0,ror#2 eor r3,r3,r9 add r6,r6,r2 eor r2,r4,r5 eor r0,r11,r11,ror#5 add r7,r7,r3 and r2,r2,r11 eor r3,r0,r11,ror#19 eor r0,r7,r7,ror#11 eor r2,r2,r5 add r6,r6,r3,ror#6 eor r3,r7,r8 eor r0,r0,r7,ror#20 add r6,r6,r2 ldr r2,[sp,#24] and r12,r12,r3 add r10,r10,r6 add r6,r6,r0,ror#2 eor r12,r12,r8 add r5,r5,r2 eor r2,r11,r4 eor r0,r10,r10,ror#5 add r6,r6,r12 and r2,r2,r10 eor r12,r0,r10,ror#19 eor r0,r6,r6,ror#11 eor r2,r2,r4 add r5,r5,r12,ror#6 eor r12,r6,r7 eor r0,r0,r6,ror#20 add r5,r5,r2 ldr r2,[sp,#28] and r3,r3,r12 add r9,r9,r5 add r5,r5,r0,ror#2 eor r3,r3,r7 add r4,r4,r2 eor r2,r10,r11 eor r0,r9,r9,ror#5 add r5,r5,r3 and r2,r2,r9 eor r3,r0,r9,ror#19 eor r0,r5,r5,ror#11 eor r2,r2,r11 add r4,r4,r3,ror#6 eor r3,r5,r6 eor r0,r0,r5,ror#20 add r4,r4,r2 ldr r2,[sp,#32] and r12,r12,r3 add r8,r8,r4 add r4,r4,r0,ror#2 eor r12,r12,r6 vst1.32 {q8},[r1,:128]! add r11,r11,r2 eor r2,r9,r10 eor r0,r8,r8,ror#5 add r4,r4,r12 vld1.32 {q8},[r14,:128]! 
and r2,r2,r8 eor r12,r0,r8,ror#19 eor r0,r4,r4,ror#11 eor r2,r2,r10 vrev32.8 q2,q2 add r11,r11,r12,ror#6 eor r12,r4,r5 eor r0,r0,r4,ror#20 add r11,r11,r2 vadd.i32 q8,q8,q2 ldr r2,[sp,#36] and r3,r3,r12 add r7,r7,r11 add r11,r11,r0,ror#2 eor r3,r3,r5 add r10,r10,r2 eor r2,r8,r9 eor r0,r7,r7,ror#5 add r11,r11,r3 and r2,r2,r7 eor r3,r0,r7,ror#19 eor r0,r11,r11,ror#11 eor r2,r2,r9 add r10,r10,r3,ror#6 eor r3,r11,r4 eor r0,r0,r11,ror#20 add r10,r10,r2 ldr r2,[sp,#40] and r12,r12,r3 add r6,r6,r10 add r10,r10,r0,ror#2 eor r12,r12,r4 add r9,r9,r2 eor r2,r7,r8 eor r0,r6,r6,ror#5 add r10,r10,r12 and r2,r2,r6 eor r12,r0,r6,ror#19 eor r0,r10,r10,ror#11 eor r2,r2,r8 add r9,r9,r12,ror#6 eor r12,r10,r11 eor r0,r0,r10,ror#20 add r9,r9,r2 ldr r2,[sp,#44] and r3,r3,r12 add r5,r5,r9 add r9,r9,r0,ror#2 eor r3,r3,r11 add r8,r8,r2 eor r2,r6,r7 eor r0,r5,r5,ror#5 add r9,r9,r3 and r2,r2,r5 eor r3,r0,r5,ror#19 eor r0,r9,r9,ror#11 eor r2,r2,r7 add r8,r8,r3,ror#6 eor r3,r9,r10 eor r0,r0,r9,ror#20 add r8,r8,r2 ldr r2,[sp,#48] and r12,r12,r3 add r4,r4,r8 add r8,r8,r0,ror#2 eor r12,r12,r10 vst1.32 {q8},[r1,:128]! add r7,r7,r2 eor r2,r5,r6 eor r0,r4,r4,ror#5 add r8,r8,r12 vld1.32 {q8},[r14,:128]! and r2,r2,r4 eor r12,r0,r4,ror#19 eor r0,r8,r8,ror#11 eor r2,r2,r6 vrev32.8 q3,q3 add r7,r7,r12,ror#6 eor r12,r8,r9 eor r0,r0,r8,ror#20 add r7,r7,r2 vadd.i32 q8,q8,q3 ldr r2,[sp,#52] and r3,r3,r12 add r11,r11,r7 add r7,r7,r0,ror#2 eor r3,r3,r9 add r6,r6,r2 eor r2,r4,r5 eor r0,r11,r11,ror#5 add r7,r7,r3 and r2,r2,r11 eor r3,r0,r11,ror#19 eor r0,r7,r7,ror#11 eor r2,r2,r5 add r6,r6,r3,ror#6 eor r3,r7,r8 eor r0,r0,r7,ror#20 add r6,r6,r2 ldr r2,[sp,#56] and r12,r12,r3 add r10,r10,r6 add r6,r6,r0,ror#2 eor r12,r12,r8 add r5,r5,r2 eor r2,r11,r4 eor r0,r10,r10,ror#5 add r6,r6,r12 and r2,r2,r10 eor r12,r0,r10,ror#19 eor r0,r6,r6,ror#11 eor r2,r2,r4 add r5,r5,r12,ror#6 eor r12,r6,r7 eor r0,r0,r6,ror#20 add r5,r5,r2 ldr r2,[sp,#60] and r3,r3,r12 add r9,r9,r5 add r5,r5,r0,ror#2 eor r3,r3,r7 add r4,r4,r2 eor r2,r10,r11 eor r0,r9,r9,ror#5 add r5,r5,r3 and r2,r2,r9 eor r3,r0,r9,ror#19 eor r0,r5,r5,ror#11 eor r2,r2,r11 add r4,r4,r3,ror#6 eor r3,r5,r6 eor r0,r0,r5,ror#20 add r4,r4,r2 ldr r2,[sp,#64] and r12,r12,r3 add r8,r8,r4 add r4,r4,r0,ror#2 eor r12,r12,r6 vst1.32 {q8},[r1,:128]! ldr r0,[r2,#0] add r4,r4,r12 @ h+=Maj(a,b,c) from the past ldr r12,[r2,#4] ldr r3,[r2,#8] ldr r1,[r2,#12] add r4,r4,r0 @ accumulate ldr r0,[r2,#16] add r5,r5,r12 ldr r12,[r2,#20] add r6,r6,r3 ldr r3,[r2,#24] add r7,r7,r1 ldr r1,[r2,#28] add r8,r8,r0 str r4,[r2],#4 add r9,r9,r12 str r5,[r2],#4 add r10,r10,r3 str r6,[r2],#4 add r11,r11,r1 str r7,[r2],#4 stmia r2,{r8-r11} ittte ne movne r1,sp ldrne r2,[sp,#0] eorne r12,r12,r12 ldreq sp,[sp,#76] @ restore original sp itt ne eorne r3,r5,r6 bne .L_00_48 ldmia sp!,{r4-r12,pc} .size zfs_sha256_block_neon,.-zfs_sha256_block_neon # if defined(__thumb2__) # define INST(a,b,c,d) .byte c,d|0xc,a,b # else # define INST(a,b,c,d) .byte a,b,c,d # endif .globl zfs_sha256_block_armv8 .type zfs_sha256_block_armv8,%function .align 5 zfs_sha256_block_armv8: .LARMv8: vld1.32 {q0,q1},[r0] sub r3,r3,#256+32 add r2,r1,r2,lsl#6 @ len to point at the end of inp b .Loop_v8 .align 4 .Loop_v8: vld1.8 {q8-q9},[r1]! vld1.8 {q10-q11},[r1]! vld1.32 {q12},[r3]! vrev32.8 q8,q8 vrev32.8 q9,q9 vrev32.8 q10,q10 vrev32.8 q11,q11 vmov q14,q0 @ offload vmov q15,q1 teq r1,r2 vld1.32 {q13},[r3]! 
vadd.i32 q12,q12,q8 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 vmov q2,q0 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 vmov q2,q0 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q10 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 vmov q2,q0 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q11 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 vmov q2,q0 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 vmov q2,q0 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 vmov q2,q0 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q10 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 vmov q2,q0 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q11 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 vmov q2,q0 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 vmov q2,q0 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 vmov q2,q0 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q10 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 vmov q2,q0 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q11 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 vmov q2,q0 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 vmov q2,q0 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 vld1.32 {q12},[r3]! 
vadd.i32 q13,q13,q9 vmov q2,q0 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 vld1.32 {q13},[r3] vadd.i32 q12,q12,q10 sub r3,r3,#256-16 @ rewind vmov q2,q0 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 vadd.i32 q13,q13,q11 vmov q2,q0 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 vadd.i32 q0,q0,q14 vadd.i32 q1,q1,q15 it ne bne .Loop_v8 vst1.32 {q0,q1},[r0] bx lr @ bx lr .size zfs_sha256_block_armv8,.-zfs_sha256_block_armv8 -#endif +#endif // #if __ARM_ARCH__ >= 7 +#endif // #if defined(__arm__) diff --git a/module/icp/asm-arm/sha2/sha512-armv7.S b/module/icp/asm-arm/sha2/sha512-armv7.S index 499cb6df9567..66d7dd3cf0f7 100644 --- a/module/icp/asm-arm/sha2/sha512-armv7.S +++ b/module/icp/asm-arm/sha2/sha512-armv7.S @@ -1,1825 +1,1827 @@ /* * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Portions Copyright (c) 2022 Tino Reichardt * - modified assembly to fit into OpenZFS */ #if defined(__arm__) #ifndef __ARM_ARCH # define __ARM_ARCH__ 7 #else # define __ARM_ARCH__ __ARM_ARCH #endif #ifndef __KERNEL__ # define VFP_ABI_PUSH vstmdb sp!,{d8-d15} # define VFP_ABI_POP vldmia sp!,{d8-d15} #else # define VFP_ABI_PUSH # define VFP_ABI_POP #endif #ifdef __ARMEL__ # define LO 0 # define HI 4 # define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 #else # define HI 0 # define LO 4 # define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 #endif #if defined(__thumb2__) .syntax unified .thumb # define adrl adr #else .code 32 #endif .text .type K512,%object .align 5 K512: WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) WORD64(0x2748774c,0xdf8eeb99, 
0x34b0bcb5,0xe19b48a8) WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 .word 0 @ terminator .align 5 .globl zfs_sha512_block_armv7 .type zfs_sha512_block_armv7,%function zfs_sha512_block_armv7: .Lzfs_sha512_block_armv7: #if __ARM_ARCH__<7 && !defined(__thumb2__) sub r3,pc,#8 @ zfs_sha512_block_armv7 #else adr r3,.Lzfs_sha512_block_armv7 #endif add r2,r1,r2,lsl#7 @ len to point at the end of inp stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} sub r14,r3,#672 @ K512 sub sp,sp,#9*8 ldr r7,[r0,#32+LO] ldr r8,[r0,#32+HI] ldr r9, [r0,#48+LO] ldr r10, [r0,#48+HI] ldr r11, [r0,#56+LO] ldr r12, [r0,#56+HI] .Loop: str r9, [sp,#48+0] str r10, [sp,#48+4] str r11, [sp,#56+0] str r12, [sp,#56+4] ldr r5,[r0,#0+LO] ldr r6,[r0,#0+HI] ldr r3,[r0,#8+LO] ldr r4,[r0,#8+HI] ldr r9, [r0,#16+LO] ldr r10, [r0,#16+HI] ldr r11, [r0,#24+LO] ldr r12, [r0,#24+HI] str r3,[sp,#8+0] str r4,[sp,#8+4] str r9, [sp,#16+0] str r10, [sp,#16+4] str r11, [sp,#24+0] str r12, [sp,#24+4] ldr r3,[r0,#40+LO] ldr r4,[r0,#40+HI] str r3,[sp,#40+0] str r4,[sp,#40+4] .L00_15: #if __ARM_ARCH__<7 ldrb r3,[r1,#7] ldrb r9, [r1,#6] ldrb r10, [r1,#5] ldrb r11, [r1,#4] ldrb r4,[r1,#3] ldrb r12, [r1,#2] orr r3,r3,r9,lsl#8 ldrb r9, [r1,#1] orr r3,r3,r10,lsl#16 ldrb r10, [r1],#8 orr r3,r3,r11,lsl#24 orr r4,r4,r12,lsl#8 orr r4,r4,r9,lsl#16 orr r4,r4,r10,lsl#24 #else ldr r3,[r1,#4] ldr r4,[r1],#8 #ifdef __ARMEL__ rev r3,r3 rev r4,r4 #endif #endif @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 mov r9,r7,lsr#14 str r3,[sp,#64+0] mov r10,r8,lsr#14 str r4,[sp,#64+4] eor r9,r9,r8,lsl#18 ldr r11,[sp,#56+0] @ h.lo eor r10,r10,r7,lsl#18 ldr r12,[sp,#56+4] @ h.hi eor r9,r9,r7,lsr#18 eor r10,r10,r8,lsr#18 eor r9,r9,r8,lsl#14 eor r10,r10,r7,lsl#14 eor r9,r9,r8,lsr#9 eor r10,r10,r7,lsr#9 eor r9,r9,r7,lsl#23 eor r10,r10,r8,lsl#23 @ Sigma1(e) adds r3,r3,r9 ldr r9,[sp,#40+0] @ f.lo adc r4,r4,r10 @ T += Sigma1(e) ldr r10,[sp,#40+4] @ f.hi adds r3,r3,r11 ldr r11,[sp,#48+0] @ g.lo adc r4,r4,r12 @ T += h ldr r12,[sp,#48+4] @ g.hi eor r9,r9,r11 str r7,[sp,#32+0] eor r10,r10,r12 str r8,[sp,#32+4] and r9,r9,r7 str r5,[sp,#0+0] and r10,r10,r8 str r6,[sp,#0+4] eor r9,r9,r11 ldr r11,[r14,#LO] @ K[i].lo eor r10,r10,r12 @ Ch(e,f,g) ldr r12,[r14,#HI] @ K[i].hi adds r3,r3,r9 ldr r7,[sp,#24+0] @ d.lo adc r4,r4,r10 @ T += Ch(e,f,g) ldr r8,[sp,#24+4] @ d.hi adds r3,r3,r11 and r9,r11,#0xff adc r4,r4,r12 @ T += K[i] adds r7,r7,r3 ldr r11,[sp,#8+0] @ b.lo adc r8,r8,r4 @ d += T teq r9,#148 ldr r12,[sp,#16+0] @ c.lo #ifdef __thumb2__ it eq @ Thumb2 thing, sanity check in ARM #endif orreq r14,r14,#1 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 mov r9,r5,lsr#28 mov r10,r6,lsr#28 
eor r9,r9,r6,lsl#4 eor r10,r10,r5,lsl#4 eor r9,r9,r6,lsr#2 eor r10,r10,r5,lsr#2 eor r9,r9,r5,lsl#30 eor r10,r10,r6,lsl#30 eor r9,r9,r6,lsr#7 eor r10,r10,r5,lsr#7 eor r9,r9,r5,lsl#25 eor r10,r10,r6,lsl#25 @ Sigma0(a) adds r3,r3,r9 and r9,r5,r11 adc r4,r4,r10 @ T += Sigma0(a) ldr r10,[sp,#8+4] @ b.hi orr r5,r5,r11 ldr r11,[sp,#16+4] @ c.hi and r5,r5,r12 and r12,r6,r10 orr r6,r6,r10 orr r5,r5,r9 @ Maj(a,b,c).lo and r6,r6,r11 adds r5,r5,r3 orr r6,r6,r12 @ Maj(a,b,c).hi sub sp,sp,#8 adc r6,r6,r4 @ h += T tst r14,#1 add r14,r14,#8 tst r14,#1 beq .L00_15 ldr r9,[sp,#184+0] ldr r10,[sp,#184+4] bic r14,r14,#1 .L16_79: @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 mov r3,r9,lsr#1 ldr r11,[sp,#80+0] mov r4,r10,lsr#1 ldr r12,[sp,#80+4] eor r3,r3,r10,lsl#31 eor r4,r4,r9,lsl#31 eor r3,r3,r9,lsr#8 eor r4,r4,r10,lsr#8 eor r3,r3,r10,lsl#24 eor r4,r4,r9,lsl#24 eor r3,r3,r9,lsr#7 eor r4,r4,r10,lsr#7 eor r3,r3,r10,lsl#25 @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 mov r9,r11,lsr#19 mov r10,r12,lsr#19 eor r9,r9,r12,lsl#13 eor r10,r10,r11,lsl#13 eor r9,r9,r12,lsr#29 eor r10,r10,r11,lsr#29 eor r9,r9,r11,lsl#3 eor r10,r10,r12,lsl#3 eor r9,r9,r11,lsr#6 eor r10,r10,r12,lsr#6 ldr r11,[sp,#120+0] eor r9,r9,r12,lsl#26 ldr r12,[sp,#120+4] adds r3,r3,r9 ldr r9,[sp,#192+0] adc r4,r4,r10 ldr r10,[sp,#192+4] adds r3,r3,r11 adc r4,r4,r12 adds r3,r3,r9 adc r4,r4,r10 @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 mov r9,r7,lsr#14 str r3,[sp,#64+0] mov r10,r8,lsr#14 str r4,[sp,#64+4] eor r9,r9,r8,lsl#18 ldr r11,[sp,#56+0] @ h.lo eor r10,r10,r7,lsl#18 ldr r12,[sp,#56+4] @ h.hi eor r9,r9,r7,lsr#18 eor r10,r10,r8,lsr#18 eor r9,r9,r8,lsl#14 eor r10,r10,r7,lsl#14 eor r9,r9,r8,lsr#9 eor r10,r10,r7,lsr#9 eor r9,r9,r7,lsl#23 eor r10,r10,r8,lsl#23 @ Sigma1(e) adds r3,r3,r9 ldr r9,[sp,#40+0] @ f.lo adc r4,r4,r10 @ T += Sigma1(e) ldr r10,[sp,#40+4] @ f.hi adds r3,r3,r11 ldr r11,[sp,#48+0] @ g.lo adc r4,r4,r12 @ T += h ldr r12,[sp,#48+4] @ g.hi eor r9,r9,r11 str r7,[sp,#32+0] eor r10,r10,r12 str r8,[sp,#32+4] and r9,r9,r7 str r5,[sp,#0+0] and r10,r10,r8 str r6,[sp,#0+4] eor r9,r9,r11 ldr r11,[r14,#LO] @ K[i].lo eor r10,r10,r12 @ Ch(e,f,g) ldr r12,[r14,#HI] @ K[i].hi adds r3,r3,r9 ldr r7,[sp,#24+0] @ d.lo adc r4,r4,r10 @ T += Ch(e,f,g) ldr r8,[sp,#24+4] @ d.hi adds r3,r3,r11 and r9,r11,#0xff adc r4,r4,r12 @ T += K[i] adds r7,r7,r3 ldr r11,[sp,#8+0] @ b.lo adc r8,r8,r4 @ d += T teq r9,#23 ldr r12,[sp,#16+0] @ c.lo #ifdef __thumb2__ it eq @ Thumb2 thing, sanity check in ARM #endif orreq r14,r14,#1 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 mov r9,r5,lsr#28 mov r10,r6,lsr#28 eor r9,r9,r6,lsl#4 eor r10,r10,r5,lsl#4 eor r9,r9,r6,lsr#2 eor r10,r10,r5,lsr#2 eor r9,r9,r5,lsl#30 eor r10,r10,r6,lsl#30 eor r9,r9,r6,lsr#7 eor r10,r10,r5,lsr#7 eor r9,r9,r5,lsl#25 eor r10,r10,r6,lsl#25 @ Sigma0(a) adds r3,r3,r9 and r9,r5,r11 adc r4,r4,r10 @ T += Sigma0(a) ldr r10,[sp,#8+4] @ b.hi orr r5,r5,r11 ldr r11,[sp,#16+4] @ c.hi and r5,r5,r12 and r12,r6,r10 orr r6,r6,r10 orr r5,r5,r9 @ Maj(a,b,c).lo and r6,r6,r11 adds r5,r5,r3 orr r6,r6,r12 @ Maj(a,b,c).hi sub sp,sp,#8 adc r6,r6,r4 @ h += T tst r14,#1 add r14,r14,#8 #ifdef __thumb2__ ittt eq @ Thumb2 thing, 
sanity check in ARM #endif ldreq r9,[sp,#184+0] ldreq r10,[sp,#184+4] beq .L16_79 bic r14,r14,#1 ldr r3,[sp,#8+0] ldr r4,[sp,#8+4] ldr r9, [r0,#0+LO] ldr r10, [r0,#0+HI] ldr r11, [r0,#8+LO] ldr r12, [r0,#8+HI] adds r9,r5,r9 str r9, [r0,#0+LO] adc r10,r6,r10 str r10, [r0,#0+HI] adds r11,r3,r11 str r11, [r0,#8+LO] adc r12,r4,r12 str r12, [r0,#8+HI] ldr r5,[sp,#16+0] ldr r6,[sp,#16+4] ldr r3,[sp,#24+0] ldr r4,[sp,#24+4] ldr r9, [r0,#16+LO] ldr r10, [r0,#16+HI] ldr r11, [r0,#24+LO] ldr r12, [r0,#24+HI] adds r9,r5,r9 str r9, [r0,#16+LO] adc r10,r6,r10 str r10, [r0,#16+HI] adds r11,r3,r11 str r11, [r0,#24+LO] adc r12,r4,r12 str r12, [r0,#24+HI] ldr r3,[sp,#40+0] ldr r4,[sp,#40+4] ldr r9, [r0,#32+LO] ldr r10, [r0,#32+HI] ldr r11, [r0,#40+LO] ldr r12, [r0,#40+HI] adds r7,r7,r9 str r7,[r0,#32+LO] adc r8,r8,r10 str r8,[r0,#32+HI] adds r11,r3,r11 str r11, [r0,#40+LO] adc r12,r4,r12 str r12, [r0,#40+HI] ldr r5,[sp,#48+0] ldr r6,[sp,#48+4] ldr r3,[sp,#56+0] ldr r4,[sp,#56+4] ldr r9, [r0,#48+LO] ldr r10, [r0,#48+HI] ldr r11, [r0,#56+LO] ldr r12, [r0,#56+HI] adds r9,r5,r9 str r9, [r0,#48+LO] adc r10,r6,r10 str r10, [r0,#48+HI] adds r11,r3,r11 str r11, [r0,#56+LO] adc r12,r4,r12 str r12, [r0,#56+HI] add sp,sp,#640 sub r14,r14,#640 teq r1,r2 bne .Loop add sp,sp,#8*9 @ destroy frame #if __ARM_ARCH__>=5 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} #else ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif .size zfs_sha512_block_armv7,.-zfs_sha512_block_armv7 +#if __ARM_ARCH__ >= 7 .arch armv7-a .fpu neon .globl zfs_sha512_block_neon .type zfs_sha512_block_neon,%function .align 4 zfs_sha512_block_neon: .LNEON: dmb @ errata #451034 on early Cortex A8 add r2,r1,r2,lsl#7 @ len to point at the end of inp adr r3,K512 VFP_ABI_PUSH vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context .Loop_neon: vshr.u64 d24,d20,#14 @ 0 #if 0<16 vld1.64 {d0},[r1]! @ handles unaligned #endif vshr.u64 d25,d20,#18 #if 0>0 vadd.i64 d16,d30 @ h+=Maj from the past #endif vshr.u64 d26,d20,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d20,#50 vsli.64 d25,d20,#46 vmov d29,d20 vsli.64 d26,d20,#23 #if 0<16 && defined(__ARMEL__) vrev64.8 d0,d0 #endif veor d25,d24 vbsl d29,d21,d22 @ Ch(e,f,g) vshr.u64 d24,d16,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d23 vshr.u64 d25,d16,#34 vsli.64 d24,d16,#36 vadd.i64 d27,d26 vshr.u64 d26,d16,#39 vadd.i64 d28,d0 vsli.64 d25,d16,#30 veor d30,d16,d17 vsli.64 d26,d16,#25 veor d23,d24,d25 vadd.i64 d27,d28 vbsl d30,d18,d17 @ Maj(a,b,c) veor d23,d26 @ Sigma0(a) vadd.i64 d19,d27 vadd.i64 d30,d27 @ vadd.i64 d23,d30 vshr.u64 d24,d19,#14 @ 1 #if 1<16 vld1.64 {d1},[r1]! @ handles unaligned #endif vshr.u64 d25,d19,#18 #if 1>0 vadd.i64 d23,d30 @ h+=Maj from the past #endif vshr.u64 d26,d19,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d19,#50 vsli.64 d25,d19,#46 vmov d29,d19 vsli.64 d26,d19,#23 #if 1<16 && defined(__ARMEL__) vrev64.8 d1,d1 #endif veor d25,d24 vbsl d29,d20,d21 @ Ch(e,f,g) vshr.u64 d24,d23,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d22 vshr.u64 d25,d23,#34 vsli.64 d24,d23,#36 vadd.i64 d27,d26 vshr.u64 d26,d23,#39 vadd.i64 d28,d1 vsli.64 d25,d23,#30 veor d30,d23,d16 vsli.64 d26,d23,#25 veor d22,d24,d25 vadd.i64 d27,d28 vbsl d30,d17,d16 @ Maj(a,b,c) veor d22,d26 @ Sigma0(a) vadd.i64 d18,d27 vadd.i64 d30,d27 @ vadd.i64 d22,d30 vshr.u64 d24,d18,#14 @ 2 #if 2<16 vld1.64 {d2},[r1]! 
@ handles unaligned #endif vshr.u64 d25,d18,#18 #if 2>0 vadd.i64 d22,d30 @ h+=Maj from the past #endif vshr.u64 d26,d18,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d18,#50 vsli.64 d25,d18,#46 vmov d29,d18 vsli.64 d26,d18,#23 #if 2<16 && defined(__ARMEL__) vrev64.8 d2,d2 #endif veor d25,d24 vbsl d29,d19,d20 @ Ch(e,f,g) vshr.u64 d24,d22,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d21 vshr.u64 d25,d22,#34 vsli.64 d24,d22,#36 vadd.i64 d27,d26 vshr.u64 d26,d22,#39 vadd.i64 d28,d2 vsli.64 d25,d22,#30 veor d30,d22,d23 vsli.64 d26,d22,#25 veor d21,d24,d25 vadd.i64 d27,d28 vbsl d30,d16,d23 @ Maj(a,b,c) veor d21,d26 @ Sigma0(a) vadd.i64 d17,d27 vadd.i64 d30,d27 @ vadd.i64 d21,d30 vshr.u64 d24,d17,#14 @ 3 #if 3<16 vld1.64 {d3},[r1]! @ handles unaligned #endif vshr.u64 d25,d17,#18 #if 3>0 vadd.i64 d21,d30 @ h+=Maj from the past #endif vshr.u64 d26,d17,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d17,#50 vsli.64 d25,d17,#46 vmov d29,d17 vsli.64 d26,d17,#23 #if 3<16 && defined(__ARMEL__) vrev64.8 d3,d3 #endif veor d25,d24 vbsl d29,d18,d19 @ Ch(e,f,g) vshr.u64 d24,d21,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d20 vshr.u64 d25,d21,#34 vsli.64 d24,d21,#36 vadd.i64 d27,d26 vshr.u64 d26,d21,#39 vadd.i64 d28,d3 vsli.64 d25,d21,#30 veor d30,d21,d22 vsli.64 d26,d21,#25 veor d20,d24,d25 vadd.i64 d27,d28 vbsl d30,d23,d22 @ Maj(a,b,c) veor d20,d26 @ Sigma0(a) vadd.i64 d16,d27 vadd.i64 d30,d27 @ vadd.i64 d20,d30 vshr.u64 d24,d16,#14 @ 4 #if 4<16 vld1.64 {d4},[r1]! @ handles unaligned #endif vshr.u64 d25,d16,#18 #if 4>0 vadd.i64 d20,d30 @ h+=Maj from the past #endif vshr.u64 d26,d16,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d16,#50 vsli.64 d25,d16,#46 vmov d29,d16 vsli.64 d26,d16,#23 #if 4<16 && defined(__ARMEL__) vrev64.8 d4,d4 #endif veor d25,d24 vbsl d29,d17,d18 @ Ch(e,f,g) vshr.u64 d24,d20,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d19 vshr.u64 d25,d20,#34 vsli.64 d24,d20,#36 vadd.i64 d27,d26 vshr.u64 d26,d20,#39 vadd.i64 d28,d4 vsli.64 d25,d20,#30 veor d30,d20,d21 vsli.64 d26,d20,#25 veor d19,d24,d25 vadd.i64 d27,d28 vbsl d30,d22,d21 @ Maj(a,b,c) veor d19,d26 @ Sigma0(a) vadd.i64 d23,d27 vadd.i64 d30,d27 @ vadd.i64 d19,d30 vshr.u64 d24,d23,#14 @ 5 #if 5<16 vld1.64 {d5},[r1]! @ handles unaligned #endif vshr.u64 d25,d23,#18 #if 5>0 vadd.i64 d19,d30 @ h+=Maj from the past #endif vshr.u64 d26,d23,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d23,#50 vsli.64 d25,d23,#46 vmov d29,d23 vsli.64 d26,d23,#23 #if 5<16 && defined(__ARMEL__) vrev64.8 d5,d5 #endif veor d25,d24 vbsl d29,d16,d17 @ Ch(e,f,g) vshr.u64 d24,d19,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d18 vshr.u64 d25,d19,#34 vsli.64 d24,d19,#36 vadd.i64 d27,d26 vshr.u64 d26,d19,#39 vadd.i64 d28,d5 vsli.64 d25,d19,#30 veor d30,d19,d20 vsli.64 d26,d19,#25 veor d18,d24,d25 vadd.i64 d27,d28 vbsl d30,d21,d20 @ Maj(a,b,c) veor d18,d26 @ Sigma0(a) vadd.i64 d22,d27 vadd.i64 d30,d27 @ vadd.i64 d18,d30 vshr.u64 d24,d22,#14 @ 6 #if 6<16 vld1.64 {d6},[r1]! @ handles unaligned #endif vshr.u64 d25,d22,#18 #if 6>0 vadd.i64 d18,d30 @ h+=Maj from the past #endif vshr.u64 d26,d22,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d22,#50 vsli.64 d25,d22,#46 vmov d29,d22 vsli.64 d26,d22,#23 #if 6<16 && defined(__ARMEL__) vrev64.8 d6,d6 #endif veor d25,d24 vbsl d29,d23,d16 @ Ch(e,f,g) vshr.u64 d24,d18,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d17 vshr.u64 d25,d18,#34 vsli.64 d24,d18,#36 vadd.i64 d27,d26 vshr.u64 d26,d18,#39 vadd.i64 d28,d6 vsli.64 d25,d18,#30 veor d30,d18,d19 vsli.64 d26,d18,#25 veor d17,d24,d25 vadd.i64 d27,d28 vbsl d30,d20,d19 @ Maj(a,b,c) veor d17,d26 @ Sigma0(a) vadd.i64 d21,d27 vadd.i64 d30,d27 @ vadd.i64 d17,d30 vshr.u64 d24,d21,#14 @ 7 #if 7<16 vld1.64 {d7},[r1]! @ handles unaligned #endif vshr.u64 d25,d21,#18 #if 7>0 vadd.i64 d17,d30 @ h+=Maj from the past #endif vshr.u64 d26,d21,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d21,#50 vsli.64 d25,d21,#46 vmov d29,d21 vsli.64 d26,d21,#23 #if 7<16 && defined(__ARMEL__) vrev64.8 d7,d7 #endif veor d25,d24 vbsl d29,d22,d23 @ Ch(e,f,g) vshr.u64 d24,d17,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d16 vshr.u64 d25,d17,#34 vsli.64 d24,d17,#36 vadd.i64 d27,d26 vshr.u64 d26,d17,#39 vadd.i64 d28,d7 vsli.64 d25,d17,#30 veor d30,d17,d18 vsli.64 d26,d17,#25 veor d16,d24,d25 vadd.i64 d27,d28 vbsl d30,d19,d18 @ Maj(a,b,c) veor d16,d26 @ Sigma0(a) vadd.i64 d20,d27 vadd.i64 d30,d27 @ vadd.i64 d16,d30 vshr.u64 d24,d20,#14 @ 8 #if 8<16 vld1.64 {d8},[r1]! @ handles unaligned #endif vshr.u64 d25,d20,#18 #if 8>0 vadd.i64 d16,d30 @ h+=Maj from the past #endif vshr.u64 d26,d20,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d20,#50 vsli.64 d25,d20,#46 vmov d29,d20 vsli.64 d26,d20,#23 #if 8<16 && defined(__ARMEL__) vrev64.8 d8,d8 #endif veor d25,d24 vbsl d29,d21,d22 @ Ch(e,f,g) vshr.u64 d24,d16,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d23 vshr.u64 d25,d16,#34 vsli.64 d24,d16,#36 vadd.i64 d27,d26 vshr.u64 d26,d16,#39 vadd.i64 d28,d8 vsli.64 d25,d16,#30 veor d30,d16,d17 vsli.64 d26,d16,#25 veor d23,d24,d25 vadd.i64 d27,d28 vbsl d30,d18,d17 @ Maj(a,b,c) veor d23,d26 @ Sigma0(a) vadd.i64 d19,d27 vadd.i64 d30,d27 @ vadd.i64 d23,d30 vshr.u64 d24,d19,#14 @ 9 #if 9<16 vld1.64 {d9},[r1]! @ handles unaligned #endif vshr.u64 d25,d19,#18 #if 9>0 vadd.i64 d23,d30 @ h+=Maj from the past #endif vshr.u64 d26,d19,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d19,#50 vsli.64 d25,d19,#46 vmov d29,d19 vsli.64 d26,d19,#23 #if 9<16 && defined(__ARMEL__) vrev64.8 d9,d9 #endif veor d25,d24 vbsl d29,d20,d21 @ Ch(e,f,g) vshr.u64 d24,d23,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d22 vshr.u64 d25,d23,#34 vsli.64 d24,d23,#36 vadd.i64 d27,d26 vshr.u64 d26,d23,#39 vadd.i64 d28,d9 vsli.64 d25,d23,#30 veor d30,d23,d16 vsli.64 d26,d23,#25 veor d22,d24,d25 vadd.i64 d27,d28 vbsl d30,d17,d16 @ Maj(a,b,c) veor d22,d26 @ Sigma0(a) vadd.i64 d18,d27 vadd.i64 d30,d27 @ vadd.i64 d22,d30 vshr.u64 d24,d18,#14 @ 10 #if 10<16 vld1.64 {d10},[r1]! @ handles unaligned #endif vshr.u64 d25,d18,#18 #if 10>0 vadd.i64 d22,d30 @ h+=Maj from the past #endif vshr.u64 d26,d18,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d18,#50 vsli.64 d25,d18,#46 vmov d29,d18 vsli.64 d26,d18,#23 #if 10<16 && defined(__ARMEL__) vrev64.8 d10,d10 #endif veor d25,d24 vbsl d29,d19,d20 @ Ch(e,f,g) vshr.u64 d24,d22,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d21 vshr.u64 d25,d22,#34 vsli.64 d24,d22,#36 vadd.i64 d27,d26 vshr.u64 d26,d22,#39 vadd.i64 d28,d10 vsli.64 d25,d22,#30 veor d30,d22,d23 vsli.64 d26,d22,#25 veor d21,d24,d25 vadd.i64 d27,d28 vbsl d30,d16,d23 @ Maj(a,b,c) veor d21,d26 @ Sigma0(a) vadd.i64 d17,d27 vadd.i64 d30,d27 @ vadd.i64 d21,d30 vshr.u64 d24,d17,#14 @ 11 #if 11<16 vld1.64 {d11},[r1]! @ handles unaligned #endif vshr.u64 d25,d17,#18 #if 11>0 vadd.i64 d21,d30 @ h+=Maj from the past #endif vshr.u64 d26,d17,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d17,#50 vsli.64 d25,d17,#46 vmov d29,d17 vsli.64 d26,d17,#23 #if 11<16 && defined(__ARMEL__) vrev64.8 d11,d11 #endif veor d25,d24 vbsl d29,d18,d19 @ Ch(e,f,g) vshr.u64 d24,d21,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d20 vshr.u64 d25,d21,#34 vsli.64 d24,d21,#36 vadd.i64 d27,d26 vshr.u64 d26,d21,#39 vadd.i64 d28,d11 vsli.64 d25,d21,#30 veor d30,d21,d22 vsli.64 d26,d21,#25 veor d20,d24,d25 vadd.i64 d27,d28 vbsl d30,d23,d22 @ Maj(a,b,c) veor d20,d26 @ Sigma0(a) vadd.i64 d16,d27 vadd.i64 d30,d27 @ vadd.i64 d20,d30 vshr.u64 d24,d16,#14 @ 12 #if 12<16 vld1.64 {d12},[r1]! @ handles unaligned #endif vshr.u64 d25,d16,#18 #if 12>0 vadd.i64 d20,d30 @ h+=Maj from the past #endif vshr.u64 d26,d16,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d16,#50 vsli.64 d25,d16,#46 vmov d29,d16 vsli.64 d26,d16,#23 #if 12<16 && defined(__ARMEL__) vrev64.8 d12,d12 #endif veor d25,d24 vbsl d29,d17,d18 @ Ch(e,f,g) vshr.u64 d24,d20,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d19 vshr.u64 d25,d20,#34 vsli.64 d24,d20,#36 vadd.i64 d27,d26 vshr.u64 d26,d20,#39 vadd.i64 d28,d12 vsli.64 d25,d20,#30 veor d30,d20,d21 vsli.64 d26,d20,#25 veor d19,d24,d25 vadd.i64 d27,d28 vbsl d30,d22,d21 @ Maj(a,b,c) veor d19,d26 @ Sigma0(a) vadd.i64 d23,d27 vadd.i64 d30,d27 @ vadd.i64 d19,d30 vshr.u64 d24,d23,#14 @ 13 #if 13<16 vld1.64 {d13},[r1]! @ handles unaligned #endif vshr.u64 d25,d23,#18 #if 13>0 vadd.i64 d19,d30 @ h+=Maj from the past #endif vshr.u64 d26,d23,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d23,#50 vsli.64 d25,d23,#46 vmov d29,d23 vsli.64 d26,d23,#23 #if 13<16 && defined(__ARMEL__) vrev64.8 d13,d13 #endif veor d25,d24 vbsl d29,d16,d17 @ Ch(e,f,g) vshr.u64 d24,d19,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d18 vshr.u64 d25,d19,#34 vsli.64 d24,d19,#36 vadd.i64 d27,d26 vshr.u64 d26,d19,#39 vadd.i64 d28,d13 vsli.64 d25,d19,#30 veor d30,d19,d20 vsli.64 d26,d19,#25 veor d18,d24,d25 vadd.i64 d27,d28 vbsl d30,d21,d20 @ Maj(a,b,c) veor d18,d26 @ Sigma0(a) vadd.i64 d22,d27 vadd.i64 d30,d27 @ vadd.i64 d18,d30 vshr.u64 d24,d22,#14 @ 14 #if 14<16 vld1.64 {d14},[r1]! @ handles unaligned #endif vshr.u64 d25,d22,#18 #if 14>0 vadd.i64 d18,d30 @ h+=Maj from the past #endif vshr.u64 d26,d22,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d22,#50 vsli.64 d25,d22,#46 vmov d29,d22 vsli.64 d26,d22,#23 #if 14<16 && defined(__ARMEL__) vrev64.8 d14,d14 #endif veor d25,d24 vbsl d29,d23,d16 @ Ch(e,f,g) vshr.u64 d24,d18,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d17 vshr.u64 d25,d18,#34 vsli.64 d24,d18,#36 vadd.i64 d27,d26 vshr.u64 d26,d18,#39 vadd.i64 d28,d14 vsli.64 d25,d18,#30 veor d30,d18,d19 vsli.64 d26,d18,#25 veor d17,d24,d25 vadd.i64 d27,d28 vbsl d30,d20,d19 @ Maj(a,b,c) veor d17,d26 @ Sigma0(a) vadd.i64 d21,d27 vadd.i64 d30,d27 @ vadd.i64 d17,d30 vshr.u64 d24,d21,#14 @ 15 #if 15<16 vld1.64 {d15},[r1]! @ handles unaligned #endif vshr.u64 d25,d21,#18 #if 15>0 vadd.i64 d17,d30 @ h+=Maj from the past #endif vshr.u64 d26,d21,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d21,#50 vsli.64 d25,d21,#46 vmov d29,d21 vsli.64 d26,d21,#23 #if 15<16 && defined(__ARMEL__) vrev64.8 d15,d15 #endif veor d25,d24 vbsl d29,d22,d23 @ Ch(e,f,g) vshr.u64 d24,d17,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d16 vshr.u64 d25,d17,#34 vsli.64 d24,d17,#36 vadd.i64 d27,d26 vshr.u64 d26,d17,#39 vadd.i64 d28,d15 vsli.64 d25,d17,#30 veor d30,d17,d18 vsli.64 d26,d17,#25 veor d16,d24,d25 vadd.i64 d27,d28 vbsl d30,d19,d18 @ Maj(a,b,c) veor d16,d26 @ Sigma0(a) vadd.i64 d20,d27 vadd.i64 d30,d27 @ vadd.i64 d16,d30 mov r12,#4 .L16_79_neon: subs r12,#1 vshr.u64 q12,q7,#19 vshr.u64 q13,q7,#61 vadd.i64 d16,d30 @ h+=Maj from the past vshr.u64 q15,q7,#6 vsli.64 q12,q7,#45 vext.8 q14,q0,q1,#8 @ X[i+1] vsli.64 q13,q7,#3 veor q15,q12 vshr.u64 q12,q14,#1 veor q15,q13 @ sigma1(X[i+14]) vshr.u64 q13,q14,#8 vadd.i64 q0,q15 vshr.u64 q15,q14,#7 vsli.64 q12,q14,#63 vsli.64 q13,q14,#56 vext.8 q14,q4,q5,#8 @ X[i+9] veor q15,q12 vshr.u64 d24,d20,#14 @ from NEON_00_15 vadd.i64 q0,q14 vshr.u64 d25,d20,#18 @ from NEON_00_15 veor q15,q13 @ sigma0(X[i+1]) vshr.u64 d26,d20,#41 @ from NEON_00_15 vadd.i64 q0,q15 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d20,#50 vsli.64 d25,d20,#46 vmov d29,d20 vsli.64 d26,d20,#23 #if 16<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d21,d22 @ Ch(e,f,g) vshr.u64 d24,d16,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d23 vshr.u64 d25,d16,#34 vsli.64 d24,d16,#36 vadd.i64 d27,d26 vshr.u64 d26,d16,#39 vadd.i64 d28,d0 vsli.64 d25,d16,#30 veor d30,d16,d17 vsli.64 d26,d16,#25 veor d23,d24,d25 vadd.i64 d27,d28 vbsl d30,d18,d17 @ Maj(a,b,c) veor d23,d26 @ Sigma0(a) vadd.i64 d19,d27 vadd.i64 d30,d27 @ vadd.i64 d23,d30 vshr.u64 d24,d19,#14 @ 17 #if 17<16 vld1.64 {d1},[r1]! @ handles unaligned #endif vshr.u64 d25,d19,#18 #if 17>0 vadd.i64 d23,d30 @ h+=Maj from the past #endif vshr.u64 d26,d19,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d19,#50 vsli.64 d25,d19,#46 vmov d29,d19 vsli.64 d26,d19,#23 #if 17<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d20,d21 @ Ch(e,f,g) vshr.u64 d24,d23,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d22 vshr.u64 d25,d23,#34 vsli.64 d24,d23,#36 vadd.i64 d27,d26 vshr.u64 d26,d23,#39 vadd.i64 d28,d1 vsli.64 d25,d23,#30 veor d30,d23,d16 vsli.64 d26,d23,#25 veor d22,d24,d25 vadd.i64 d27,d28 vbsl d30,d17,d16 @ Maj(a,b,c) veor d22,d26 @ Sigma0(a) vadd.i64 d18,d27 vadd.i64 d30,d27 @ vadd.i64 d22,d30 vshr.u64 q12,q0,#19 vshr.u64 q13,q0,#61 vadd.i64 d22,d30 @ h+=Maj from the past vshr.u64 q15,q0,#6 vsli.64 q12,q0,#45 vext.8 q14,q1,q2,#8 @ X[i+1] vsli.64 q13,q0,#3 veor q15,q12 vshr.u64 q12,q14,#1 veor q15,q13 @ sigma1(X[i+14]) vshr.u64 q13,q14,#8 vadd.i64 q1,q15 vshr.u64 q15,q14,#7 vsli.64 q12,q14,#63 vsli.64 q13,q14,#56 vext.8 q14,q5,q6,#8 @ X[i+9] veor q15,q12 vshr.u64 d24,d18,#14 @ from NEON_00_15 vadd.i64 q1,q14 vshr.u64 d25,d18,#18 @ from NEON_00_15 veor q15,q13 @ sigma0(X[i+1]) vshr.u64 d26,d18,#41 @ from NEON_00_15 vadd.i64 q1,q15 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d18,#50 vsli.64 d25,d18,#46 vmov d29,d18 vsli.64 d26,d18,#23 #if 18<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d19,d20 @ Ch(e,f,g) vshr.u64 d24,d22,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d21 vshr.u64 d25,d22,#34 vsli.64 d24,d22,#36 vadd.i64 d27,d26 vshr.u64 d26,d22,#39 vadd.i64 d28,d2 vsli.64 d25,d22,#30 veor d30,d22,d23 vsli.64 d26,d22,#25 veor d21,d24,d25 vadd.i64 d27,d28 vbsl d30,d16,d23 @ Maj(a,b,c) veor d21,d26 @ Sigma0(a) vadd.i64 d17,d27 vadd.i64 d30,d27 @ vadd.i64 d21,d30 vshr.u64 d24,d17,#14 @ 19 #if 19<16 vld1.64 {d3},[r1]! @ handles unaligned #endif vshr.u64 d25,d17,#18 #if 19>0 vadd.i64 d21,d30 @ h+=Maj from the past #endif vshr.u64 d26,d17,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d17,#50 vsli.64 d25,d17,#46 vmov d29,d17 vsli.64 d26,d17,#23 #if 19<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d18,d19 @ Ch(e,f,g) vshr.u64 d24,d21,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d20 vshr.u64 d25,d21,#34 vsli.64 d24,d21,#36 vadd.i64 d27,d26 vshr.u64 d26,d21,#39 vadd.i64 d28,d3 vsli.64 d25,d21,#30 veor d30,d21,d22 vsli.64 d26,d21,#25 veor d20,d24,d25 vadd.i64 d27,d28 vbsl d30,d23,d22 @ Maj(a,b,c) veor d20,d26 @ Sigma0(a) vadd.i64 d16,d27 vadd.i64 d30,d27 @ vadd.i64 d20,d30 vshr.u64 q12,q1,#19 vshr.u64 q13,q1,#61 vadd.i64 d20,d30 @ h+=Maj from the past vshr.u64 q15,q1,#6 vsli.64 q12,q1,#45 vext.8 q14,q2,q3,#8 @ X[i+1] vsli.64 q13,q1,#3 veor q15,q12 vshr.u64 q12,q14,#1 veor q15,q13 @ sigma1(X[i+14]) vshr.u64 q13,q14,#8 vadd.i64 q2,q15 vshr.u64 q15,q14,#7 vsli.64 q12,q14,#63 vsli.64 q13,q14,#56 vext.8 q14,q6,q7,#8 @ X[i+9] veor q15,q12 vshr.u64 d24,d16,#14 @ from NEON_00_15 vadd.i64 q2,q14 vshr.u64 d25,d16,#18 @ from NEON_00_15 veor q15,q13 @ sigma0(X[i+1]) vshr.u64 d26,d16,#41 @ from NEON_00_15 vadd.i64 q2,q15 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d16,#50 vsli.64 d25,d16,#46 vmov d29,d16 vsli.64 d26,d16,#23 #if 20<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d17,d18 @ Ch(e,f,g) vshr.u64 d24,d20,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d19 vshr.u64 d25,d20,#34 vsli.64 d24,d20,#36 vadd.i64 d27,d26 vshr.u64 d26,d20,#39 vadd.i64 d28,d4 vsli.64 d25,d20,#30 veor d30,d20,d21 vsli.64 d26,d20,#25 veor d19,d24,d25 vadd.i64 d27,d28 vbsl d30,d22,d21 @ Maj(a,b,c) veor d19,d26 @ Sigma0(a) vadd.i64 d23,d27 vadd.i64 d30,d27 @ vadd.i64 d19,d30 vshr.u64 d24,d23,#14 @ 21 #if 21<16 vld1.64 {d5},[r1]! 
@ handles unaligned #endif vshr.u64 d25,d23,#18 #if 21>0 vadd.i64 d19,d30 @ h+=Maj from the past #endif vshr.u64 d26,d23,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d23,#50 vsli.64 d25,d23,#46 vmov d29,d23 vsli.64 d26,d23,#23 #if 21<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d16,d17 @ Ch(e,f,g) vshr.u64 d24,d19,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d18 vshr.u64 d25,d19,#34 vsli.64 d24,d19,#36 vadd.i64 d27,d26 vshr.u64 d26,d19,#39 vadd.i64 d28,d5 vsli.64 d25,d19,#30 veor d30,d19,d20 vsli.64 d26,d19,#25 veor d18,d24,d25 vadd.i64 d27,d28 vbsl d30,d21,d20 @ Maj(a,b,c) veor d18,d26 @ Sigma0(a) vadd.i64 d22,d27 vadd.i64 d30,d27 @ vadd.i64 d18,d30 vshr.u64 q12,q2,#19 vshr.u64 q13,q2,#61 vadd.i64 d18,d30 @ h+=Maj from the past vshr.u64 q15,q2,#6 vsli.64 q12,q2,#45 vext.8 q14,q3,q4,#8 @ X[i+1] vsli.64 q13,q2,#3 veor q15,q12 vshr.u64 q12,q14,#1 veor q15,q13 @ sigma1(X[i+14]) vshr.u64 q13,q14,#8 vadd.i64 q3,q15 vshr.u64 q15,q14,#7 vsli.64 q12,q14,#63 vsli.64 q13,q14,#56 vext.8 q14,q7,q0,#8 @ X[i+9] veor q15,q12 vshr.u64 d24,d22,#14 @ from NEON_00_15 vadd.i64 q3,q14 vshr.u64 d25,d22,#18 @ from NEON_00_15 veor q15,q13 @ sigma0(X[i+1]) vshr.u64 d26,d22,#41 @ from NEON_00_15 vadd.i64 q3,q15 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d22,#50 vsli.64 d25,d22,#46 vmov d29,d22 vsli.64 d26,d22,#23 #if 22<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d23,d16 @ Ch(e,f,g) vshr.u64 d24,d18,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d17 vshr.u64 d25,d18,#34 vsli.64 d24,d18,#36 vadd.i64 d27,d26 vshr.u64 d26,d18,#39 vadd.i64 d28,d6 vsli.64 d25,d18,#30 veor d30,d18,d19 vsli.64 d26,d18,#25 veor d17,d24,d25 vadd.i64 d27,d28 vbsl d30,d20,d19 @ Maj(a,b,c) veor d17,d26 @ Sigma0(a) vadd.i64 d21,d27 vadd.i64 d30,d27 @ vadd.i64 d17,d30 vshr.u64 d24,d21,#14 @ 23 #if 23<16 vld1.64 {d7},[r1]! @ handles unaligned #endif vshr.u64 d25,d21,#18 #if 23>0 vadd.i64 d17,d30 @ h+=Maj from the past #endif vshr.u64 d26,d21,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d21,#50 vsli.64 d25,d21,#46 vmov d29,d21 vsli.64 d26,d21,#23 #if 23<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d22,d23 @ Ch(e,f,g) vshr.u64 d24,d17,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d16 vshr.u64 d25,d17,#34 vsli.64 d24,d17,#36 vadd.i64 d27,d26 vshr.u64 d26,d17,#39 vadd.i64 d28,d7 vsli.64 d25,d17,#30 veor d30,d17,d18 vsli.64 d26,d17,#25 veor d16,d24,d25 vadd.i64 d27,d28 vbsl d30,d19,d18 @ Maj(a,b,c) veor d16,d26 @ Sigma0(a) vadd.i64 d20,d27 vadd.i64 d30,d27 @ vadd.i64 d16,d30 vshr.u64 q12,q3,#19 vshr.u64 q13,q3,#61 vadd.i64 d16,d30 @ h+=Maj from the past vshr.u64 q15,q3,#6 vsli.64 q12,q3,#45 vext.8 q14,q4,q5,#8 @ X[i+1] vsli.64 q13,q3,#3 veor q15,q12 vshr.u64 q12,q14,#1 veor q15,q13 @ sigma1(X[i+14]) vshr.u64 q13,q14,#8 vadd.i64 q4,q15 vshr.u64 q15,q14,#7 vsli.64 q12,q14,#63 vsli.64 q13,q14,#56 vext.8 q14,q0,q1,#8 @ X[i+9] veor q15,q12 vshr.u64 d24,d20,#14 @ from NEON_00_15 vadd.i64 q4,q14 vshr.u64 d25,d20,#18 @ from NEON_00_15 veor q15,q13 @ sigma0(X[i+1]) vshr.u64 d26,d20,#41 @ from NEON_00_15 vadd.i64 q4,q15 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d20,#50 vsli.64 d25,d20,#46 vmov d29,d20 vsli.64 d26,d20,#23 #if 24<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d21,d22 @ Ch(e,f,g) vshr.u64 d24,d16,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d23 vshr.u64 d25,d16,#34 vsli.64 d24,d16,#36 vadd.i64 d27,d26 vshr.u64 d26,d16,#39 vadd.i64 d28,d8 vsli.64 d25,d16,#30 veor d30,d16,d17 vsli.64 d26,d16,#25 veor d23,d24,d25 vadd.i64 d27,d28 vbsl d30,d18,d17 @ Maj(a,b,c) veor d23,d26 @ Sigma0(a) vadd.i64 d19,d27 vadd.i64 d30,d27 @ vadd.i64 d23,d30 vshr.u64 d24,d19,#14 @ 25 #if 25<16 vld1.64 {d9},[r1]! @ handles unaligned #endif vshr.u64 d25,d19,#18 #if 25>0 vadd.i64 d23,d30 @ h+=Maj from the past #endif vshr.u64 d26,d19,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d19,#50 vsli.64 d25,d19,#46 vmov d29,d19 vsli.64 d26,d19,#23 #if 25<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d20,d21 @ Ch(e,f,g) vshr.u64 d24,d23,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d22 vshr.u64 d25,d23,#34 vsli.64 d24,d23,#36 vadd.i64 d27,d26 vshr.u64 d26,d23,#39 vadd.i64 d28,d9 vsli.64 d25,d23,#30 veor d30,d23,d16 vsli.64 d26,d23,#25 veor d22,d24,d25 vadd.i64 d27,d28 vbsl d30,d17,d16 @ Maj(a,b,c) veor d22,d26 @ Sigma0(a) vadd.i64 d18,d27 vadd.i64 d30,d27 @ vadd.i64 d22,d30 vshr.u64 q12,q4,#19 vshr.u64 q13,q4,#61 vadd.i64 d22,d30 @ h+=Maj from the past vshr.u64 q15,q4,#6 vsli.64 q12,q4,#45 vext.8 q14,q5,q6,#8 @ X[i+1] vsli.64 q13,q4,#3 veor q15,q12 vshr.u64 q12,q14,#1 veor q15,q13 @ sigma1(X[i+14]) vshr.u64 q13,q14,#8 vadd.i64 q5,q15 vshr.u64 q15,q14,#7 vsli.64 q12,q14,#63 vsli.64 q13,q14,#56 vext.8 q14,q1,q2,#8 @ X[i+9] veor q15,q12 vshr.u64 d24,d18,#14 @ from NEON_00_15 vadd.i64 q5,q14 vshr.u64 d25,d18,#18 @ from NEON_00_15 veor q15,q13 @ sigma0(X[i+1]) vshr.u64 d26,d18,#41 @ from NEON_00_15 vadd.i64 q5,q15 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d18,#50 vsli.64 d25,d18,#46 vmov d29,d18 vsli.64 d26,d18,#23 #if 26<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d19,d20 @ Ch(e,f,g) vshr.u64 d24,d22,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d21 vshr.u64 d25,d22,#34 vsli.64 d24,d22,#36 vadd.i64 d27,d26 vshr.u64 d26,d22,#39 vadd.i64 d28,d10 vsli.64 d25,d22,#30 veor d30,d22,d23 vsli.64 d26,d22,#25 veor d21,d24,d25 vadd.i64 d27,d28 vbsl d30,d16,d23 @ Maj(a,b,c) veor d21,d26 @ Sigma0(a) vadd.i64 d17,d27 vadd.i64 d30,d27 @ vadd.i64 d21,d30 vshr.u64 d24,d17,#14 @ 27 #if 27<16 vld1.64 {d11},[r1]! @ handles unaligned #endif vshr.u64 d25,d17,#18 #if 27>0 vadd.i64 d21,d30 @ h+=Maj from the past #endif vshr.u64 d26,d17,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d17,#50 vsli.64 d25,d17,#46 vmov d29,d17 vsli.64 d26,d17,#23 #if 27<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d18,d19 @ Ch(e,f,g) vshr.u64 d24,d21,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d20 vshr.u64 d25,d21,#34 vsli.64 d24,d21,#36 vadd.i64 d27,d26 vshr.u64 d26,d21,#39 vadd.i64 d28,d11 vsli.64 d25,d21,#30 veor d30,d21,d22 vsli.64 d26,d21,#25 veor d20,d24,d25 vadd.i64 d27,d28 vbsl d30,d23,d22 @ Maj(a,b,c) veor d20,d26 @ Sigma0(a) vadd.i64 d16,d27 vadd.i64 d30,d27 @ vadd.i64 d20,d30 vshr.u64 q12,q5,#19 vshr.u64 q13,q5,#61 vadd.i64 d20,d30 @ h+=Maj from the past vshr.u64 q15,q5,#6 vsli.64 q12,q5,#45 vext.8 q14,q6,q7,#8 @ X[i+1] vsli.64 q13,q5,#3 veor q15,q12 vshr.u64 q12,q14,#1 veor q15,q13 @ sigma1(X[i+14]) vshr.u64 q13,q14,#8 vadd.i64 q6,q15 vshr.u64 q15,q14,#7 vsli.64 q12,q14,#63 vsli.64 q13,q14,#56 vext.8 q14,q2,q3,#8 @ X[i+9] veor q15,q12 vshr.u64 d24,d16,#14 @ from NEON_00_15 vadd.i64 q6,q14 vshr.u64 d25,d16,#18 @ from NEON_00_15 veor q15,q13 @ sigma0(X[i+1]) vshr.u64 d26,d16,#41 @ from NEON_00_15 vadd.i64 q6,q15 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d16,#50 vsli.64 d25,d16,#46 vmov d29,d16 vsli.64 d26,d16,#23 #if 28<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d17,d18 @ Ch(e,f,g) vshr.u64 d24,d20,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d19 vshr.u64 d25,d20,#34 vsli.64 d24,d20,#36 vadd.i64 d27,d26 vshr.u64 d26,d20,#39 vadd.i64 d28,d12 vsli.64 d25,d20,#30 veor d30,d20,d21 vsli.64 d26,d20,#25 veor d19,d24,d25 vadd.i64 d27,d28 vbsl d30,d22,d21 @ Maj(a,b,c) veor d19,d26 @ Sigma0(a) vadd.i64 d23,d27 vadd.i64 d30,d27 @ vadd.i64 d19,d30 vshr.u64 d24,d23,#14 @ 29 #if 29<16 vld1.64 {d13},[r1]! @ handles unaligned #endif vshr.u64 d25,d23,#18 #if 29>0 vadd.i64 d19,d30 @ h+=Maj from the past #endif vshr.u64 d26,d23,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d23,#50 vsli.64 d25,d23,#46 vmov d29,d23 vsli.64 d26,d23,#23 #if 29<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d16,d17 @ Ch(e,f,g) vshr.u64 d24,d19,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d18 vshr.u64 d25,d19,#34 vsli.64 d24,d19,#36 vadd.i64 d27,d26 vshr.u64 d26,d19,#39 vadd.i64 d28,d13 vsli.64 d25,d19,#30 veor d30,d19,d20 vsli.64 d26,d19,#25 veor d18,d24,d25 vadd.i64 d27,d28 vbsl d30,d21,d20 @ Maj(a,b,c) veor d18,d26 @ Sigma0(a) vadd.i64 d22,d27 vadd.i64 d30,d27 @ vadd.i64 d18,d30 vshr.u64 q12,q6,#19 vshr.u64 q13,q6,#61 vadd.i64 d18,d30 @ h+=Maj from the past vshr.u64 q15,q6,#6 vsli.64 q12,q6,#45 vext.8 q14,q7,q0,#8 @ X[i+1] vsli.64 q13,q6,#3 veor q15,q12 vshr.u64 q12,q14,#1 veor q15,q13 @ sigma1(X[i+14]) vshr.u64 q13,q14,#8 vadd.i64 q7,q15 vshr.u64 q15,q14,#7 vsli.64 q12,q14,#63 vsli.64 q13,q14,#56 vext.8 q14,q3,q4,#8 @ X[i+9] veor q15,q12 vshr.u64 d24,d22,#14 @ from NEON_00_15 vadd.i64 q7,q14 vshr.u64 d25,d22,#18 @ from NEON_00_15 veor q15,q13 @ sigma0(X[i+1]) vshr.u64 d26,d22,#41 @ from NEON_00_15 vadd.i64 q7,q15 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d22,#50 vsli.64 d25,d22,#46 vmov d29,d22 vsli.64 d26,d22,#23 #if 30<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d23,d16 @ Ch(e,f,g) vshr.u64 d24,d18,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d17 vshr.u64 d25,d18,#34 vsli.64 d24,d18,#36 vadd.i64 d27,d26 vshr.u64 d26,d18,#39 vadd.i64 d28,d14 vsli.64 d25,d18,#30 veor d30,d18,d19 vsli.64 d26,d18,#25 veor d17,d24,d25 vadd.i64 d27,d28 vbsl d30,d20,d19 @ Maj(a,b,c) veor d17,d26 @ Sigma0(a) vadd.i64 d21,d27 vadd.i64 d30,d27 @ vadd.i64 d17,d30 vshr.u64 d24,d21,#14 @ 31 #if 31<16 vld1.64 {d15},[r1]! 
@ handles unaligned #endif vshr.u64 d25,d21,#18 #if 31>0 vadd.i64 d17,d30 @ h+=Maj from the past #endif vshr.u64 d26,d21,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d21,#50 vsli.64 d25,d21,#46 vmov d29,d21 vsli.64 d26,d21,#23 #if 31<16 && defined(__ARMEL__) vrev64.8 , #endif veor d25,d24 vbsl d29,d22,d23 @ Ch(e,f,g) vshr.u64 d24,d17,#28 veor d26,d25 @ Sigma1(e) vadd.i64 d27,d29,d16 vshr.u64 d25,d17,#34 vsli.64 d24,d17,#36 vadd.i64 d27,d26 vshr.u64 d26,d17,#39 vadd.i64 d28,d15 vsli.64 d25,d17,#30 veor d30,d17,d18 vsli.64 d26,d17,#25 veor d16,d24,d25 vadd.i64 d27,d28 vbsl d30,d19,d18 @ Maj(a,b,c) veor d16,d26 @ Sigma0(a) vadd.i64 d20,d27 vadd.i64 d30,d27 @ vadd.i64 d16,d30 bne .L16_79_neon vadd.i64 d16,d30 @ h+=Maj from the past vldmia r0,{d24,d25,d26,d27,d28,d29,d30,d31} @ load context to temp vadd.i64 q8,q12 @ vectorized accumulate vadd.i64 q9,q13 vadd.i64 q10,q14 vadd.i64 q11,q15 vstmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ save context teq r1,r2 sub r3,#640 @ rewind K512 bne .Loop_neon VFP_ABI_POP bx lr @ .word 0xe12fff1e .size zfs_sha512_block_neon,.-zfs_sha512_block_neon -#endif +#endif // #if __ARM_ARCH__ >= 7 +#endif // #if defined(__arm__)
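Reviewer aid (not part of the patch): the scalar rounds above interleave each 64-bit SHA-512 operation across 32-bit register pairs, and the NEON path folds Ch/Maj into vbsl selects, so the intent is easiest to check against the formulas quoted in the assembly comments (Sigma0/Sigma1/sigma0/sigma1, Ch, Maj). Below is a minimal C sketch of those building blocks for reference only; the helper names (rotr64, Sigma0, Sigma1, sigma0, sigma1, Ch, Maj) are chosen here to mirror the comment notation and are not identifiers from the patched sources.

/*
 * Reference sketch of the SHA-512 round primitives named in the
 * assembly comments above.  Illustrative only; not part of the patch.
 */
#include <stdint.h>
#include <stdio.h>

/* 64-bit rotate right, the ROTR() used throughout the comments. */
static inline uint64_t rotr64(uint64_t x, unsigned n)
{
	return ((x >> n) | (x << (64 - n)));
}

/* Sigma1(x) = ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41) */
static inline uint64_t Sigma1(uint64_t x)
{
	return (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41));
}

/* Sigma0(x) = ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39) */
static inline uint64_t Sigma0(uint64_t x)
{
	return (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39));
}

/* sigma0(x) = ROTR(x,1) ^ ROTR(x,8) ^ (x >> 7) */
static inline uint64_t sigma0(uint64_t x)
{
	return (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7));
}

/* sigma1(x) = ROTR(x,19) ^ ROTR(x,61) ^ (x >> 6) */
static inline uint64_t sigma1(uint64_t x)
{
	return (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6));
}

/* Ch(e,f,g): selected with vbsl in the NEON rounds above. */
static inline uint64_t Ch(uint64_t e, uint64_t f, uint64_t g)
{
	return ((e & f) ^ (~e & g));
}

/* Maj(a,b,c): the and/orr sequence in the scalar rounds above. */
static inline uint64_t Maj(uint64_t a, uint64_t b, uint64_t c)
{
	return ((a & b) ^ (a & c) ^ (b & c));
}

int
main(void)
{
	/* Arbitrary sample inputs; prints one h + Sigma1(e) + Ch(e,f,g) term. */
	uint64_t e = 0x0123456789abcdefULL, f = 0xfedcba9876543210ULL;
	uint64_t g = 0x0f0f0f0f0f0f0f0fULL, h = 0x1111111111111111ULL;

	printf("%016llx\n",
	    (unsigned long long)(h + Sigma1(e) + Ch(e, f, g)));
	return (0);
}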