# Patch summary (leading commentary; not part of any hunk).
#
# Two files receive the same pair of changes:
#   contrib/arm-optimized-routines/string/aarch64/memcpy.S
#   sys/arm64/arm64/memcpy.S
#
# Hunk 1, at L(copy96): the two stores addressed from dstend (C at -32,
# D at -16) are moved ahead of the four stores addressed from dstin
# (A at 0, B at 16, E at 32, F at 48).  The instructions and operands are
# unchanged; only their position in the sequence differs.
#
# Hunk 2, the final stores before `ret` at the end of the function: the
# four stores addressed from dstin were issued at offsets 48, 32, 16, 0 and
# are now issued at offsets 0, 16, 32, 48.  Each register pair keeps its
# offset (C -> 0, B -> 16, A -> 32, G -> 48).
#
# No instruction is added or removed by any hunk; every change is a
# reordering of stp instructions.
#
# NOTE(review): the loads that fill A, B, D, E, F, G and H are outside the
# hunks; the reordering presumably relies on all of them having completed
# before these stores -- confirm against the full file.
# NOTE(review): in the sys/arm64 hunks the new-file start line is one
# greater than the old-file start line (132 -> 133, 232 -> 233), yet no
# earlier hunk for that file is visible here -- confirm nothing is missing.
# NOTE(review): runs of whitespace inside hunk lines appear to have been
# collapsed; matching against the real files may need whitespace-insensitive
# application.
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy.S b/contrib/arm-optimized-routines/string/aarch64/memcpy.S
--- a/contrib/arm-optimized-routines/string/aarch64/memcpy.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcpy.S
@@ -133,12 +133,12 @@
  stp G_l, G_h, [dstend, -64]
  stp H_l, H_h, [dstend, -48]
 L(copy96):
+ stp C_l, C_h, [dstend, -32]
+ stp D_l, D_h, [dstend, -16]
  stp A_l, A_h, [dstin]
  stp B_l, B_h, [dstin, 16]
  stp E_l, E_h, [dstin, 32]
  stp F_l, F_h, [dstin, 48]
- stp C_l, C_h, [dstend, -32]
- stp D_l, D_h, [dstend, -16]
  ret
 
  .p2align 4
@@ -233,10 +233,10 @@
  stp C_l, C_h, [dstend, -48]
  ldp C_l, C_h, [src]
  stp D_l, D_h, [dstend, -64]
- stp G_l, G_h, [dstin, 48]
- stp A_l, A_h, [dstin, 32]
- stp B_l, B_h, [dstin, 16]
  stp C_l, C_h, [dstin]
+ stp B_l, B_h, [dstin, 16]
+ stp A_l, A_h, [dstin, 32]
+ stp G_l, G_h, [dstin, 48]
  ret
 
 END (__memcpy_aarch64)
diff --git a/sys/arm64/arm64/memcpy.S b/sys/arm64/arm64/memcpy.S
--- a/sys/arm64/arm64/memcpy.S
+++ b/sys/arm64/arm64/memcpy.S
@@ -132,12 +133,12 @@
  stp G_l, G_h, [dstend, -64]
  stp H_l, H_h, [dstend, -48]
 L(copy96):
+ stp C_l, C_h, [dstend, -32]
+ stp D_l, D_h, [dstend, -16]
  stp A_l, A_h, [dstin]
  stp B_l, B_h, [dstin, 16]
  stp E_l, E_h, [dstin, 32]
  stp F_l, F_h, [dstin, 48]
- stp C_l, C_h, [dstend, -32]
- stp D_l, D_h, [dstend, -16]
  ret
 
  .p2align 4
@@ -232,10 +233,10 @@
  stp C_l, C_h, [dstend, -48]
  ldp C_l, C_h, [src]
  stp D_l, D_h, [dstend, -64]
- stp G_l, G_h, [dstin, 48]
- stp A_l, A_h, [dstin, 32]
- stp B_l, B_h, [dstin, 16]
  stp C_l, C_h, [dstin]
+ stp B_l, B_h, [dstin, 16]
+ stp A_l, A_h, [dstin, 32]
+ stp G_l, G_h, [dstin, 48]
  ret
 EEND(memmove)
 END(memcpy)