diff --git a/sys/crypto/openssl/powerpc/aesp8-ppc.S b/sys/crypto/openssl/powerpc/aesp8-ppc.S --- a/sys/crypto/openssl/powerpc/aesp8-ppc.S +++ b/sys/crypto/openssl/powerpc/aesp8-ppc.S @@ -9,11 +9,12 @@ .byte 0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00 .byte 0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe .Lconsts: mflr 0 bcl 20,31,$+4 mflr 6 - addi 6,6,-0x48 + addi 6,6,-0x58 mtlr 0 blr .long 0 @@ -2347,6 +2348,18 @@ li 31,0x70 mtspr 256,0 + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + subi 9,9,3 lvx 23,0,6 @@ -2389,69 +2402,77 @@ vperm 31,31,22,7 lvx 25,3,7 + + + + + + + + vperm 0,2,4,5 subi 10,10,31 vxor 17,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 7,0,17 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x7C235699 vxor 18,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 12,1,18 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x7C5A5699 andi. 31,5,15 vxor 19,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 13,2,19 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x7C7B5699 sub 5,5,31 vxor 20,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 14,3,20 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x7C9C5699 subi 5,5,0x60 vxor 21,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 15,4,21 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x7CBD5699 addi 10,10,0x60 vxor 22,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 16,5,22 - vxor 8,8,11 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 vxor 31,31,23 mtctr 9 @@ -2477,6 +2498,8 @@ lvx 25,3,7 bdnz .Loop_xts_enc6x + xxlor 32+10, 1, 1 + subic 5,5,96 vxor 0,17,31 .long 0x10E7C508 @@ -2486,7 +2509,6 @@ vaddubm 8,8,8 .long 0x11ADC508 .long 0x11CEC508 - vsldoi 11,11,11,15 .long 0x11EFC508 .long 0x1210C508 @@ -2494,7 +2516,8 @@ vand 11,11,10 .long 0x10E7CD08 .long 0x118CCD08 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x11ADCD08 .long 0x11CECD08 vxor 1,18,31 @@ -2505,13 +2528,13 @@ and 0,0,5 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7D508 .long 0x118CD508 vand 11,11,10 .long 0x11ADD508 .long 0x11CED508 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x11EFD508 .long 0x1210D508 @@ -2525,7 +2548,6 @@ vaddubm 8,8,8 .long 0x10E7DD08 .long 0x118CDD08 - vsldoi 11,11,11,15 .long 0x11ADDD08 .long 0x11CEDD08 vand 11,11,10 @@ -2533,7 +2555,8 @@ .long 0x1210DD08 addi 7,1,32+15 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x10E7E508 .long 0x118CE508 vxor 3,20,31 @@ -2542,7 +2565,6 @@ .long 0x11ADE508 .long 0x11CEE508 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x11EFE508 .long 0x1210E508 lvx 24,0,7 @@ -2550,7 +2572,8 @@ .long 0x10E7ED08 .long 0x118CED08 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x11ADED08 .long 0x11CEED08 vxor 4,21,31 @@ -2560,14 +2583,14 @@ .long 0x1210ED08 lvx 25,3,7 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7F508 .long 0x118CF508 vand 11,11,10 .long 0x11ADF508 .long 0x11CEF508 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x11EFF508 .long 0x1210F508 vxor 5,22,31 @@ -2577,7 +2600,6 @@ .long 0x10E70509 .long 0x7C005699 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x118C0D09 .long 0x7C235699 .long 0x11AD1509 @@ -2590,7 +2612,10 @@ .long 0x11EF2509 .long 0x7C9C5699 - vxor 8,8,11 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 .long 0x11702D09 @@ -2623,6 +2648,8 @@ mtctr 9 beq .Loop_xts_enc6x + xxlor 32+10, 2, 2 + addic. 5,5,0x60 beq .Lxts_enc6x_zero cmpwi 5,0x20 @@ -2999,6 +3026,18 @@ li 31,0x70 mtspr 256,0 + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + subi 9,9,3 lvx 23,0,6 @@ -3046,64 +3085,64 @@ vxor 17,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 7,0,17 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x7C235699 vxor 18,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 12,1,18 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x7C5A5699 andi. 31,5,15 vxor 19,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 13,2,19 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x7C7B5699 sub 5,5,31 vxor 20,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 14,3,20 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x7C9C5699 subi 5,5,0x60 vxor 21,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 15,4,21 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x7CBD5699 addi 10,10,0x60 vxor 22,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 16,5,22 - vxor 8,8,11 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 vxor 31,31,23 mtctr 9 @@ -3129,6 +3168,8 @@ lvx 25,3,7 bdnz .Loop_xts_dec6x + xxlor 32+10, 1, 1 + subic 5,5,96 vxor 0,17,31 .long 0x10E7C548 @@ -3138,7 +3179,6 @@ vaddubm 8,8,8 .long 0x11ADC548 .long 0x11CEC548 - vsldoi 11,11,11,15 .long 0x11EFC548 .long 0x1210C548 @@ -3146,7 +3186,8 @@ vand 11,11,10 .long 0x10E7CD48 .long 0x118CCD48 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x11ADCD48 .long 0x11CECD48 vxor 1,18,31 @@ -3157,13 +3198,13 @@ and 0,0,5 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7D548 .long 0x118CD548 vand 11,11,10 .long 0x11ADD548 .long 0x11CED548 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x11EFD548 .long 0x1210D548 @@ -3177,7 +3218,6 @@ vaddubm 8,8,8 .long 0x10E7DD48 .long 0x118CDD48 - vsldoi 11,11,11,15 .long 0x11ADDD48 .long 0x11CEDD48 vand 11,11,10 @@ -3185,7 +3225,8 @@ .long 0x1210DD48 addi 7,1,32+15 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x10E7E548 .long 0x118CE548 vxor 3,20,31 @@ -3194,7 +3235,6 @@ .long 0x11ADE548 .long 0x11CEE548 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x11EFE548 .long 0x1210E548 lvx 24,0,7 @@ -3202,7 +3242,8 @@ .long 0x10E7ED48 .long 0x118CED48 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x11ADED48 .long 0x11CEED48 vxor 4,21,31 @@ -3212,14 +3253,14 @@ .long 0x1210ED48 lvx 25,3,7 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7F548 .long 0x118CF548 vand 11,11,10 .long 0x11ADF548 .long 0x11CEF548 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x11EFF548 .long 0x1210F548 vxor 5,22,31 @@ -3229,7 +3270,6 @@ .long 0x10E70549 .long 0x7C005699 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x118C0D49 .long 0x7C235699 .long 0x11AD1549 @@ -3242,7 +3282,10 @@ .long 0x11EF2549 .long 0x7C9C5699 - vxor 8,8,11 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 .long 0x12102D49 .long 0x7CBD5699 @@ -3273,6 +3316,8 @@ mtctr 9 beq .Loop_xts_dec6x + xxlor 32+10, 2, 2 + addic. 5,5,0x60 beq .Lxts_dec6x_zero cmpwi 5,0x20 diff --git a/sys/crypto/openssl/powerpc/poly1305-ppc.S b/sys/crypto/openssl/powerpc/poly1305-ppc.S --- a/sys/crypto/openssl/powerpc/poly1305-ppc.S +++ b/sys/crypto/openssl/powerpc/poly1305-ppc.S @@ -473,7 +473,7 @@ .align 5 __poly1305_blocks_vsx: - stwu 1,-384(1) + stwu 1,-368(1) mflr 0 li 10,167 li 11,183 @@ -484,12 +484,12 @@ addi 11,11,32 stvx 22,10,1 addi 10,10,32 - stvx 23,10,1 - addi 10,10,32 - stvx 24,11,1 + stvx 23,11,1 addi 11,11,32 - stvx 25,10,1 + stvx 24,10,1 addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 stvx 26,10,1 addi 10,10,32 stvx 27,11,1 @@ -500,15 +500,15 @@ addi 11,11,32 stvx 30,10,1 stvx 31,11,1 - stw 12,360(1) + stw 12,344(1) li 12,-1 mtspr 256,12 - stw 27,364(1) - stw 28,368(1) - stw 29,372(1) - stw 30,376(1) - stw 31,380(1) - stw 0,388(1) + stw 27,348(1) + stw 28,352(1) + stw 29,356(1) + stw 30,360(1) + stw 31,364(1) + stw 0,372(1) bl .LPICmeup @@ -1209,7 +1209,7 @@ .align 4 .Ldone_vsx: - lwz 0,388(1) + lwz 0,372(1) li 27,4 li 28,8 li 29,12 @@ -1220,39 +1220,39 @@ .long 0x7C7D1919 .long 0x7C9E1919 - lwz 12,360(1) + lwz 12,344(1) mtlr 0 li 10,167 li 11,183 mtspr 256,12 lvx 20,10,1 addi 10,10,32 - lvx 21,10,1 - addi 10,10,32 - lvx 22,11,1 + lvx 21,11,1 addi 11,11,32 - lvx 23,10,1 + lvx 22,10,1 addi 10,10,32 - lvx 24,11,1 + lvx 23,11,1 addi 11,11,32 - lvx 25,10,1 + lvx 24,10,1 addi 10,10,32 - lvx 26,11,1 + lvx 25,11,1 addi 11,11,32 - lvx 27,10,1 + lvx 26,10,1 addi 10,10,32 - lvx 28,11,1 + lvx 27,11,1 addi 11,11,32 - lvx 29,10,1 + lvx 28,10,1 addi 10,10,32 - lvx 30,11,1 - lvx 31,10,1 - lwz 27,364(1) - lwz 28,368(1) - lwz 29,372(1) - lwz 30,376(1) - lwz 31,380(1) - addi 1,1,384 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + lwz 27,348(1) + lwz 28,352(1) + lwz 29,356(1) + lwz 30,360(1) + lwz 31,364(1) + addi 1,1,368 blr .long 0 .byte 0,12,0x04,1,0x80,5,4,0 diff --git a/sys/crypto/openssl/powerpc64/aesp8-ppc.S b/sys/crypto/openssl/powerpc64/aesp8-ppc.S --- a/sys/crypto/openssl/powerpc64/aesp8-ppc.S +++ b/sys/crypto/openssl/powerpc64/aesp8-ppc.S @@ -10,11 +10,12 @@ .byte 0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00 .byte 0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe .Lconsts: mflr 0 bcl 20,31,$+4 mflr 6 - addi 6,6,-0x48 + addi 6,6,-0x58 mtlr 0 blr .long 0 @@ -2364,6 +2365,18 @@ li 31,0x70 or 0,0,0 + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + subi 9,9,3 lvx 23,0,6 @@ -2406,69 +2419,77 @@ vperm 31,31,22,7 lvx 25,3,7 + + + + + + + + vperm 0,2,4,5 subi 10,10,31 vxor 17,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 7,0,17 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x7C235699 vxor 18,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 12,1,18 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x7C5A5699 andi. 31,5,15 vxor 19,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 13,2,19 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x7C7B5699 sub 5,5,31 vxor 20,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 14,3,20 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x7C9C5699 subi 5,5,0x60 vxor 21,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 15,4,21 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x7CBD5699 addi 10,10,0x60 vxor 22,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 16,5,22 - vxor 8,8,11 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 vxor 31,31,23 mtctr 9 @@ -2494,6 +2515,8 @@ lvx 25,3,7 bdnz .Loop_xts_enc6x + xxlor 32+10, 1, 1 + subic 5,5,96 vxor 0,17,31 .long 0x10E7C508 @@ -2503,7 +2526,6 @@ vaddubm 8,8,8 .long 0x11ADC508 .long 0x11CEC508 - vsldoi 11,11,11,15 .long 0x11EFC508 .long 0x1210C508 @@ -2511,7 +2533,8 @@ vand 11,11,10 .long 0x10E7CD08 .long 0x118CCD08 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x11ADCD08 .long 0x11CECD08 vxor 1,18,31 @@ -2522,13 +2545,13 @@ and 0,0,5 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7D508 .long 0x118CD508 vand 11,11,10 .long 0x11ADD508 .long 0x11CED508 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x11EFD508 .long 0x1210D508 @@ -2542,7 +2565,6 @@ vaddubm 8,8,8 .long 0x10E7DD08 .long 0x118CDD08 - vsldoi 11,11,11,15 .long 0x11ADDD08 .long 0x11CEDD08 vand 11,11,10 @@ -2550,7 +2572,8 @@ .long 0x1210DD08 addi 7,1,64+15 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x10E7E508 .long 0x118CE508 vxor 3,20,31 @@ -2559,7 +2582,6 @@ .long 0x11ADE508 .long 0x11CEE508 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x11EFE508 .long 0x1210E508 lvx 24,0,7 @@ -2567,7 +2589,8 @@ .long 0x10E7ED08 .long 0x118CED08 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x11ADED08 .long 0x11CEED08 vxor 4,21,31 @@ -2577,14 +2600,14 @@ .long 0x1210ED08 lvx 25,3,7 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7F508 .long 0x118CF508 vand 11,11,10 .long 0x11ADF508 .long 0x11CEF508 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x11EFF508 .long 0x1210F508 vxor 5,22,31 @@ -2594,7 +2617,6 @@ .long 0x10E70509 .long 0x7C005699 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x118C0D09 .long 0x7C235699 .long 0x11AD1509 @@ -2607,7 +2629,10 @@ .long 0x11EF2509 .long 0x7C9C5699 - vxor 8,8,11 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 .long 0x11702D09 @@ -2640,6 +2665,8 @@ mtctr 9 beq .Loop_xts_enc6x + xxlor 32+10, 2, 2 + addic. 5,5,0x60 beq .Lxts_enc6x_zero cmpwi 5,0x20 @@ -3016,6 +3043,18 @@ li 31,0x70 or 0,0,0 + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + subi 9,9,3 lvx 23,0,6 @@ -3063,64 +3102,64 @@ vxor 17,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 7,0,17 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x7C235699 vxor 18,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 12,1,18 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x7C5A5699 andi. 31,5,15 vxor 19,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 13,2,19 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x7C7B5699 sub 5,5,31 vxor 20,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 14,3,20 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x7C9C5699 subi 5,5,0x60 vxor 21,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 15,4,21 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x7CBD5699 addi 10,10,0x60 vxor 22,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 16,5,22 - vxor 8,8,11 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 vxor 31,31,23 mtctr 9 @@ -3146,6 +3185,8 @@ lvx 25,3,7 bdnz .Loop_xts_dec6x + xxlor 32+10, 1, 1 + subic 5,5,96 vxor 0,17,31 .long 0x10E7C548 @@ -3155,7 +3196,6 @@ vaddubm 8,8,8 .long 0x11ADC548 .long 0x11CEC548 - vsldoi 11,11,11,15 .long 0x11EFC548 .long 0x1210C548 @@ -3163,7 +3203,8 @@ vand 11,11,10 .long 0x10E7CD48 .long 0x118CCD48 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x11ADCD48 .long 0x11CECD48 vxor 1,18,31 @@ -3174,13 +3215,13 @@ and 0,0,5 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7D548 .long 0x118CD548 vand 11,11,10 .long 0x11ADD548 .long 0x11CED548 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x11EFD548 .long 0x1210D548 @@ -3194,7 +3235,6 @@ vaddubm 8,8,8 .long 0x10E7DD48 .long 0x118CDD48 - vsldoi 11,11,11,15 .long 0x11ADDD48 .long 0x11CEDD48 vand 11,11,10 @@ -3202,7 +3242,8 @@ .long 0x1210DD48 addi 7,1,64+15 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x10E7E548 .long 0x118CE548 vxor 3,20,31 @@ -3211,7 +3252,6 @@ .long 0x11ADE548 .long 0x11CEE548 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x11EFE548 .long 0x1210E548 lvx 24,0,7 @@ -3219,7 +3259,8 @@ .long 0x10E7ED48 .long 0x118CED48 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x11ADED48 .long 0x11CEED48 vxor 4,21,31 @@ -3229,14 +3270,14 @@ .long 0x1210ED48 lvx 25,3,7 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7F548 .long 0x118CF548 vand 11,11,10 .long 0x11ADF548 .long 0x11CEF548 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x11EFF548 .long 0x1210F548 vxor 5,22,31 @@ -3246,7 +3287,6 @@ .long 0x10E70549 .long 0x7C005699 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x118C0D49 .long 0x7C235699 .long 0x11AD1549 @@ -3259,7 +3299,10 @@ .long 0x11EF2549 .long 0x7C9C5699 - vxor 8,8,11 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 .long 0x12102D49 .long 0x7CBD5699 @@ -3290,6 +3333,8 @@ mtctr 9 beq .Loop_xts_dec6x + xxlor 32+10, 2, 2 + addic. 5,5,0x60 beq .Lxts_dec6x_zero cmpwi 5,0x20 diff --git a/sys/crypto/openssl/powerpc64/poly1305-ppc.S b/sys/crypto/openssl/powerpc64/poly1305-ppc.S --- a/sys/crypto/openssl/powerpc64/poly1305-ppc.S +++ b/sys/crypto/openssl/powerpc64/poly1305-ppc.S @@ -380,7 +380,7 @@ .align 5 __poly1305_blocks_vsx: - stdu 1,-432(1) + stdu 1,-416(1) mflr 0 li 10,191 li 11,207 @@ -391,12 +391,12 @@ addi 11,11,32 stvx 22,10,1 addi 10,10,32 - stvx 23,10,1 - addi 10,10,32 - stvx 24,11,1 + stvx 23,11,1 addi 11,11,32 - stvx 25,10,1 + stvx 24,10,1 addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 stvx 26,10,1 addi 10,10,32 stvx 27,11,1 @@ -407,15 +407,15 @@ addi 11,11,32 stvx 30,10,1 stvx 31,11,1 - stw 12,388(1) + stw 12,372(1) li 12,-1 or 12,12,12 - std 27,392(1) - std 28,400(1) - std 29,408(1) - std 30,416(1) - std 31,424(1) - std 0,448(1) + std 27,376(1) + std 28,384(1) + std 29,392(1) + std 30,400(1) + std 31,408(1) + std 0,432(1) bl .LPICmeup @@ -1050,7 +1050,7 @@ .align 4 .Ldone_vsx: - ld 0,448(1) + ld 0,432(1) li 27,4 li 28,8 li 29,12 @@ -1061,39 +1061,39 @@ .long 0x7C7D1919 .long 0x7C9E1919 - lwz 12,388(1) + lwz 12,372(1) mtlr 0 li 10,191 li 11,207 or 12,12,12 lvx 20,10,1 addi 10,10,32 - lvx 21,10,1 - addi 10,10,32 - lvx 22,11,1 + lvx 21,11,1 addi 11,11,32 - lvx 23,10,1 + lvx 22,10,1 addi 10,10,32 - lvx 24,11,1 + lvx 23,11,1 addi 11,11,32 - lvx 25,10,1 + lvx 24,10,1 addi 10,10,32 - lvx 26,11,1 + lvx 25,11,1 addi 11,11,32 - lvx 27,10,1 + lvx 26,10,1 addi 10,10,32 - lvx 28,11,1 + lvx 27,11,1 addi 11,11,32 - lvx 29,10,1 + lvx 28,10,1 addi 10,10,32 - lvx 30,11,1 - lvx 31,10,1 - ld 27,392(1) - ld 28,400(1) - ld 29,408(1) - ld 30,416(1) - ld 31,424(1) - addi 1,1,432 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 27,376(1) + ld 28,384(1) + ld 29,392(1) + ld 30,400(1) + ld 31,408(1) + addi 1,1,416 blr .long 0 .byte 0,12,0x04,1,0x80,5,4,0 diff --git a/sys/crypto/openssl/powerpc64le/aesp8-ppc.S b/sys/crypto/openssl/powerpc64le/aesp8-ppc.S --- a/sys/crypto/openssl/powerpc64le/aesp8-ppc.S +++ b/sys/crypto/openssl/powerpc64le/aesp8-ppc.S @@ -10,11 +10,12 @@ .byte 0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b .byte 0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe .Lconsts: mflr 0 bcl 20,31,$+4 mflr 6 - addi 6,6,-0x48 + addi 6,6,-0x58 mtlr 0 blr .long 0 @@ -2364,6 +2365,18 @@ li 31,0x70 or 0,0,0 + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + subi 9,9,3 lvx 23,0,6 @@ -2406,69 +2419,77 @@ vperm 31,22,31,7 lvx 25,3,7 + + + + + + + + vperm 0,2,4,5 subi 10,10,31 vxor 17,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 7,0,17 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x7C235699 vxor 18,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vperm 1,1,1,6 vand 11,11,10 vxor 12,1,18 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x7C5A5699 andi. 31,5,15 vxor 19,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vperm 2,2,2,6 vand 11,11,10 vxor 13,2,19 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x7C7B5699 sub 5,5,31 vxor 20,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vperm 3,3,3,6 vand 11,11,10 vxor 14,3,20 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x7C9C5699 subi 5,5,0x60 vxor 21,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vperm 4,4,4,6 vand 11,11,10 vxor 15,4,21 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x7CBD5699 addi 10,10,0x60 vxor 22,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vperm 5,5,5,6 vand 11,11,10 vxor 16,5,22 - vxor 8,8,11 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 vxor 31,31,23 mtctr 9 @@ -2494,6 +2515,8 @@ lvx 25,3,7 bdnz .Loop_xts_enc6x + xxlor 32+10, 1, 1 + subic 5,5,96 vxor 0,17,31 .long 0x10E7C508 @@ -2503,7 +2526,6 @@ vaddubm 8,8,8 .long 0x11ADC508 .long 0x11CEC508 - vsldoi 11,11,11,15 .long 0x11EFC508 .long 0x1210C508 @@ -2511,7 +2533,8 @@ vand 11,11,10 .long 0x10E7CD08 .long 0x118CCD08 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x11ADCD08 .long 0x11CECD08 vxor 1,18,31 @@ -2522,13 +2545,13 @@ and 0,0,5 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7D508 .long 0x118CD508 vand 11,11,10 .long 0x11ADD508 .long 0x11CED508 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x11EFD508 .long 0x1210D508 @@ -2542,7 +2565,6 @@ vaddubm 8,8,8 .long 0x10E7DD08 .long 0x118CDD08 - vsldoi 11,11,11,15 .long 0x11ADDD08 .long 0x11CEDD08 vand 11,11,10 @@ -2550,7 +2572,8 @@ .long 0x1210DD08 addi 7,1,64+15 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x10E7E508 .long 0x118CE508 vxor 3,20,31 @@ -2559,7 +2582,6 @@ .long 0x11ADE508 .long 0x11CEE508 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x11EFE508 .long 0x1210E508 lvx 24,0,7 @@ -2567,7 +2589,8 @@ .long 0x10E7ED08 .long 0x118CED08 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x11ADED08 .long 0x11CEED08 vxor 4,21,31 @@ -2577,14 +2600,14 @@ .long 0x1210ED08 lvx 25,3,7 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7F508 .long 0x118CF508 vand 11,11,10 .long 0x11ADF508 .long 0x11CEF508 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x11EFF508 .long 0x1210F508 vxor 5,22,31 @@ -2594,7 +2617,6 @@ .long 0x10E70509 .long 0x7C005699 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x118C0D09 .long 0x7C235699 .long 0x11AD1509 @@ -2607,7 +2629,10 @@ .long 0x11EF2509 vperm 2,2,2,6 .long 0x7C9C5699 - vxor 8,8,11 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 .long 0x11702D09 vperm 3,3,3,6 @@ -2640,6 +2665,8 @@ mtctr 9 beq .Loop_xts_enc6x + xxlor 32+10, 2, 2 + addic. 5,5,0x60 beq .Lxts_enc6x_zero cmpwi 5,0x20 @@ -3016,6 +3043,18 @@ li 31,0x70 or 0,0,0 + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + subi 9,9,3 lvx 23,0,6 @@ -3063,64 +3102,64 @@ vxor 17,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vand 11,11,10 vxor 7,0,17 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x7C235699 vxor 18,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vperm 1,1,1,6 vand 11,11,10 vxor 12,1,18 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x7C5A5699 andi. 31,5,15 vxor 19,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vperm 2,2,2,6 vand 11,11,10 vxor 13,2,19 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x7C7B5699 sub 5,5,31 vxor 20,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vperm 3,3,3,6 vand 11,11,10 vxor 14,3,20 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x7C9C5699 subi 5,5,0x60 vxor 21,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vperm 4,4,4,6 vand 11,11,10 vxor 15,4,21 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x7CBD5699 addi 10,10,0x60 vxor 22,8,23 vsrab 11,8,9 vaddubm 8,8,8 - vsldoi 11,11,11,15 vperm 5,5,5,6 vand 11,11,10 vxor 16,5,22 - vxor 8,8,11 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 vxor 31,31,23 mtctr 9 @@ -3146,6 +3185,8 @@ lvx 25,3,7 bdnz .Loop_xts_dec6x + xxlor 32+10, 1, 1 + subic 5,5,96 vxor 0,17,31 .long 0x10E7C548 @@ -3155,7 +3196,6 @@ vaddubm 8,8,8 .long 0x11ADC548 .long 0x11CEC548 - vsldoi 11,11,11,15 .long 0x11EFC548 .long 0x1210C548 @@ -3163,7 +3203,8 @@ vand 11,11,10 .long 0x10E7CD48 .long 0x118CCD48 - vxor 8,8,11 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 .long 0x11ADCD48 .long 0x11CECD48 vxor 1,18,31 @@ -3174,13 +3215,13 @@ and 0,0,5 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7D548 .long 0x118CD548 vand 11,11,10 .long 0x11ADD548 .long 0x11CED548 - vxor 8,8,11 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 .long 0x11EFD548 .long 0x1210D548 @@ -3194,7 +3235,6 @@ vaddubm 8,8,8 .long 0x10E7DD48 .long 0x118CDD48 - vsldoi 11,11,11,15 .long 0x11ADDD48 .long 0x11CEDD48 vand 11,11,10 @@ -3202,7 +3242,8 @@ .long 0x1210DD48 addi 7,1,64+15 - vxor 8,8,11 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 .long 0x10E7E548 .long 0x118CE548 vxor 3,20,31 @@ -3211,7 +3252,6 @@ .long 0x11ADE548 .long 0x11CEE548 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x11EFE548 .long 0x1210E548 lvx 24,0,7 @@ -3219,7 +3259,8 @@ .long 0x10E7ED48 .long 0x118CED48 - vxor 8,8,11 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 .long 0x11ADED48 .long 0x11CEED48 vxor 4,21,31 @@ -3229,14 +3270,14 @@ .long 0x1210ED48 lvx 25,3,7 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x10E7F548 .long 0x118CF548 vand 11,11,10 .long 0x11ADF548 .long 0x11CEF548 - vxor 8,8,11 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 .long 0x11EFF548 .long 0x1210F548 vxor 5,22,31 @@ -3246,7 +3287,6 @@ .long 0x10E70549 .long 0x7C005699 vaddubm 8,8,8 - vsldoi 11,11,11,15 .long 0x118C0D49 .long 0x7C235699 .long 0x11AD1549 @@ -3259,7 +3299,10 @@ .long 0x11EF2549 vperm 2,2,2,6 .long 0x7C9C5699 - vxor 8,8,11 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 .long 0x12102D49 vperm 3,3,3,6 .long 0x7CBD5699 @@ -3290,6 +3333,8 @@ mtctr 9 beq .Loop_xts_dec6x + xxlor 32+10, 2, 2 + addic. 5,5,0x60 beq .Lxts_dec6x_zero cmpwi 5,0x20 diff --git a/sys/crypto/openssl/powerpc64le/poly1305-ppc.S b/sys/crypto/openssl/powerpc64le/poly1305-ppc.S --- a/sys/crypto/openssl/powerpc64le/poly1305-ppc.S +++ b/sys/crypto/openssl/powerpc64le/poly1305-ppc.S @@ -366,7 +366,7 @@ .align 5 __poly1305_blocks_vsx: - stdu 1,-432(1) + stdu 1,-416(1) mflr 0 li 10,191 li 11,207 @@ -377,12 +377,12 @@ addi 11,11,32 stvx 22,10,1 addi 10,10,32 - stvx 23,10,1 - addi 10,10,32 - stvx 24,11,1 + stvx 23,11,1 addi 11,11,32 - stvx 25,10,1 + stvx 24,10,1 addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 stvx 26,10,1 addi 10,10,32 stvx 27,11,1 @@ -393,15 +393,15 @@ addi 11,11,32 stvx 30,10,1 stvx 31,11,1 - stw 12,388(1) + stw 12,372(1) li 12,-1 or 12,12,12 - std 27,392(1) - std 28,400(1) - std 29,408(1) - std 30,416(1) - std 31,424(1) - std 0,448(1) + std 27,376(1) + std 28,384(1) + std 29,392(1) + std 30,400(1) + std 31,408(1) + std 0,432(1) bl .LPICmeup @@ -1036,7 +1036,7 @@ .align 4 .Ldone_vsx: - ld 0,448(1) + ld 0,432(1) li 27,4 li 28,8 li 29,12 @@ -1047,39 +1047,39 @@ .long 0x7C7D1919 .long 0x7C9E1919 - lwz 12,388(1) + lwz 12,372(1) mtlr 0 li 10,191 li 11,207 or 12,12,12 lvx 20,10,1 addi 10,10,32 - lvx 21,10,1 - addi 10,10,32 - lvx 22,11,1 + lvx 21,11,1 addi 11,11,32 - lvx 23,10,1 + lvx 22,10,1 addi 10,10,32 - lvx 24,11,1 + lvx 23,11,1 addi 11,11,32 - lvx 25,10,1 + lvx 24,10,1 addi 10,10,32 - lvx 26,11,1 + lvx 25,11,1 addi 11,11,32 - lvx 27,10,1 + lvx 26,10,1 addi 10,10,32 - lvx 28,11,1 + lvx 27,11,1 addi 11,11,32 - lvx 29,10,1 + lvx 28,10,1 addi 10,10,32 - lvx 30,11,1 - lvx 31,10,1 - ld 27,392(1) - ld 28,400(1) - ld 29,408(1) - ld 30,416(1) - ld 31,424(1) - addi 1,1,432 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 27,376(1) + ld 28,384(1) + ld 29,392(1) + ld 30,400(1) + ld 31,408(1) + addi 1,1,416 blr .long 0 .byte 0,12,0x04,1,0x80,5,4,0