Index: sys/crypto/skein/amd64/skein_block_asm.s =================================================================== --- sys/crypto/skein/amd64/skein_block_asm.s +++ sys/crypto/skein/amd64/skein_block_asm.s @@ -10,7 +10,9 @@ # .text .altmacro +#ifndef __clang__ .psize 0,128 #list file has no page boundaries +#endif # _MASK_ALL_ = (256+512+1024) #all three algorithm bits _MAX_FRAME_ = 240 @@ -240,9 +242,8 @@ # Output: <<< RC_BlkSize_roundNum_mixNum, BlkSize=256/512/1024 # .macro RotL64 reg,BLK_SIZE,ROUND_NUM,MIX_NUM -_RCNT_ = RC_\BLK_SIZE&_\ROUND_NUM&_\MIX_NUM - .if _RCNT_ #is there anything to do? - rolq $_RCNT_,%\reg + .if RC_\BLK_SIZE\()_\ROUND_NUM\()_\MIX_NUM #is there anything to do? + rolq $RC_\BLK_SIZE\()_\ROUND_NUM\()_\MIX_NUM,%\reg .endif .endm # @@ -334,7 +335,7 @@ #---------------------------------------------------------------- # .macro Reset_Stack - addq $LOCAL_SIZE,%rsp #get rid of locals (wipe??) + addq $LOCAL_SIZE,%rsp #get rid of locals (wipe?) .irp _reg_,r15,r14,r13,r12,rbx,rbp popq %\_reg_ #restore caller's regs _PushCnt_ = _PushCnt_ - 1 @@ -724,22 +725,22 @@ .macro R_512_OneRound rn0,rn1,rn2,rn3,rn4,rn5,rn6,rn7,_Rn_,op1,op2,op3,op4 # addReg r\rn0, r\rn1 - RotL64 r\rn1, 512,%((_Rn_) % 8),0 + RotL64 r\rn1, 512,%((\_Rn_) % 8),0 xorReg r\rn1, r\rn0 - op1 + \op1 addReg r\rn2, r\rn3 - RotL64 r\rn3, 512,%((_Rn_) % 8),1 + RotL64 r\rn3, 512,%((\_Rn_) % 8),1 xorReg r\rn3, r\rn2 - op2 + \op2 addReg r\rn4, r\rn5 - RotL64 r\rn5, 512,%((_Rn_) % 8),2 + RotL64 r\rn5, 512,%((\_Rn_) % 8),2 xorReg r\rn5, r\rn4 - op3 + \op3 addReg r\rn6, r\rn7 - RotL64 r\rn7, 512,%((_Rn_) % 8),3 + RotL64 r\rn7, 512,%((\_Rn_) % 8),3 xorReg r\rn7, r\rn6 - op4 - Skein_Debug_Round 512,%(_Rn_+1),-4 + \op4 + Skein_Debug_Round 512,%(\_Rn_+1),-4 # .endm #R_512_OneRound # @@ -749,11 +750,11 @@ .macro R_512_FourRounds _RR_ #RR = base round number (0 % 8) .if (SKEIN_ASM_UNROLL && 512) # here for fully unrolled case. - _II_ = ((_RR_)/4) + 1 #key injection counter - R_512_OneRound 8, 9,10,11,12,13,14,15,%((_RR_)+0),,, - R_512_OneRound 10, 9,12,15,14,13, 8,11,%((_RR_)+1),,, - R_512_OneRound 12, 9,14,11, 8,13,10,15,%((_RR_)+2),,, - R_512_OneRound 14, 9, 8,15,10,13,12,11,%((_RR_)+3),, + _II_ = ((\_RR_)/4) + 1 #key injection counter + R_512_OneRound 8, 9,10,11,12,13,14,15,%((\_RR_)+0),,, + R_512_OneRound 10, 9,12,15,14,13, 8,11,%((\_RR_)+1),,, + R_512_OneRound 12, 9,14,11, 8,13,10,15,%((\_RR_)+2),,, + R_512_OneRound 14, 9, 8,15,10,13,12,11,%((\_RR_)+3),, # inject the key schedule addq ksKey+8*(((_II_)+0)%9)+F_O(%rbp),%r8 addReg r11, rax @@ -766,10 +767,10 @@ .else # here for looping case #"rotate" key/tweak schedule (move up on stack) incq %rdi #bump key injection counter - R_512_OneRound 8, 9,10,11,12,13,14,15,%((_RR_)+0),,, - R_512_OneRound 10, 9,12,15,14,13, 8,11,%((_RR_)+1),,, - R_512_OneRound 12, 9,14,11, 8,13,10,15,%((_RR_)+2),,, - R_512_OneRound 14, 9, 8,15,10,13,12,11,%((_RR_)+3),, + R_512_OneRound 8, 9,10,11,12,13,14,15,%((\_RR_)+0),,, + R_512_OneRound 10, 9,12,15,14,13, 8,11,%((\_RR_)+1),,, + R_512_OneRound 12, 9,14,11, 8,13,10,15,%((\_RR_)+2),,, + R_512_OneRound 14, 9, 8,15,10,13,12,11,%((\_RR_)+3),, # inject the key schedule addq ksKey+8*0+F_O(%rbp,%rdi,8),%r8 addReg r11, rax @@ -814,9 +815,9 @@ movq %rbx,ksTwk+ 8+F_O(%rbp) movq %rcx,ksTwk+16+F_O(%rbp) .irp _Rn_,8,9,10,11,12,13,14,15 - movq X_VARS+8*(_Rn_-8)(%rdi),%r\_Rn_ + movq X_VARS+8*(\_Rn_-8)(%rdi),%r\_Rn_ xorq %r\_Rn_,%rdx #compute overall parity - movq %r\_Rn_,ksKey+8*(_Rn_-8)+F_O(%rbp) + movq %r\_Rn_,ksKey+8*(\_Rn_-8)+F_O(%rbp) .endr #load state into %r8 ..%r15, compute parity movq %rdx,ksKey+8*(8)+F_O(%rbp)#save key schedule parity @@ -853,7 +854,7 @@ .if _SKEIN_DEBUG .irp _Rn_,8,9,10,11,12,13,14,15 #save values on stack for debug output - movq %r\_Rn_,X_stk+8*(_Rn_-8)(%rsp) + movq %r\_Rn_,X_stk+8*(\_Rn_-8)(%rsp) .endr Skein_Debug_Block 512 #debug dump @@ -891,12 +892,12 @@ ################# # feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..7} .irp _Rn_,8,9,10,11,12,13,14,15 - .if (_Rn_ == 8) + .if (\_Rn_ == 8) movq $FIRST_MASK64,%rbx .endif - xorq Wcopy+8*(_Rn_-8)+F_O(%rbp),%r\_Rn_ #feedforward XOR - movq %r\_Rn_,X_VARS+8*(_Rn_-8)(%rdi) #and store result - .if (_Rn_ == 14) + xorq Wcopy+8*(\_Rn_-8)+F_O(%rbp),%r\_Rn_ #feedforward XOR + movq %r\_Rn_,X_VARS+8*(\_Rn_-8)(%rdi) #and store result + .if (\_Rn_ == 14) andq TWEAK+ 8(%rdi),%rbx .endif .endr @@ -917,7 +918,7 @@ pushq %rsi #save two regs for BLK_BITS-specific parms pushq %rdi .irp _Rn_,8,9,10,11,12,13,14,15 #save X[] state on stack so debug routines can access it - movq %r\_Rn_,X_stk+8*(_Rn_-8)+F_O(%rbp) + movq %r\_Rn_,X_stk+8*(\_Rn_-8)+F_O(%rbp) .endr movq ctxPtr+F_O(%rbp),%rsi #ctx_hdr_ptr movq $512,%rdi #now are set for the call @@ -968,50 +969,50 @@ # .macro r1024_Mix w0,w1,reg0,reg1,_RN0_,_Rn1_,op1 addReg \reg0 , \reg1 #perform the MIX - RotL64 \reg1 , 1024,%((_RN0_) % 8),_Rn1_ + RotL64 \reg1 , 1024,%((\_RN0_) % 8),\_Rn1_ xorReg \reg1 , \reg0 -.if ((_RN0_) && 3) == 3 #time to do key injection? +.if ((\_RN0_) && 3) == 3 #time to do key injection? .if _SKEIN_DEBUG - movq %\reg0 , xDebug_1024+8*w0(%rsp) #save intermediate values for Debug_Round - movq %\reg1 , xDebug_1024+8*w1(%rsp) # (before inline key injection) + movq %\reg0 , xDebug_1024+8*\w0(%rsp) #save intermediate values for Debug_Round + movq %\reg1 , xDebug_1024+8*\w1(%rsp) # (before inline key injection) .endif -_II_ = ((_RN0_)/4)+1 #injection count +_II_ = ((\_RN0_)/4)+1 #injection count .if SKEIN_ASM_UNROLL && 1024 #here to do fully unrolled key injection - addq ksKey+ 8*((_II_+w0) % 17)(%rsp),%\reg0 - addq ksKey+ 8*((_II_+w1) % 17)(%rsp),%\reg1 - .if w1 == 13 #tweak injection + addq ksKey+ 8*((_II_+\w0) % 17)(%rsp),%\reg0 + addq ksKey+ 8*((_II_+\w1) % 17)(%rsp),%\reg1 + .if \w1 == 13 #tweak injection addq ksTwk+ 8*((_II_+ 0) % 3)(%rsp),%\reg1 - .elseif w0 == 14 + .elseif \w0 == 14 addq ksTwk+ 8*((_II_+ 1) % 3)(%rsp),%\reg0 - .elseif w1 == 15 + .elseif \w1 == 15 addq $_II_, %\reg1 #(injection counter) .endif .else #here to do looping key injection - .if (w0 == 0) - movq %rdi, X_stk+8*w0(%rsp) #if so, store N0 so we can use reg as index + .if (\w0 == 0) + movq %rdi, X_stk+8*\w0(%rsp) #if so, store N0 so we can use reg as index movq rIdx_offs(%rsp),%rdi #get the injection counter index into rdi .else - addq ksKey+8+8*w0(%rsp,%rdi,8),%\reg0 #even key injection + addq ksKey+8+8*\w0(%rsp,%rdi,8),%\reg0 #even key injection .endif - .if w1 == 13 #tweak injection + .if \w1 == 13 #tweak injection addq ksTwk+8+8* 0(%rsp,%rdi,8),%\reg1 - .elseif w0 == 14 + .elseif \w0 == 14 addq ksTwk+8+8* 1(%rsp,%rdi,8),%\reg0 - .elseif w1 == 15 + .elseif \w1 == 15 addReg \reg1,rdi,,,1 #(injection counter) .endif - addq ksKey+8+8*w1(%rsp,%rdi,8),%\reg1 #odd key injection + addq ksKey+8+8*\w1(%rsp,%rdi,8),%\reg1 #odd key injection .endif .endif # insert the op provided, .if any - op1 + \op1 .endm ################# # MACRO: four rounds for 1024-bit blocks # .macro r1024_FourRounds _RR_ #RR = base round number (0 mod 4) # should be here with X4 set properly, X6 stored on stack -_Rn_ = (_RR_) + 0 +_Rn_ = (\_RR_) + 0 r1024_Mix 0, 1,rdi,rsi,_Rn_,0 r1024_Mix 2, 3,rbp,rax,_Rn_,1 r1024_Mix 4, 5,rcx,rbx,_Rn_,2, #save X4 on stack (x4/x6 alternate) @@ -1023,7 +1024,7 @@ .if _SKEIN_DEBUG Skein_Debug_Round 1024,%(_Rn_+1) .endif -_Rn_ = (_RR_) + 1 +_Rn_ = (\_RR_) + 1 r1024_Mix 0, 9,rdi,r9 ,_Rn_,0 r1024_Mix 2,13,rbp,r13,_Rn_,1 r1024_Mix 6,11,rcx,r11,_Rn_,2, #save X6 on stack (x4/x6 alternate) @@ -1035,7 +1036,7 @@ .if _SKEIN_DEBUG Skein_Debug_Round 1024,%(_Rn_+1) .endif -_Rn_ = (_RR_) + 2 +_Rn_ = (\_RR_) + 2 r1024_Mix 0, 7,rdi,rdx,_Rn_,0 r1024_Mix 2, 5,rbp,rbx,_Rn_,1 r1024_Mix 4, 3,rcx,rax,_Rn_,2, #save X4 on stack (x4/x6 alternate) @@ -1047,7 +1048,7 @@ .if _SKEIN_DEBUG Skein_Debug_Round 1024,%(_Rn_+1) .endif -_Rn_ = (_RR_) + 3 +_Rn_ = (\_RR_) + 3 r1024_Mix 0,15,rdi,r15,_Rn_,0 r1024_Mix 2,11,rbp,r11,_Rn_,1 r1024_Mix 6,13,rcx,r13,_Rn_,2, #save X6 on stack (x4/x6 alternate) @@ -1116,13 +1117,13 @@ # the logic here assumes the set {rdi,rsi,rbp,rax} = X[0,1,2,3] .irp _rN_,0,1,2,3,4,6 #process the "initial" words, using r14/r15 as temps - movq X_VARS+8*_rN_(%rdi),%r14 #get state word - movq 8*_rN_(%rsi),%r15 #get msg word + movq X_VARS+8*\_rN_(%rdi),%r14 #get state word + movq 8*\_rN_(%rsi),%r15 #get msg word xorq %r14,%rax #update key schedule overall parity - movq %r14,ksKey +8*_rN_+F_O(%rbp) #save key schedule word on stack - movq %r15,Wcopy +8*_rN_+F_O(%rbp) #save local msg Wcopy + movq %r14,ksKey +8*\_rN_+F_O(%rbp) #save key schedule word on stack + movq %r15,Wcopy +8*\_rN_+F_O(%rbp) #save local msg Wcopy addq %r15,%r14 #do the initial key injection - movq %r14,X_stk +8*_rN_ (%rsp) #save initial state var on stack + movq %r14,X_stk +8*\_rN_ (%rsp) #save initial state var on stack .endr # now process the rest, using the "real" registers # (MUST do it in reverse order to inject tweaks r8/r9 first) @@ -1135,9 +1136,9 @@ movq %rcx,Wcopy+8*_oo_+F_O(%rbp) #save copy of msg word for feedforward addq %rcx,%\_rr_ #do the initial key injection .if _oo_ == 13 #do the initial tweak injection - addReg _rr_,r8 # (only in words 13/14) + addReg \_rr_,r8 # (only in words 13/14) .elseif _oo_ == 14 - addReg _rr_,r9 + addReg \_rr_,r9 .endif .endr movq %rax,ksKey+8*WCNT+F_O(%rbp) #save key schedule parity