Changeset View
Changeset View
Standalone View
Standalone View
sys/dev/if_wg/module/crypto/zinc/poly1305/poly1305-mips64.pl
- This file was added.
#!/usr/bin/env perl | |||||
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause | |||||
# | |||||
# This code is taken from the OpenSSL project but the author, Andy Polyakov, | |||||
# has relicensed it under the licenses specified in the SPDX header above. | |||||
# The original headers, including the original license headers, are | |||||
# included below for completeness. | |||||
# | |||||
# ==================================================================== | |||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | |||||
# project. The module is, however, dual licensed under OpenSSL and | |||||
# CRYPTOGAMS licenses depending on where you obtain it. For further | |||||
# details see http://www.openssl.org/~appro/cryptogams/. | |||||
# ==================================================================== | |||||
# | |||||
# Poly1305 hash for MIPS64. | |||||
# | |||||
# May 2016 | |||||
# | |||||
# Numbers are cycles per processed byte with poly1305_blocks alone. | |||||
# | |||||
# IALU/gcc | |||||
# R1x000 5.64/+120% (big-endian) | |||||
# Octeon II 3.80/+280% (little-endian) | |||||
###################################################################### | |||||
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
# | |||||
($zero,$at,$t0,$t1,$t2) = map { "\$$_" } (0..2,24,25);	# temporaries and the assembler temp
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7) = map { "\$$_" } (4..11);	# argument registers
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11) =
	map { "\$$_" } (12..23);				# saved registers
($gp,$tp,$sp,$fp,$ra) = map { "\$$_" } (3,28..31);	# special-purpose registers
# | |||||
# The return value is placed in $a0. The following coding rules facilitate
# interoperability:
# | |||||
# - never ever touch $tp, "thread pointer", former $gp [o32 can be | |||||
# excluded from the rule, because it's specified volatile]; | |||||
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting | |||||
# old code]; | |||||
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; | |||||
# | |||||
# For reference here is register layout for N32/64 MIPS ABIs: | |||||
# | |||||
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); | |||||
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); | |||||
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); | |||||
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); | |||||
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); | |||||
# | |||||
# <appro@openssl.org> | |||||
# | |||||
###################################################################### | |||||
$flavour = shift || "64";	# supported flavours are o32,n32,64,nubi32,nubi64
($flavour =~ /64|n32/i) or die "MIPS64 only";

# The return-value register and the value handed to the .mask directive
# both depend on whether a NUBI flavour was requested.
if ($flavour =~ /nubi/i) {
	$v0 = $a0;
	$SAVED_REGS_MASK = "0x0003f000";
} else {
	$v0 = $t0;
	$SAVED_REGS_MASK = "0x00030000";
}

# Symbolic names for the argument registers and scratch registers that
# the generated routines use.
($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
($in0,$in1) = ($a4,$a5);
($tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a6,$a7,$at,$t0,$t1);
# Append the C-preprocessor prologue and the poly1305_init routine to $code.
#
# Preprocessor prologue:
#  - MIPS64R3/R5/R6 builds get _MIPS_ARCH_MIPS64R2 defined if it is not
#    already;
#  - dmultu()/mflo()/mfhi() are wrapper macros: on R6 dmultu() expands to
#    nothing and mflo()/mfhi() become dmulu/dmuhu, otherwise they expand
#    to the dmultu/mflo/mfhi instructions;
#  - under __KERNEL__ the three entry points are renamed with a _mips
#    suffix;
#  - MSB/LSB are the byte offsets (0/7 or 7/0) added to the ldl/ldr
#    addresses, chosen by MIPSEB.
#
# poly1305_init (context pointer in $ctx, key pointer in $inp):
#  - stores zero to the 64-bit words at ctx+0, ctx+8 and ctx+16;
#  - if the key pointer is zero, skips straight to .Lno_key;
#  - otherwise loads two 64-bit key words (plain ld on R6, ldl/ldr pairs
#    elsewhere), byte-swaps them when MIPSEB is defined, masks them with
#    0x0ffffffc0fffffff and 0x0ffffffc0ffffffc, and stores the results at
#    ctx+24 and ctx+32, plus s1 = r1 + (r1 >> 2) at ctx+40;
#  - loads 0 into the $v0 alias and returns.
$code.=<<___; | |||||
#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\ | |||||
defined(_MIPS_ARCH_MIPS64R6)) \\ | |||||
&& !defined(_MIPS_ARCH_MIPS64R2) | |||||
# define _MIPS_ARCH_MIPS64R2 | |||||
#endif | |||||
#if defined(_MIPS_ARCH_MIPS64R6) | |||||
# define dmultu(rs,rt) | |||||
# define mflo(rd,rs,rt) dmulu rd,rs,rt | |||||
# define mfhi(rd,rs,rt) dmuhu rd,rs,rt | |||||
#else | |||||
# define dmultu(rs,rt) dmultu rs,rt | |||||
# define mflo(rd,rs,rt) mflo rd | |||||
# define mfhi(rd,rs,rt) mfhi rd | |||||
#endif | |||||
#ifdef __KERNEL__ | |||||
# define poly1305_init poly1305_init_mips | |||||
# define poly1305_blocks poly1305_blocks_mips | |||||
# define poly1305_emit poly1305_emit_mips | |||||
#endif | |||||
#if defined(__MIPSEB__) && !defined(MIPSEB) | |||||
# define MIPSEB | |||||
#endif | |||||
#ifdef MIPSEB | |||||
# define MSB 0 | |||||
# define LSB 7 | |||||
#else | |||||
# define MSB 7 | |||||
# define LSB 0 | |||||
#endif | |||||
.text | |||||
.set noat | |||||
.set noreorder | |||||
.align 5 | |||||
.globl poly1305_init | |||||
.ent poly1305_init | |||||
poly1305_init: | |||||
.frame $sp,0,$ra | |||||
.set reorder | |||||
sd $zero,0($ctx) | |||||
sd $zero,8($ctx) | |||||
sd $zero,16($ctx) | |||||
beqz $inp,.Lno_key | |||||
#if defined(_MIPS_ARCH_MIPS64R6) | |||||
ld $in0,0($inp) | |||||
ld $in1,8($inp) | |||||
#else | |||||
ldl $in0,0+MSB($inp) | |||||
ldl $in1,8+MSB($inp) | |||||
ldr $in0,0+LSB($inp) | |||||
ldr $in1,8+LSB($inp) | |||||
#endif | |||||
#ifdef MIPSEB | |||||
# if defined(_MIPS_ARCH_MIPS64R2) | |||||
dsbh $in0,$in0 # byte swap | |||||
dsbh $in1,$in1 | |||||
dshd $in0,$in0 | |||||
dshd $in1,$in1 | |||||
# else | |||||
ori $tmp0,$zero,0xFF | |||||
dsll $tmp2,$tmp0,32 | |||||
or $tmp0,$tmp2 # 0x000000FF000000FF | |||||
and $tmp1,$in0,$tmp0 # byte swap | |||||
and $tmp3,$in1,$tmp0 | |||||
dsrl $tmp2,$in0,24 | |||||
dsrl $tmp4,$in1,24 | |||||
dsll $tmp1,24 | |||||
dsll $tmp3,24 | |||||
and $tmp2,$tmp0 | |||||
and $tmp4,$tmp0 | |||||
dsll $tmp0,8 # 0x0000FF000000FF00 | |||||
or $tmp1,$tmp2 | |||||
or $tmp3,$tmp4 | |||||
and $tmp2,$in0,$tmp0 | |||||
and $tmp4,$in1,$tmp0 | |||||
dsrl $in0,8 | |||||
dsrl $in1,8 | |||||
dsll $tmp2,8 | |||||
dsll $tmp4,8 | |||||
and $in0,$tmp0 | |||||
and $in1,$tmp0 | |||||
or $tmp1,$tmp2 | |||||
or $tmp3,$tmp4 | |||||
or $in0,$tmp1 | |||||
or $in1,$tmp3 | |||||
dsrl $tmp1,$in0,32 | |||||
dsrl $tmp3,$in1,32 | |||||
dsll $in0,32 | |||||
dsll $in1,32 | |||||
or $in0,$tmp1 | |||||
or $in1,$tmp3 | |||||
# endif | |||||
#endif | |||||
li $tmp0,1 | |||||
dsll $tmp0,32 | |||||
daddiu $tmp0,-63 | |||||
dsll $tmp0,28 | |||||
daddiu $tmp0,-1 # 0ffffffc0fffffff | |||||
and $in0,$tmp0 | |||||
daddiu $tmp0,-3 # 0ffffffc0ffffffc | |||||
and $in1,$tmp0 | |||||
sd $in0,24($ctx) | |||||
dsrl $tmp0,$in1,2 | |||||
sd $in1,32($ctx) | |||||
daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) | |||||
sd $tmp0,40($ctx) | |||||
.Lno_key: | |||||
li $v0,0 # return 0 | |||||
jr $ra | |||||
.end poly1305_init | |||||
___ | |||||
{
# Append poly1305_blocks and poly1305_blocks_internal to $code.
#
# poly1305_blocks (arguments in $ctx, $inp, $len, $padbit) shifts $len
# right by 4 to get the number of complete 16-byte blocks and returns
# immediately when that is zero; otherwise it branches to
# poly1305_blocks_internal.
#
# poly1305_blocks_internal sets up a 6*8-byte frame and saves $s5/$s4
# (plus $s3..$s0 for nubi flavours). It loads the hash value from
# ctx+0/+8/+16 and the key material from ctx+24/+32/+40. Then, once per
# block, it:
#  - loads 16 input bytes (byte-swapped when MIPSEB is defined);
#  - adds them and $padbit to the hash value with carry propagation;
#  - multiplies by the key using the dmultu()/mflo()/mfhi() macros;
#  - performs the reduction step.
# After the loop the hash value is stored back and the saved registers
# are restored.
#
# NB: the multiplier loaded from ctx+40 is deliberately named $rs1, not
# $s1. A lexical $s1 would shadow the register name $s1 inside the
# heredocs below, so the nubi prologue/epilogue would save/restore the
# register behind $s5 twice and never preserve the register behind $h1.
my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
   ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);

$code.=<<___;
.align	5
.globl	poly1305_blocks
.ent	poly1305_blocks
poly1305_blocks:
	.set	noreorder
	dsrl	$len,4			# number of complete blocks
	bnez	$len,poly1305_blocks_internal
	nop
	jr	$ra
	nop
.end	poly1305_blocks

.align	5
.ent	poly1305_blocks_internal
poly1305_blocks_internal:
	.frame	$sp,6*8,$ra
	.mask	$SAVED_REGS_MASK,-8
	.set	noreorder
	dsubu	$sp,6*8
	sd	$s5,40($sp)
	sd	$s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	sd	$s3,24($sp)
	sd	$s2,16($sp)
	sd	$s1,8($sp)
	sd	$s0,0($sp)
___
$code.=<<___;
	.set	reorder

	ld	$h0,0($ctx)		# load hash value
	ld	$h1,8($ctx)
	ld	$h2,16($ctx)

	ld	$r0,24($ctx)		# load key
	ld	$r1,32($ctx)
	ld	$rs1,40($ctx)

.Loop:
#if defined(_MIPS_ARCH_MIPS64R6)
	ld	$in0,0($inp)		# load input
	ld	$in1,8($inp)
#else
	ldl	$in0,0+MSB($inp)	# load input
	ldl	$in1,8+MSB($inp)
	ldr	$in0,0+LSB($inp)
	ldr	$in1,8+LSB($inp)
#endif
	daddiu	$len,-1
	daddiu	$inp,16
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$in0,$in0		# byte swap
	dsbh	$in1,$in1
	dshd	$in0,$in0
	dshd	$in1,$in1
# else
	ori	$tmp0,$zero,0xFF
	dsll	$tmp2,$tmp0,32
	or	$tmp0,$tmp2		# 0x000000FF000000FF

	and	$tmp1,$in0,$tmp0	# byte swap
	and	$tmp3,$in1,$tmp0
	dsrl	$tmp2,$in0,24
	dsrl	$tmp4,$in1,24
	dsll	$tmp1,24
	dsll	$tmp3,24
	and	$tmp2,$tmp0
	and	$tmp4,$tmp0
	dsll	$tmp0,8			# 0x0000FF000000FF00
	or	$tmp1,$tmp2
	or	$tmp3,$tmp4
	and	$tmp2,$in0,$tmp0
	and	$tmp4,$in1,$tmp0
	dsrl	$in0,8
	dsrl	$in1,8
	dsll	$tmp2,8
	dsll	$tmp4,8
	and	$in0,$tmp0
	and	$in1,$tmp0
	or	$tmp1,$tmp2
	or	$tmp3,$tmp4
	or	$in0,$tmp1
	or	$in1,$tmp3
	dsrl	$tmp1,$in0,32
	dsrl	$tmp3,$in1,32
	dsll	$in0,32
	dsll	$in1,32
	or	$in0,$tmp1
	or	$in1,$tmp3
# endif
#endif
	daddu	$h0,$in0		# accumulate input
	daddu	$h1,$in1
	sltu	$tmp0,$h0,$in0
	sltu	$tmp1,$h1,$in1
	daddu	$h1,$tmp0

	dmultu	($r0,$h0)		# h0*r0
	daddu	$h2,$padbit
	sltu	$tmp0,$h1,$tmp0
	mflo	($d0,$r0,$h0)
	mfhi	($d1,$r0,$h0)

	dmultu	($rs1,$h1)		# h1*5*r1
	daddu	$tmp0,$tmp1
	daddu	$h2,$tmp0
	mflo	($tmp0,$rs1,$h1)
	mfhi	($tmp1,$rs1,$h1)

	dmultu	($r1,$h0)		# h0*r1
	daddu	$d0,$tmp0
	daddu	$d1,$tmp1
	mflo	($tmp2,$r1,$h0)
	mfhi	($d2,$r1,$h0)
	sltu	$tmp0,$d0,$tmp0
	daddu	$d1,$tmp0

	dmultu	($r0,$h1)		# h1*r0
	daddu	$d1,$tmp2
	sltu	$tmp2,$d1,$tmp2
	mflo	($tmp0,$r0,$h1)
	mfhi	($tmp1,$r0,$h1)
	daddu	$d2,$tmp2

	dmultu	($rs1,$h2)		# h2*5*r1
	daddu	$d1,$tmp0
	daddu	$d2,$tmp1
	mflo	($tmp2,$rs1,$h2)

	dmultu	($r0,$h2)		# h2*r0
	sltu	$tmp0,$d1,$tmp0
	daddu	$d2,$tmp0
	mflo	($tmp3,$r0,$h2)

	daddu	$d1,$tmp2
	daddu	$d2,$tmp3
	sltu	$tmp2,$d1,$tmp2
	daddu	$d2,$tmp2

	li	$tmp0,-4		# final reduction
	and	$tmp0,$d2
	dsrl	$tmp1,$d2,2
	andi	$h2,$d2,3
	daddu	$tmp0,$tmp1
	daddu	$h0,$d0,$tmp0
	sltu	$tmp0,$h0,$tmp0
	daddu	$h1,$d1,$tmp0
	sltu	$tmp0,$h1,$tmp0
	daddu	$h2,$h2,$tmp0

	bnez	$len,.Loop

	sd	$h0,0($ctx)		# store hash value
	sd	$h1,8($ctx)
	sd	$h2,16($ctx)

	.set	noreorder
	ld	$s5,40($sp)		# epilogue
	ld	$s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi epilogue
	ld	$s3,24($sp)
	ld	$s2,16($sp)
	ld	$s1,8($sp)
	ld	$s0,0($sp)
___
$code.=<<___;
	jr	$ra
	daddu	$sp,6*8
.end	poly1305_blocks_internal
___
}
{ | |||||
# Append poly1305_emit to $code. Within this scope $ctx, $mac and $nonce
# name the first three argument registers.
#
# The routine:
#  - loads the three 64-bit hash words from ctx+0, ctx+8 and ctx+16;
#  - computes hash + 5 with carry propagation into the second and third
#    words;
#  - shifts the resulting third word right by 2 and negates it to form a
#    mask (and its complement via nor) that selects, for the low two
#    words, either the hash + 5 value or the original hash;
#  - loads the nonce as four 32-bit words with lwu, combines them into
#    two 64-bit values and adds them with a carry from the low to the
#    high half;
#  - writes the 16 result bytes to mac with byte stores, lowest-order
#    byte at offset 0.
#
# The trailing .rdata/.align directives are emitted after the routine.
my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); | |||||
$code.=<<___; | |||||
.align 5 | |||||
.globl poly1305_emit | |||||
.ent poly1305_emit | |||||
poly1305_emit: | |||||
.frame $sp,0,$ra | |||||
.set reorder | |||||
ld $tmp0,0($ctx) | |||||
ld $tmp1,8($ctx) | |||||
ld $tmp2,16($ctx) | |||||
daddiu $in0,$tmp0,5 # compare to modulus | |||||
sltiu $tmp3,$in0,5 | |||||
daddu $in1,$tmp1,$tmp3 | |||||
sltu $tmp3,$in1,$tmp3 | |||||
daddu $tmp2,$tmp2,$tmp3 | |||||
dsrl $tmp2,2 # see if it carried/borrowed | |||||
dsubu $tmp2,$zero,$tmp2 | |||||
nor $tmp3,$zero,$tmp2 | |||||
and $in0,$tmp2 | |||||
and $tmp0,$tmp3 | |||||
and $in1,$tmp2 | |||||
and $tmp1,$tmp3 | |||||
or $in0,$tmp0 | |||||
or $in1,$tmp1 | |||||
lwu $tmp0,0($nonce) # load nonce | |||||
lwu $tmp1,4($nonce) | |||||
lwu $tmp2,8($nonce) | |||||
lwu $tmp3,12($nonce) | |||||
dsll $tmp1,32 | |||||
dsll $tmp3,32 | |||||
or $tmp0,$tmp1 | |||||
or $tmp2,$tmp3 | |||||
daddu $in0,$tmp0 # accumulate nonce | |||||
daddu $in1,$tmp2 | |||||
sltu $tmp0,$in0,$tmp0 | |||||
daddu $in1,$tmp0 | |||||
dsrl $tmp0,$in0,8 # write mac value | |||||
dsrl $tmp1,$in0,16 | |||||
dsrl $tmp2,$in0,24 | |||||
sb $in0,0($mac) | |||||
dsrl $tmp3,$in0,32 | |||||
sb $tmp0,1($mac) | |||||
dsrl $tmp0,$in0,40 | |||||
sb $tmp1,2($mac) | |||||
dsrl $tmp1,$in0,48 | |||||
sb $tmp2,3($mac) | |||||
dsrl $tmp2,$in0,56 | |||||
sb $tmp3,4($mac) | |||||
dsrl $tmp3,$in1,8 | |||||
sb $tmp0,5($mac) | |||||
dsrl $tmp0,$in1,16 | |||||
sb $tmp1,6($mac) | |||||
dsrl $tmp1,$in1,24 | |||||
sb $tmp2,7($mac) | |||||
sb $in1,8($mac) | |||||
dsrl $tmp2,$in1,32 | |||||
sb $tmp3,9($mac) | |||||
dsrl $tmp3,$in1,40 | |||||
sb $tmp0,10($mac) | |||||
dsrl $tmp0,$in1,48 | |||||
sb $tmp1,11($mac) | |||||
dsrl $tmp1,$in1,56 | |||||
sb $tmp2,12($mac) | |||||
sb $tmp3,13($mac) | |||||
sb $tmp0,14($mac) | |||||
sb $tmp1,15($mac) | |||||
jr $ra | |||||
.end poly1305_emit | |||||
.rdata | |||||
.align 2 | |||||
___ | |||||
} | |||||
# Write the generated file: first this script's leading comment header
# (re-commented with "//"), then the accumulated assembly in $code.
#
# If a trailing command-line argument is present it names the output
# file. STDOUT is redirected to it *before* anything is printed, so the
# header lands in the same file as the code rather than on the original
# standard output.
if ($output = pop) {
	open STDOUT, '>', $output or die "can't open $output: $!";
}

# Copy the header: skip the shebang line, turn each leading "#" into
# "//", pass blank lines through, and stop at the first line that is
# neither a comment nor blank.
open my $self, '<', $0 or die "can't open $0: $!";
while (my $line = <$self>) {
	next if ($line =~ /^#!/);
	last if (!($line =~ s/^#/\/\//) and $line !~ /^$/);
	print $line;
}
close $self;

print $code;

# Buffered write errors (e.g. a full disk) only surface at close time.
close STDOUT or die "error closing STDOUT: $!";