/*
 * File: module/crypto/zinc/poly1305/poly1305-mips.S
 * (Captured from a code-review changeset view; this file was added by the changeset.)
 */
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com> All Rights Reserved.
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */
/*
 * MSB/LSB: byte displacement, inside a 32-bit word, that is added to the
 * word offset when forming the lwl/swl (MSB) and lwr/swr (LSB) operands
 * used for unaligned word accesses. The values swap with the byte order.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#else
#define MSB 3
#define LSB 0
#endif

/* Bytes consumed per loop iteration / bytes reserved on the stack. */
#define POLY1305_BLOCK_SIZE 16
#define POLY1305_STACK_SIZE 32

.text

/* H0..H4: the five accumulator words loaded from / stored to H[0..4]. */
#define H0 $t0
#define H1 $t1
#define H2 $t2
#define H3 $t3
#define H4 $t4

/* R0..R3: the four key words loaded from R[0..3]. */
#define R0 $t5
#define R1 $t6
#define R2 $t7
#define R3 $t8

/* O0..O4: per-block sums (input word + accumulator word + carry). */
#define O0 $s0
#define O1 $s4
#define O2 $v1
#define O3 $t9
#define O4 $s5

/* S1..S3: Rx + (Rx >> 2), computed once before the block loop. */
#define S1 $s1
#define S2 $s2
#define S3 $s3

/*
 * SC: scratch/constant register. It is $at, so every user must be
 *     bracketed by `.set noat`.
 * CA: running carry.
 */
#define SC $at
#define CA $v0

/* Input arguments */
#define poly $a0
#define src $a1
#define srclen $a2
#define hibit $a3

/* Location in the opaque buffer
 * R[0..3], CA, H[0..4]
 *
 * Byte offsets relative to $a0: R at 0..12, CA at 16, H at 20..36.
 *
 * NOTE(review): `##` here pastes ')' onto '(', which does not form a single
 * valid token; this presumably relies on the preprocessor being run in its
 * lenient assembler mode -- confirm with the toolchain in use.
 */
#define PTR_POLY1305_R(n) ( 0 + ((n)*4)) ## ($a0)
#define PTR_POLY1305_CA (16 ) ## ($a0)
#define PTR_POLY1305_H(n) (20 + ((n)*4)) ## ($a0)
/*
 * poly1305_blocks_mips
 *
 * Absorbs whole 16-byte blocks of input into the accumulator.
 *
 * In:   $a0 (poly)    state buffer laid out as R[0..3], CA, H[0..4]
 *       $a1 (src)     input bytes; read with lwl/lwr, so no alignment needed
 *       $a2 (srclen)  input length in bytes; the low 4 bits are cleared, so
 *                     only complete 16-byte blocks are processed
 *       $a3 (hibit)   value added to h4 for every block
 * Out:  CA and H[0..4] in the state buffer are updated. If the rounded
 *       length is zero the state buffer is neither read nor written.
 * Uses: $s0-$s5 (saved to and restored from the stack frame), $t0-$t9,
 *       $v0, $v1, $at (hence `.set noat`), HI/LO. $a1 and $a2 are modified.
 *
 * NOTE(review): no `.set noreorder` is visible in this file, so the
 * instructions that textually follow beqz/bne/jr are presumably NOT intended
 * as hand-placed delay-slot fillers -- confirm before reordering anything.
 */
.set	noat
.align	4
.globl	poly1305_blocks_mips
.ent	poly1305_blocks_mips
poly1305_blocks_mips:
	.frame	$sp, POLY1305_STACK_SIZE, $ra
	/* srclen &= 0xFFFFFFF0 */
	ins	srclen, $zero, 0, 4

	addiu	$sp, -(POLY1305_STACK_SIZE)

	/* check srclen >= 16 bytes */
	beqz	srclen, .Lpoly1305_blocks_mips_end

	/* Calculate last round based on src address pointer.
	 * last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0)
	 */
	addu	srclen, src

	lw	R0, PTR_POLY1305_R(0)
	lw	R1, PTR_POLY1305_R(1)
	lw	R2, PTR_POLY1305_R(2)
	lw	R3, PTR_POLY1305_R(3)

	/* store the used save registers. */
	sw	$s0, 0($sp)
	sw	$s1, 4($sp)
	sw	$s2, 8($sp)
	sw	$s3, 12($sp)
	sw	$s4, 16($sp)
	sw	$s5, 20($sp)

	/* load Hx and Carry */
	lw	CA, PTR_POLY1305_CA
	lw	H0, PTR_POLY1305_H(0)
	lw	H1, PTR_POLY1305_H(1)
	lw	H2, PTR_POLY1305_H(2)
	lw	H3, PTR_POLY1305_H(3)
	lw	H4, PTR_POLY1305_H(4)

	/* Sx = Rx + (Rx >> 2) */
	srl	S1, R1, 2
	srl	S2, R2, 2
	srl	S3, R3, 2
	addu	S1, R1
	addu	S2, R2
	addu	S3, R3

	/* SC = 1, so that `maddu CA, SC` below adds CA into the HI/LO sum. */
	addiu	SC, $zero, 1

.Lpoly1305_loop:
	/* Unaligned load of the four input words of this block. */
	lwl	O0, 0+MSB(src)
	lwl	O1, 4+MSB(src)
	lwl	O2, 8+MSB(src)
	lwl	O3,12+MSB(src)
	lwr	O0, 0+LSB(src)
	lwr	O1, 4+LSB(src)
	lwr	O2, 8+LSB(src)
	lwr	O3,12+LSB(src)

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* wsbh + rotr 16 = full 32-bit byte swap of each input word. */
	wsbh	O0
	wsbh	O1
	wsbh	O2
	wsbh	O3
	rotr	O0, 16
	rotr	O1, 16
	rotr	O2, 16
	rotr	O3, 16
#endif

	/*
	 * Carry idiom used below: after `addu X, Y`, `sltu C, X, Y` yields 1
	 * exactly when the 32-bit addition wrapped. Hx is reused as a
	 * temporary for the second carry of each limb.
	 */

	/* h0 = (u32)(d0 = (u64)h0 + inp[0] + c 'Carry_previous cycle'); */
	addu	H0, CA
	sltu	CA, H0, CA
	addu	O0, H0
	sltu	H0, O0, H0
	addu	CA, H0

	/* h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + inp[4]); */
	addu	H1, CA
	sltu	CA, H1, CA
	addu	O1, H1
	sltu	H1, O1, H1
	addu	CA, H1

	/* h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + inp[8]); */
	addu	H2, CA
	sltu	CA, H2, CA
	addu	O2, H2
	sltu	H2, O2, H2
	addu	CA, H2

	/* h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + inp[12]); */
	addu	H3, CA
	sltu	CA, H3, CA
	addu	O3, H3
	sltu	H3, O3, H3
	addu	CA, H3

	/* h4 += (u32)(d3 >> 32) + padbit; */
	addu	H4, hibit
	addu	O4, H4, CA

	/* D0 = O0*R0 + O1*S3 + O2*S2 + O3*S1; H0 = low word, CA = high word */
	multu	O0, R0
	maddu	O1, S3
	maddu	O2, S2
	maddu	O3, S1
	mfhi	CA
	mflo	H0

	/* D1 = O0*R1 + O1*R0 + O2*S3 + O3*S2 + O4*S1 + CA */
	multu	O0, R1
	maddu	O1, R0
	maddu	O2, S3
	maddu	O3, S2
	maddu	O4, S1
	maddu	CA, SC
	mfhi	CA
	mflo	H1

	/* D2 = O0*R2 + O1*R1 + O2*R0 + O3*S3 + O4*S2 + CA */
	multu	O0, R2
	maddu	O1, R1
	maddu	O2, R0
	maddu	O3, S3
	maddu	O4, S2
	maddu	CA, SC
	mfhi	CA
	mflo	H2

	/* D4: low 32 bits of O4*R0.
	 * NOTE(review): issued before the D3 multu/maddu chain, presumably
	 * because `mul` does not preserve HI/LO -- confirm before moving it.
	 */
	mul	H4, O4, R0

	/* D3 = O0*R3 + O1*R2 + O2*R1 + O3*R0 + O4*S3 + CA */
	multu	O0, R3
	maddu	O1, R2
	maddu	O2, R1
	maddu	O3, R0
	maddu	O4, S3
	maddu	CA, SC
	mfhi	CA
	mflo	H3

	addiu	src, POLY1305_BLOCK_SIZE

	/* h4 += (u32)(d3 >> 32); */
	addu	O4, H4, CA

	/* h4 &= 3 */
	andi	H4, O4, 3

	/* c = (h4 >> 2) + (h4 & ~3U); */
	srl	CA, O4, 2
	ins	O4, $zero, 0, 2
	addu	CA, O4

	/* able to do a 16 byte block. */
	bne	src, srclen, .Lpoly1305_loop

	/* restore the used save registers. */
	lw	$s0, 0($sp)
	lw	$s1, 4($sp)
	lw	$s2, 8($sp)
	lw	$s3, 12($sp)
	lw	$s4, 16($sp)
	lw	$s5, 20($sp)

	/* store Hx and Carry */
	sw	CA, PTR_POLY1305_CA
	sw	H0, PTR_POLY1305_H(0)
	sw	H1, PTR_POLY1305_H(1)
	sw	H2, PTR_POLY1305_H(2)
	sw	H3, PTR_POLY1305_H(3)
	sw	H4, PTR_POLY1305_H(4)

.Lpoly1305_blocks_mips_end:
	/* Release the frame; also reached directly when there is no full block. */
	addiu	$sp, POLY1305_STACK_SIZE

	/* Jump Back */
	jr	$ra
.end poly1305_blocks_mips
.set at
/* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */
#define MAC	$a1
#define NONCE	$a2

/* G0..G4: the limbs of h + 5; G0..G3 are reused afterwards for the nonce. */
#define G0	$t5
#define G1	$t6
#define G2	$t7
#define G3	$t8
#define G4	$t9

/*
 * poly1305_emit_mips
 *
 * Folds the pending carry into the accumulator, performs the final
 * conditional reduction, adds the nonce and writes the 16-byte tag.
 *
 * In:   $a0          state buffer (CA and H[0..4] are read, nothing is written)
 *       $a1 (MAC)    16-byte output; written with swl/swr, so it may be
 *                    unaligned. On big-endian builds each word is byte-swapped
 *                    before the store.
 *       $a2 (NONCE)  four 32-bit words, read with lwl/lwr and used without
 *                    any byte swap.
 * Uses: $t0-$t9, $v0, $at (hence `.set noat`). No stack frame.
 */
.set	noat
.align	4
.globl	poly1305_emit_mips
.ent	poly1305_emit_mips
poly1305_emit_mips:
	/* load Hx and Carry */
	lw	CA, PTR_POLY1305_CA
	lw	H0, PTR_POLY1305_H(0)
	lw	H1, PTR_POLY1305_H(1)
	lw	H2, PTR_POLY1305_H(2)
	lw	H3, PTR_POLY1305_H(3)
	lw	H4, PTR_POLY1305_H(4)

	/* Add left over carry */
	addu	H0, CA
	sltu	CA, H0, CA
	addu	H1, CA
	sltu	CA, H1, CA
	addu	H2, CA
	sltu	CA, H2, CA
	addu	H3, CA
	sltu	CA, H3, CA
	addu	H4, CA

	/* compare to modulus by computing h + -p
	 * (G = h + 5, with the carry rippled through all five limbs)
	 */
	addiu	G0, H0, 5
	sltu	CA, G0, H0
	addu	G1, H1, CA
	sltu	CA, G1, H1
	addu	G2, H2, CA
	sltu	CA, G2, H2
	addu	G3, H3, CA
	sltu	CA, G3, H3
	addu	G4, H4, CA

	/* SC != 0 iff any bit above bit 1 of the top limb is set in G4. */
	srl	SC, G4, 2

	/* if there was carry into 131st bit, h3:h0 = g3:g0 */
	movn	H0, G0, SC
	movn	H1, G1, SC
	movn	H2, G2, SC
	movn	H3, G3, SC

	/* Unaligned load of the four nonce words (G0..G3 are free again). */
	lwl	G0, 0+MSB(NONCE)
	lwl	G1, 4+MSB(NONCE)
	lwl	G2, 8+MSB(NONCE)
	lwl	G3,12+MSB(NONCE)
	lwr	G0, 0+LSB(NONCE)
	lwr	G1, 4+LSB(NONCE)
	lwr	G2, 8+LSB(NONCE)
	lwr	G3,12+LSB(NONCE)

	/* mac = (h + nonce) % (2^128) */
	addu	H0, G0
	sltu	CA, H0, G0

	/* H1 */
	addu	H1, CA
	sltu	CA, H1, CA
	addu	H1, G1
	sltu	G1, H1, G1
	addu	CA, G1

	/* H2 */
	addu	H2, CA
	sltu	CA, H2, CA
	addu	H2, G2
	sltu	G2, H2, G2
	addu	CA, G2

	/* H3 (the carry out of the top word is discarded: mod 2^128) */
	addu	H3, CA
	addu	H3, G3

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* wsbh + rotr 16 = full 32-bit byte swap of each output word. */
	wsbh	H0
	wsbh	H1
	wsbh	H2
	wsbh	H3
	rotr	H0, 16
	rotr	H1, 16
	rotr	H2, 16
	rotr	H3, 16
#endif

	/* store MAC */
	swl	H0, 0+MSB(MAC)
	swl	H1, 4+MSB(MAC)
	swl	H2, 8+MSB(MAC)
	swl	H3,12+MSB(MAC)
	swr	H0, 0+LSB(MAC)
	swr	H1, 4+LSB(MAC)
	swr	H2, 8+LSB(MAC)
	swr	H3,12+LSB(MAC)

	jr	$ra
.end poly1305_emit_mips
/* Re-enable assembler use of $at, mirroring poly1305_blocks_mips. */
.set at
/* PR0..PR3: the four key words being clamped; PT0: the clamp mask. */
#define PR0	$t0
#define PR1	$t1
#define PR2	$t2
#define PR3	$t3
#define PT0	$t4

/* Input arguments CTX=$a0, KEY=$a1 */

/*
 * poly1305_init_mips
 *
 * Initialises the state buffer from a key.
 *
 * In:   $a0  state buffer laid out as R[0..3], CA, H[0..4]
 *       $a1  key; 16 bytes are read with lwl/lwr, so it may be unaligned.
 *            On big-endian builds each word is byte-swapped after loading.
 * Out:  CA and H[0..4] are set to zero;
 *       R[0]    = key word 0 & 0x0fffffff,
 *       R[1..3] = key words 1..3 & 0x0ffffffc.
 * Uses: $t0-$t4. No stack frame.
 */
.align	4
.globl	poly1305_init_mips
.ent	poly1305_init_mips
poly1305_init_mips:
	/* Unaligned load of the four key words. */
	lwl	PR0, 0+MSB($a1)
	lwl	PR1, 4+MSB($a1)
	lwl	PR2, 8+MSB($a1)
	lwl	PR3,12+MSB($a1)
	lwr	PR0, 0+LSB($a1)
	lwr	PR1, 4+LSB($a1)
	lwr	PR2, 8+LSB($a1)
	lwr	PR3,12+LSB($a1)

	/* store Hx and Carry (all cleared to zero) */
	sw	$zero, PTR_POLY1305_CA
	sw	$zero, PTR_POLY1305_H(0)
	sw	$zero, PTR_POLY1305_H(1)
	sw	$zero, PTR_POLY1305_H(2)
	sw	$zero, PTR_POLY1305_H(3)
	sw	$zero, PTR_POLY1305_H(4)

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* wsbh + rotr 16 = full 32-bit byte swap of each key word. */
	wsbh	PR0
	wsbh	PR1
	wsbh	PR2
	wsbh	PR3
	rotr	PR0, 16
	rotr	PR1, 16
	rotr	PR2, 16
	rotr	PR3, 16
#endif

	/* PT0 = 0x0ffffffc */
	lui	PT0, 0x0FFF
	ori	PT0, 0xFFFC

	/* AND 0x0fffffff; (keep the low 28 bits) */
	ext	PR0, PR0, 0, (32-4)

	/* AND 0x0ffffffc; */
	and	PR1, PT0
	and	PR2, PT0
	and	PR3, PT0

	/* store Rx */
	sw	PR0, PTR_POLY1305_R(0)
	sw	PR1, PTR_POLY1305_R(1)
	sw	PR2, PTR_POLY1305_R(2)
	sw	PR3, PTR_POLY1305_R(3)

	/* Jump Back */
	jr	$ra
.end poly1305_init_mips