From aa4d426b4d3527d7e166df1a05058c9a4a0f6683 Mon Sep 17 00:00:00 2001
From: Wojtek Kosior <wk@koszkonutek-tmp.pl.eu.org>
Date: Fri, 30 Apr 2021 00:33:56 +0200
Subject: initial/final commit

---
 openssl-1.1.0h/crypto/sha/asm/sha1-alpha.pl | 329 ++++++++++++++++++++++++++++
 1 file changed, 329 insertions(+)
 create mode 100644 openssl-1.1.0h/crypto/sha/asm/sha1-alpha.pl

(limited to 'openssl-1.1.0h/crypto/sha/asm/sha1-alpha.pl')

diff --git a/openssl-1.1.0h/crypto/sha/asm/sha1-alpha.pl b/openssl-1.1.0h/crypto/sha/asm/sha1-alpha.pl
new file mode 100644
index 0000000..4124958
--- /dev/null
+++ b/openssl-1.1.0h/crypto/sha/asm/sha1-alpha.pl
@@ -0,0 +1,329 @@
+#! /usr/bin/env perl
+# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# SHA1 block procedure for Alpha.
+
+# On 21264 performance is 33% better than code generated by vendor
+# compiler, and 75% better than GCC [3.4], and in absolute terms is
+# 8.7 cycles per processed byte. Implementation features vectorized
+# byte swap, but not Xupdate.
+
+@X=(	"\$0",	"\$1",	"\$2",	"\$3",	"\$4",	"\$5",	"\$6",	"\$7",
+	"\$8",	"\$9",	"\$10",	"\$11",	"\$12",	"\$13",	"\$14",	"\$15");
+$ctx="a0";	# $16
+$inp="a1";
+$num="a2";
+$A="a3";
+$B="a4";	# 20
+$C="a5";
+$D="t8";
+$E="t9";	@V=($A,$B,$C,$D,$E);
+$t0="t10";	# 24
+$t1="t11";
+$t2="ra";
+$t3="t12";
+$K="AT";	# 28
+
+sub BODY_00_19 {
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=$i+1;
+$code.=<<___ if ($i==0);
+	ldq_u	@X[0],0+0($inp)
+	ldq_u	@X[1],0+7($inp)
+___
+$code.=<<___ if (!($i&1) && $i<14);
+	ldq_u	@X[$i+2],($i+2)*4+0($inp)
+	ldq_u	@X[$i+3],($i+2)*4+7($inp)
+___
+$code.=<<___ if (!($i&1) && $i<15);
+	extql	@X[$i],$inp,@X[$i]
+	extqh	@X[$i+1],$inp,@X[$i+1]
+
+	or	@X[$i+1],@X[$i],@X[$i]	# pair of 32-bit values are fetched
+
+	srl	@X[$i],24,$t0		# vectorized byte swap
+	srl	@X[$i],8,$t2
+
+	sll	@X[$i],8,$t3
+	sll	@X[$i],24,@X[$i]
+	zapnot	$t0,0x11,$t0
+	zapnot	$t2,0x22,$t2
+
+	zapnot	@X[$i],0x88,@X[$i]
+	or	$t0,$t2,$t0
+	zapnot	$t3,0x44,$t3
+	sll	$a,5,$t1
+
+	or	@X[$i],$t0,@X[$i]
+	addl	$K,$e,$e
+	and	$b,$c,$t2
+	zapnot	$a,0xf,$a
+
+	or	@X[$i],$t3,@X[$i]
+	srl	$a,27,$t0
+	bic	$d,$b,$t3
+	sll	$b,30,$b
+
+	extll	@X[$i],4,@X[$i+1]	# extract upper half
+	or	$t2,$t3,$t2
+	addl	@X[$i],$e,$e
+
+	addl	$t1,$e,$e
+	srl	$b,32,$t3
+	zapnot	@X[$i],0xf,@X[$i]
+
+	addl	$t0,$e,$e
+	addl	$t2,$e,$e
+	or	$t3,$b,$b
+___
+$code.=<<___ if (($i&1) && $i<15);
+	sll	$a,5,$t1
+	addl	$K,$e,$e
+	and	$b,$c,$t2
+	zapnot	$a,0xf,$a
+
+	srl	$a,27,$t0
+	addl	@X[$i%16],$e,$e
+	bic	$d,$b,$t3
+	sll	$b,30,$b
+
+	or	$t2,$t3,$t2
+	addl	$t1,$e,$e
+	srl	$b,32,$t3
+	zapnot	@X[$i],0xf,@X[$i]
+
+	addl	$t0,$e,$e
+	addl	$t2,$e,$e
+	or	$t3,$b,$b
+___
+$code.=<<___ if ($i>=15);	# with forward Xupdate
+	sll	$a,5,$t1
+	addl	$K,$e,$e
+	and	$b,$c,$t2
+	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
+
+	zapnot	$a,0xf,$a
+	addl	@X[$i%16],$e,$e
+	bic	$d,$b,$t3
+	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
+
+	srl	$a,27,$t0
+	addl	$t1,$e,$e
+	or	$t2,$t3,$t2
+	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
+
+	sll	$b,30,$b
+	addl	$t0,$e,$e
+	srl	@X[$j%16],31,$t1
+
+	addl	$t2,$e,$e
+	srl	$b,32,$t3
+	addl	@X[$j%16],@X[$j%16],@X[$j%16]
+
+	or	$t3,$b,$b
+	zapnot	@X[$i%16],0xf,@X[$i%16]
+	or	$t1,@X[$j%16],@X[$j%16]
+___
+}
+
+sub BODY_20_39 {
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=$i+1;
+$code.=<<___ if ($i<79);	# with forward Xupdate
+	sll	$a,5,$t1
+	addl	$K,$e,$e
+	zapnot	$a,0xf,$a
+	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
+
+	sll	$b,30,$t3
+	addl	$t1,$e,$e
+	xor	$b,$c,$t2
+	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
+
+	srl	$b,2,$b
+	addl	@X[$i%16],$e,$e
+	xor	$d,$t2,$t2
+	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
+
+	srl	@X[$j%16],31,$t1
+	addl	$t2,$e,$e
+	srl	$a,27,$t0
+	addl	@X[$j%16],@X[$j%16],@X[$j%16]
+
+	or	$t3,$b,$b
+	addl	$t0,$e,$e
+	or	$t1,@X[$j%16],@X[$j%16]
+___
+$code.=<<___ if ($i<77);
+	zapnot	@X[$i%16],0xf,@X[$i%16]
+___
+$code.=<<___ if ($i==79);	# with context fetch
+	sll	$a,5,$t1
+	addl	$K,$e,$e
+	zapnot	$a,0xf,$a
+	ldl	@X[0],0($ctx)
+
+	sll	$b,30,$t3
+	addl	$t1,$e,$e
+	xor	$b,$c,$t2
+	ldl	@X[1],4($ctx)
+
+	srl	$b,2,$b
+	addl	@X[$i%16],$e,$e
+	xor	$d,$t2,$t2
+	ldl	@X[2],8($ctx)
+
+	srl	$a,27,$t0
+	addl	$t2,$e,$e
+	ldl	@X[3],12($ctx)
+
+	or	$t3,$b,$b
+	addl	$t0,$e,$e
+	ldl	@X[4],16($ctx)
+___
+}
+
+sub BODY_40_59 {
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=$i+1;
+$code.=<<___;	# with forward Xupdate
+	sll	$a,5,$t1
+	addl	$K,$e,$e
+	zapnot	$a,0xf,$a
+	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
+
+	srl	$a,27,$t0
+	and	$b,$c,$t2
+	and	$b,$d,$t3
+	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
+
+	sll	$b,30,$b
+	addl	$t1,$e,$e
+	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
+
+	srl	@X[$j%16],31,$t1
+	addl	$t0,$e,$e
+	or	$t2,$t3,$t2
+	and	$c,$d,$t3
+
+	or	$t2,$t3,$t2
+	srl	$b,32,$t3
+	addl	@X[$i%16],$e,$e
+	addl	@X[$j%16],@X[$j%16],@X[$j%16]
+
+	or	$t3,$b,$b
+	addl	$t2,$e,$e
+	or	$t1,@X[$j%16],@X[$j%16]
+	zapnot	@X[$i%16],0xf,@X[$i%16]
+___
+}
+
+$code=<<___;
+#ifdef __linux__
+#include <asm/regdef.h>
+#else
+#include <asm.h>
+#include <regdef.h>
+#endif
+
+.text
+
+.set	noat
+.set	noreorder
+.globl	sha1_block_data_order
+.align	5
+.ent	sha1_block_data_order
+sha1_block_data_order:
+	lda	sp,-64(sp)
+	stq	ra,0(sp)
+	stq	s0,8(sp)
+	stq	s1,16(sp)
+	stq	s2,24(sp)
+	stq	s3,32(sp)
+	stq	s4,40(sp)
+	stq	s5,48(sp)
+	stq	fp,56(sp)
+	.mask	0x0400fe00,-64
+	.frame	sp,64,ra
+	.prologue 0
+
+	ldl	$A,0($ctx)
+	ldl	$B,4($ctx)
+	sll	$num,6,$num
+	ldl	$C,8($ctx)
+	ldl	$D,12($ctx)
+	ldl	$E,16($ctx)
+	addq	$inp,$num,$num
+
+.Lloop:
+	.set	noreorder
+	ldah	$K,23170(zero)
+	zapnot	$B,0xf,$B
+	lda	$K,31129($K)	# K_00_19
+___
+for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
+
+$code.=<<___;
+	ldah	$K,28378(zero)
+	lda	$K,-5215($K)	# K_20_39
+___
+for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+
+$code.=<<___;
+	ldah	$K,-28900(zero)
+	lda	$K,-17188($K)	# K_40_59
+___
+for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
+
+$code.=<<___;
+	ldah	$K,-13725(zero)
+	lda	$K,-15914($K)	# K_60_79
+___
+for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+
+$code.=<<___;
+	addl	@X[0],$A,$A
+	addl	@X[1],$B,$B
+	addl	@X[2],$C,$C
+	addl	@X[3],$D,$D
+	addl	@X[4],$E,$E
+	stl	$A,0($ctx)
+	stl	$B,4($ctx)
+	addq	$inp,64,$inp
+	stl	$C,8($ctx)
+	stl	$D,12($ctx)
+	stl	$E,16($ctx)
+	cmpult	$inp,$num,$t1
+	bne	$t1,.Lloop
+
+	.set	noreorder
+	ldq	ra,0(sp)
+	ldq	s0,8(sp)
+	ldq	s1,16(sp)
+	ldq	s2,24(sp)
+	ldq	s3,32(sp)
+	ldq	s4,40(sp)
+	ldq	s5,48(sp)
+	ldq	fp,56(sp)
+	lda	sp,64(sp)
+	ret	(ra)
+.end	sha1_block_data_order
+.ascii	"SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
+.align	2
+___
+$output=pop and open STDOUT,">$output";
+print $code;
+close STDOUT;
-- 
cgit v1.2.3