From aa4d426b4d3527d7e166df1a05058c9a4a0f6683 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 30 Apr 2021 00:33:56 +0200 Subject: initial/final commit --- openssl-1.1.0h/crypto/rc4/asm/rc4-s390x.pl | 241 +++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 openssl-1.1.0h/crypto/rc4/asm/rc4-s390x.pl (limited to 'openssl-1.1.0h/crypto/rc4/asm/rc4-s390x.pl') diff --git a/openssl-1.1.0h/crypto/rc4/asm/rc4-s390x.pl b/openssl-1.1.0h/crypto/rc4/asm/rc4-s390x.pl new file mode 100644 index 0000000..5589503 --- /dev/null +++ b/openssl-1.1.0h/crypto/rc4/asm/rc4-s390x.pl @@ -0,0 +1,241 @@ +#! /usr/bin/env perl +# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# February 2009 +# +# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to +# "cluster" Address Generation Interlocks, so that one pipeline stall +# resolves several dependencies. + +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. On z990 it was measured to perform +# 50% better than code generated by gcc 4.3. + +$flavour = shift; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$rp="%r14"; +$sp="%r15"; +$code=<<___; +.text + +___ + +# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out) +{ +$acc="%r0"; +$cnt="%r1"; +$key="%r2"; +$len="%r3"; +$inp="%r4"; +$out="%r5"; + +@XX=("%r6","%r7"); +@TX=("%r8","%r9"); +$YY="%r10"; +$TY="%r11"; + +$code.=<<___; +.globl RC4 +.type RC4,\@function +.align 64 +RC4: + stm${g} %r6,%r11,6*$SIZE_T($sp) +___ +$code.=<<___ if ($flavour =~ /3[12]/); + llgfr $len,$len +___ +$code.=<<___; + llgc $XX[0],0($key) + llgc $YY,1($key) + la $XX[0],1($XX[0]) + nill $XX[0],0xff + srlg $cnt,$len,3 + ltgr $cnt,$cnt + llgc $TX[0],2($XX[0],$key) + jz .Lshort + j .Loop8 + +.align 64 +.Loop8: +___ +for ($i=0;$i<8;$i++) { +$code.=<<___; + la $YY,0($YY,$TX[0]) # $i + nill $YY,255 + la $XX[1],1($XX[0]) + nill $XX[1],255 +___ +$code.=<<___ if ($i==1); + llgc $acc,2($TY,$key) +___ +$code.=<<___ if ($i>1); + sllg $acc,$acc,8 + ic $acc,2($TY,$key) +___ +$code.=<<___; + llgc $TY,2($YY,$key) + stc $TX[0],2($YY,$key) + llgc $TX[1],2($XX[1],$key) + stc $TY,2($XX[0],$key) + cr $XX[1],$YY + jne .Lcmov$i + la $TX[1],0($TX[0]) +.Lcmov$i: + la $TY,0($TY,$TX[0]) + nill $TY,255 +___ +push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers +} + +$code.=<<___; + lg $TX[1],0($inp) + sllg $acc,$acc,8 + la $inp,8($inp) + ic $acc,2($TY,$key) + xgr $acc,$TX[1] + stg $acc,0($out) + la $out,8($out) + brctg $cnt,.Loop8 + +.Lshort: + lghi $acc,7 + ngr $len,$acc + jz .Lexit + j .Loop1 + +.align 16 +.Loop1: + la $YY,0($YY,$TX[0]) + nill $YY,255 + llgc $TY,2($YY,$key) + stc $TX[0],2($YY,$key) + stc $TY,2($XX[0],$key) + ar $TY,$TX[0] + ahi $XX[0],1 + nill $TY,255 + nill $XX[0],255 + llgc $acc,0($inp) + la $inp,1($inp) + llgc $TY,2($TY,$key) + llgc $TX[0],2($XX[0],$key) + xr $acc,$TY + stc $acc,0($out) + la $out,1($out) + brct $len,.Loop1 + +.Lexit: + ahi $XX[0],-1 + stc $XX[0],0($key) + stc $YY,1($key) + lm${g} %r6,%r11,6*$SIZE_T($sp) + br $rp +.size RC4,.-RC4 +.string "RC4 for s390x, CRYPTOGAMS by " + +___ +} + +# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp) +{ +$cnt="%r0"; +$idx="%r1"; +$key="%r2"; +$len="%r3"; +$inp="%r4"; +$acc="%r5"; +$dat="%r6"; +$ikey="%r7"; +$iinp="%r8"; + +$code.=<<___; +.globl RC4_set_key +.type RC4_set_key,\@function +.align 64 +RC4_set_key: + stm${g} %r6,%r8,6*$SIZE_T($sp) + lhi $cnt,256 + la $idx,0(%r0) + sth $idx,0($key) +.align 4 +.L1stloop: + stc $idx,2($idx,$key) + la $idx,1($idx) + brct $cnt,.L1stloop + + lghi $ikey,-256 + lr $cnt,$len + la $iinp,0(%r0) + la $idx,0(%r0) +.align 16 +.L2ndloop: + llgc $acc,2+256($ikey,$key) + llgc $dat,0($iinp,$inp) + la $idx,0($idx,$acc) + la $ikey,1($ikey) + la $idx,0($idx,$dat) + nill $idx,255 + la $iinp,1($iinp) + tml $ikey,255 + llgc $dat,2($idx,$key) + stc $dat,2+256-1($ikey,$key) + stc $acc,2($idx,$key) + jz .Ldone + brct $cnt,.L2ndloop + lr $cnt,$len + la $iinp,0(%r0) + j .L2ndloop +.Ldone: + lm${g} %r6,%r8,6*$SIZE_T($sp) + br $rp +.size RC4_set_key,.-RC4_set_key + +___ +} + +# const char *RC4_options() +$code.=<<___; +.globl RC4_options +.type RC4_options,\@function +.align 16 +RC4_options: + larl %r2,.Loptions + br %r14 +.size RC4_options,.-RC4_options +.section .rodata +.Loptions: +.align 8 +.string "rc4(8x,char)" +___ + +print $code; +close STDOUT; # force flush -- cgit v1.2.3