From aa4d426b4d3527d7e166df1a05058c9a4a0f6683 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 30 Apr 2021 00:33:56 +0200 Subject: initial/final commit --- openssl-1.1.0h/crypto/perlasm/README | 124 ++ openssl-1.1.0h/crypto/perlasm/arm-xlate.pl | 177 +++ openssl-1.1.0h/crypto/perlasm/cbc.pl | 356 +++++ openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl | 265 ++++ openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl | 1702 ++++++++++++++++++++++++ openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl | 1186 +++++++++++++++++ openssl-1.1.0h/crypto/perlasm/x86asm.pl | 310 +++++ openssl-1.1.0h/crypto/perlasm/x86gas.pl | 265 ++++ openssl-1.1.0h/crypto/perlasm/x86masm.pl | 207 +++ openssl-1.1.0h/crypto/perlasm/x86nasm.pl | 186 +++ 10 files changed, 4778 insertions(+) create mode 100644 openssl-1.1.0h/crypto/perlasm/README create mode 100755 openssl-1.1.0h/crypto/perlasm/arm-xlate.pl create mode 100644 openssl-1.1.0h/crypto/perlasm/cbc.pl create mode 100755 openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl create mode 100644 openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl create mode 100755 openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl create mode 100644 openssl-1.1.0h/crypto/perlasm/x86asm.pl create mode 100644 openssl-1.1.0h/crypto/perlasm/x86gas.pl create mode 100644 openssl-1.1.0h/crypto/perlasm/x86masm.pl create mode 100644 openssl-1.1.0h/crypto/perlasm/x86nasm.pl (limited to 'openssl-1.1.0h/crypto/perlasm') diff --git a/openssl-1.1.0h/crypto/perlasm/README b/openssl-1.1.0h/crypto/perlasm/README new file mode 100644 index 0000000..e90bd8e --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/README @@ -0,0 +1,124 @@ +The perl scripts in this directory are my 'hack' to generate +multiple different assembler formats via the one original script. + +The way to use this library is to start by adding the path to this directory +and then include it. + +push(@INC,"perlasm","../../perlasm"); +require "x86asm.pl"; + +The first thing we do is set up the output file and the type of assembler + +&asm_init($ARGV[0],$0); + +The first argument is the 'type'. Currently +'cpp', 'sol', 'a.out', 'elf' or 'win32'. Argument 2 is the file name. + +The reciprocal function is +&asm_finish() which should be called at the end. + +There are 2 main 'packages': x86ms.pl, which targets the Microsoft assembler, +and x86unix.pl, which is the unix (gas) version. + +Functions of interest are: +&external_label("des_SPtrans"); declare an external variable +&LB(reg); Low byte for a register +&HB(reg); High byte for a register +&BP(off,base,index,scale) Byte pointer addressing +&DWP(off,base,index,scale) Dword (4-byte) pointer addressing +&stack_push(num) Basically a 'sub esp, num*4' with extra bookkeeping +&stack_pop(num) inverse of stack_push +&function_begin(name,extra) Start a function with pushing of + edi, esi, ebx and ebp. extra is extra win32 + external info that may be required. +&function_begin_B(name,extra) Same as normal function_begin but no pushing. +&function_end(name) Call at end of function. +&function_end_A(name) Standard pop and ret, for use inside functions +&function_end_B(name) Call at end but without popping or 'ret'. +&swtmp(num) Address on stack temp word. +&wparam(num) Parameter number num, as pushed + in the C calling convention. This all keeps working over pushes + and pops. +&comment("hello there") Put in a comment. +&label("loop") Refer to a label, normally a jmp target. +&set_label("loop") Set a label at this point. +&data_word(word) Put in a word of data. + +So how does this all hold together?
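(A minimal sketch first, not part of the original README: the function name "add2" is made up for illustration, and the &mov/&add/&ret instruction wrappers are assumed to be provided by the x86 back-ends, as they are throughout the scripts later in this patch.)

push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],$0);

&comment("int add2(int a, int b) { return a + b; }");
&function_begin_B("add2");
&mov("eax",&wparam(0));		# first C argument
&add("eax",&wparam(1));		# second C argument
&ret();
&function_end_B("add2");

&asm_finish();

The README's own worked example follows.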
Given + +int calc(int len, int *data) + { + int i,j=0; + + for (i=0; i$output" || die "can't open $output: $!"; + +$flavour = "linux32" if (!$flavour or $flavour eq "void"); + +my %GLOBALS; +my $dotinlocallabels=($flavour=~/linux/)?1:0; + +################################################################ +# directives which need special treatment on different platforms +################################################################ +my $arch = sub { + if ($flavour =~ /linux/) { ".arch\t".join(',',@_); } + else { ""; } +}; +my $fpu = sub { + if ($flavour =~ /linux/) { ".fpu\t".join(',',@_); } + else { ""; } +}; +my $hidden = sub { + if ($flavour =~ /ios/) { ".private_extern\t".join(',',@_); } + else { ".hidden\t".join(',',@_); } +}; +my $comm = sub { + my @args = split(/,\s*/,shift); + my $name = @args[0]; + my $global = \$GLOBALS{$name}; + my $ret; + + if ($flavour =~ /ios32/) { + $ret = ".comm\t_$name,@args[1]\n"; + $ret .= ".non_lazy_symbol_pointer\n"; + $ret .= "$name:\n"; + $ret .= ".indirect_symbol\t_$name\n"; + $ret .= ".long\t0"; + $name = "_$name"; + } else { $ret = ".comm\t".join(',',@args); } + + $$global = $name; + $ret; +}; +my $globl = sub { + my $name = shift; + my $global = \$GLOBALS{$name}; + my $ret; + + SWITCH: for ($flavour) { + /ios/ && do { $name = "_$name"; + last; + }; + } + + $ret = ".globl $name" if (!$ret); + $$global = $name; + $ret; +}; +my $global = $globl; +my $extern = sub { + &$globl(@_); + return; # return nothing +}; +my $type = sub { + if ($flavour =~ /linux/) { ".type\t".join(',',@_); } + elsif ($flavour =~ /ios32/) { if (join(',',@_) =~ /(\w+),%function/) { + "#ifdef __thumb2__\n". + ".thumb_func $1\n". + "#endif"; + } + } + else { ""; } +}; +my $size = sub { + if ($flavour =~ /linux/) { ".size\t".join(',',@_); } + else { ""; } +}; +my $inst = sub { + if ($flavour =~ /linux/) { ".inst\t".join(',',@_); } + else { ".long\t".join(',',@_); } +}; +my $asciz = sub { + my $line = join(",",@_); + if ($line =~ /^"(.*)"$/) + { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } + else + { ""; } +}; + +sub range { + my ($r,$sfx,$start,$end) = @_; + + join(",",map("$r$_$sfx",($start..$end))); +} + +sub expand_line { + my $line = shift; + my @ret = (); + + pos($line)=0; + + while ($line =~ m/\G[^@\/\{\"]*/g) { + if ($line =~ m/\G(@|\/\/|$)/gc) { + last; + } + elsif ($line =~ m/\G\{/gc) { + my $saved_pos = pos($line); + $line =~ s/\G([rdqv])([0-9]+)([^\-]*)\-\1([0-9]+)\3/range($1,$3,$2,$4)/e; + pos($line) = $saved_pos; + $line =~ m/\G[^\}]*\}/g; + } + elsif ($line =~ m/\G\"/gc) { + $line =~ m/\G[^\"]*\"/g; + } + } + + $line =~ s/\b(\w+)/$GLOBALS{$1} or $1/ge; + + return $line; +} + +while(my $line=<>) { + + if ($line =~ m/^\s*(#|@|\/\/)/) { print $line; next; } + + $line =~ s|/\*.*\*/||; # get rid of C-style comments... + $line =~ s|^\s+||; # ... and skip white spaces in beginning... + $line =~ s|\s+$||; # ... 
and at the end + + { + $line =~ s|[\b\.]L(\w{2,})|L$1|g; # common denominator for Locallabel + $line =~ s|\bL(\w{2,})|\.L$1|g if ($dotinlocallabels); + } + + { + $line =~ s|(^[\.\w]+)\:\s*||; + my $label = $1; + if ($label) { + printf "%s:",($GLOBALS{$label} or $label); + } + } + + if ($line !~ m/^[#@]/) { + $line =~ s|^\s*(\.?)(\S+)\s*||; + my $c = $1; $c = "\t" if ($c eq ""); + my $mnemonic = $2; + my $opcode; + if ($mnemonic =~ m/([^\.]+)\.([^\.]+)/) { + $opcode = eval("\$$1_$2"); + } else { + $opcode = eval("\$$mnemonic"); + } + + my $arg=expand_line($line); + + if (ref($opcode) eq 'CODE') { + $line = &$opcode($arg); + } elsif ($mnemonic) { + $line = $c.$mnemonic; + $line.= "\t$arg" if ($arg ne ""); + } + } + + print $line if ($line); + print "\n"; +} + +close STDOUT; diff --git a/openssl-1.1.0h/crypto/perlasm/cbc.pl b/openssl-1.1.0h/crypto/perlasm/cbc.pl new file mode 100644 index 0000000..ad79b24 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/cbc.pl @@ -0,0 +1,356 @@ +#! /usr/bin/env perl +# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + + +# void des_ncbc_encrypt(input, output, length, schedule, ivec, enc) +# des_cblock (*input); +# des_cblock (*output); +# long length; +# des_key_schedule schedule; +# des_cblock (*ivec); +# int enc; +# +# calls +# des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT); +# + +#&cbc("des_ncbc_encrypt","des_encrypt",0); +#&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt", +# 1,4,5,3,5,-1); +#&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt", +# 0,4,5,3,5,-1); +#&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3", +# 0,6,7,3,4,5); +# +# When doing a cipher that needs bigendian order, +# for encrypt, the iv is kept in bigendian form, +# while for decrypt, it is kept in little endian. +sub cbc + { + local($name,$enc_func,$dec_func,$swap,$iv_off,$enc_off,$p1,$p2,$p3)=@_; + # name is the function name + # enc_func and dec_func and the functions to call for encrypt/decrypt + # swap is true if byte order needs to be reversed + # iv_off is parameter number for the iv + # enc_off is parameter number for the encrypt/decrypt flag + # p1,p2,p3 are the offsets for parameters to be passed to the + # underlying calls. 
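	# For instance, reading those parameters against the commented-out
	# &cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5)
	# call above: swap=0, the ivec is C parameter 6, the enc/dec flag is
	# parameter 7, and parameters 3, 4 and 5 (the three key schedules) are
	# the ones re-pushed for the underlying des_encrypt3/des_decrypt3 calls.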
+ + &function_begin_B($name,""); + &comment(""); + + $in="esi"; + $out="edi"; + $count="ebp"; + + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); + + $data_off=4; + $data_off+=4 if ($p1 > 0); + $data_off+=4 if ($p2 > 0); + $data_off+=4 if ($p3 > 0); + + &mov($count, &wparam(2)); # length + + &comment("getting iv ptr from parameter $iv_off"); + &mov("ebx", &wparam($iv_off)); # Get iv ptr + + &mov($in, &DWP(0,"ebx","",0));# iv[0] + &mov($out, &DWP(4,"ebx","",0));# iv[1] + + &push($out); + &push($in); + &push($out); # used in decrypt for iv[1] + &push($in); # used in decrypt for iv[0] + + &mov("ebx", "esp"); # This is the address of tin[2] + + &mov($in, &wparam(0)); # in + &mov($out, &wparam(1)); # out + + # We have loaded them all, how lets push things + &comment("getting encrypt flag from parameter $enc_off"); + &mov("ecx", &wparam($enc_off)); # Get enc flag + if ($p3 > 0) + { + &comment("get and push parameter $p3"); + if ($enc_off != $p3) + { &mov("eax", &wparam($p3)); &push("eax"); } + else { &push("ecx"); } + } + if ($p2 > 0) + { + &comment("get and push parameter $p2"); + if ($enc_off != $p2) + { &mov("eax", &wparam($p2)); &push("eax"); } + else { &push("ecx"); } + } + if ($p1 > 0) + { + &comment("get and push parameter $p1"); + if ($enc_off != $p1) + { &mov("eax", &wparam($p1)); &push("eax"); } + else { &push("ecx"); } + } + &push("ebx"); # push data/iv + + &cmp("ecx",0); + &jz(&label("decrypt")); + + &and($count,0xfffffff8); + &mov("eax", &DWP($data_off,"esp","",0)); # load iv[0] + &mov("ebx", &DWP($data_off+4,"esp","",0)); # load iv[1] + + &jz(&label("encrypt_finish")); + + ############################################################# + + &set_label("encrypt_loop"); + # encrypt start + # "eax" and "ebx" hold iv (or the last cipher text) + + &mov("ecx", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("edx", &DWP(4,$in,"",0)); # second 4 bytes + + &xor("eax", "ecx"); + &xor("ebx", "edx"); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($enc_func); + + &mov("eax", &DWP($data_off,"esp","",0)); + &mov("ebx", &DWP($data_off+4,"esp","",0)); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP(0,$out,"",0),"eax"); + &mov(&DWP(4,$out,"",0),"ebx"); + + # eax and ebx are the next iv. 
+ + &add($in, 8); + &add($out, 8); + + &sub($count, 8); + &jnz(&label("encrypt_loop")); + +###################################################################3 + &set_label("encrypt_finish"); + &mov($count, &wparam(2)); # length + &and($count, 7); + &jz(&label("finish")); + &call(&label("PIC_point")); +&set_label("PIC_point"); + &blindpop("edx"); + &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); + &mov($count,&DWP(0,"ecx",$count,4)); + &add($count,"edx"); + &xor("ecx","ecx"); + &xor("edx","edx"); + #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4)); + &jmp_ptr($count); + +&set_label("ej7"); + &movb(&HB("edx"), &BP(6,$in,"",0)); + &shl("edx",8); +&set_label("ej6"); + &movb(&HB("edx"), &BP(5,$in,"",0)); +&set_label("ej5"); + &movb(&LB("edx"), &BP(4,$in,"",0)); +&set_label("ej4"); + &mov("ecx", &DWP(0,$in,"",0)); + &jmp(&label("ejend")); +&set_label("ej3"); + &movb(&HB("ecx"), &BP(2,$in,"",0)); + &shl("ecx",8); +&set_label("ej2"); + &movb(&HB("ecx"), &BP(1,$in,"",0)); +&set_label("ej1"); + &movb(&LB("ecx"), &BP(0,$in,"",0)); +&set_label("ejend"); + + &xor("eax", "ecx"); + &xor("ebx", "edx"); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($enc_func); + + &mov("eax", &DWP($data_off,"esp","",0)); + &mov("ebx", &DWP($data_off+4,"esp","",0)); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP(0,$out,"",0),"eax"); + &mov(&DWP(4,$out,"",0),"ebx"); + + &jmp(&label("finish")); + + ############################################################# + ############################################################# + &set_label("decrypt",1); + # decrypt start + &and($count,0xfffffff8); + # The next 2 instructions are only for if the jz is taken + &mov("eax", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("ebx", &DWP($data_off+12,"esp","",0)); # get iv[1] + &jz(&label("decrypt_finish")); + + &set_label("decrypt_loop"); + &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put back + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($dec_func); + + &mov("eax", &DWP($data_off,"esp","",0)); # get return + &mov("ebx", &DWP($data_off+4,"esp","",0)); # + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1] + + &xor("ecx", "eax"); + &xor("edx", "ebx"); + + &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, + &mov("ebx", &DWP(4,$in,"",0)); # next iv actually + + &mov(&DWP(0,$out,"",0),"ecx"); + &mov(&DWP(4,$out,"",0),"edx"); + + &mov(&DWP($data_off+8,"esp","",0), "eax"); # save iv + &mov(&DWP($data_off+12,"esp","",0), "ebx"); # + + &add($in, 8); + &add($out, 8); + + &sub($count, 8); + &jnz(&label("decrypt_loop")); +############################ ENDIT #######################3 + &set_label("decrypt_finish"); + &mov($count, &wparam(2)); # length + &and($count, 7); + &jz(&label("finish")); + + &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put back + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($dec_func); + + &mov("eax", &DWP($data_off,"esp","",0)); # get return + &mov("ebx", 
&DWP($data_off+4,"esp","",0)); # + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1] + + &xor("ecx", "eax"); + &xor("edx", "ebx"); + + # this is for when we exit + &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, + &mov("ebx", &DWP(4,$in,"",0)); # next iv actually + +&set_label("dj7"); + &rotr("edx", 16); + &movb(&BP(6,$out,"",0), &LB("edx")); + &shr("edx",16); +&set_label("dj6"); + &movb(&BP(5,$out,"",0), &HB("edx")); +&set_label("dj5"); + &movb(&BP(4,$out,"",0), &LB("edx")); +&set_label("dj4"); + &mov(&DWP(0,$out,"",0), "ecx"); + &jmp(&label("djend")); +&set_label("dj3"); + &rotr("ecx", 16); + &movb(&BP(2,$out,"",0), &LB("ecx")); + &shl("ecx",16); +&set_label("dj2"); + &movb(&BP(1,$in,"",0), &HB("ecx")); +&set_label("dj1"); + &movb(&BP(0,$in,"",0), &LB("ecx")); +&set_label("djend"); + + # final iv is still in eax:ebx + &jmp(&label("finish")); + + +############################ FINISH #######################3 + &set_label("finish",1); + &mov("ecx", &wparam($iv_off)); # Get iv ptr + + ################################################# + $total=16+4; + $total+=4 if ($p1 > 0); + $total+=4 if ($p2 > 0); + $total+=4 if ($p3 > 0); + &add("esp",$total); + + &mov(&DWP(0,"ecx","",0), "eax"); # save iv + &mov(&DWP(4,"ecx","",0), "ebx"); # save iv + + &function_end_A($name); + + &align(64); + &set_label("cbc_enc_jmp_table"); + &data_word("0"); + &data_word(&label("ej1")."-".&label("PIC_point")); + &data_word(&label("ej2")."-".&label("PIC_point")); + &data_word(&label("ej3")."-".&label("PIC_point")); + &data_word(&label("ej4")."-".&label("PIC_point")); + &data_word(&label("ej5")."-".&label("PIC_point")); + &data_word(&label("ej6")."-".&label("PIC_point")); + &data_word(&label("ej7")."-".&label("PIC_point")); + # not used + #&set_label("cbc_dec_jmp_table",1); + #&data_word("0"); + #&data_word(&label("dj1")."-".&label("PIC_point")); + #&data_word(&label("dj2")."-".&label("PIC_point")); + #&data_word(&label("dj3")."-".&label("PIC_point")); + #&data_word(&label("dj4")."-".&label("PIC_point")); + #&data_word(&label("dj5")."-".&label("PIC_point")); + #&data_word(&label("dj6")."-".&label("PIC_point")); + #&data_word(&label("dj7")."-".&label("PIC_point")); + &align(64); + + &function_end_B($name); + + } + +1; diff --git a/openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl b/openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl new file mode 100755 index 0000000..2d46e24 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl @@ -0,0 +1,265 @@ +#! /usr/bin/env perl +# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. 
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +my $flavour = shift; +my $output = shift; +open STDOUT,">$output" || die "can't open $output: $!"; + +my %GLOBALS; +my $dotinlocallabels=($flavour=~/linux/)?1:0; + +################################################################ +# directives which need special treatment on different platforms +################################################################ +my $globl = sub { + my $junk = shift; + my $name = shift; + my $global = \$GLOBALS{$name}; + my $ret; + + $name =~ s|^[\.\_]||; + + SWITCH: for ($flavour) { + /aix/ && do { $name = ".$name"; + last; + }; + /osx/ && do { $name = "_$name"; + last; + }; + /linux.*(32|64le)/ + && do { $ret .= ".globl $name\n"; + $ret .= ".type $name,\@function"; + last; + }; + /linux.*64/ && do { $ret .= ".globl $name\n"; + $ret .= ".type $name,\@function\n"; + $ret .= ".section \".opd\",\"aw\"\n"; + $ret .= ".align 3\n"; + $ret .= "$name:\n"; + $ret .= ".quad .$name,.TOC.\@tocbase,0\n"; + $ret .= ".previous\n"; + + $name = ".$name"; + last; + }; + } + + $ret = ".globl $name" if (!$ret); + $$global = $name; + $ret; +}; +my $text = sub { + my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; + $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); + $ret; +}; +my $machine = sub { + my $junk = shift; + my $arch = shift; + if ($flavour =~ /osx/) + { $arch =~ s/\"//g; + $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); + } + ".machine $arch"; +}; +my $size = sub { + if ($flavour =~ /linux/) + { shift; + my $name = shift; $name =~ s|^[\.\_]||; + my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; + $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); + $ret; + } + else + { ""; } +}; +my $asciz = sub { + shift; + my $line = join(",",@_); + if ($line =~ /^"(.*)"$/) + { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } + else + { ""; } +}; +my $quad = sub { + shift; + my @ret; + my ($hi,$lo); + for (@_) { + if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) + { $hi=$1?"0x$1":"0"; $lo="0x$2"; } + elsif (/^([0-9]+)$/o) + { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl + else + { $hi=undef; $lo=$_; } + + if (defined($hi)) + { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } + else + { push(@ret,".quad $lo"); } + } + join("\n",@ret); +}; + +################################################################ +# simplified mnemonics not handled by at least one assembler +################################################################ +my $cmplw = sub { + my $f = shift; + my $cr = 0; $cr = shift if ($#_>1); + # Some out-of-date 32-bit GNU assembler just can't handle cmplw... + ($flavour =~ /linux.*32/) ? + " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : + " cmplw ".join(',',$cr,@_); +}; +my $bdnz = sub { + my $f = shift; + my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint + " bc $bo,0,".shift; +} if ($flavour!~/linux/); +my $bltlr = sub { + my $f = shift; + my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : + " bclr $bo,0"; +}; +my $bnelr = sub { + my $f = shift; + my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint + ($flavour =~ /linux/) ? 
# GNU as doesn't allow most recent hints + " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : + " bclr $bo,2"; +}; +my $beqlr = sub { + my $f = shift; + my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : + " bclr $bo,2"; +}; +# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two +# arguments is 64, with "operand out of range" error. +my $extrdi = sub { + my ($f,$ra,$rs,$n,$b) = @_; + $b = ($b+$n)&63; $n = 64-$n; + " rldicl $ra,$rs,$b,$n"; +}; +my $vmr = sub { + my ($f,$vx,$vy) = @_; + " vor $vx,$vy,$vy"; +}; + +# Some ABIs specify vrsave, special-purpose register #256, as reserved +# for system use. +my $no_vrsave = ($flavour =~ /aix|linux64le/); +my $mtspr = sub { + my ($f,$idx,$ra) = @_; + if ($idx == 256 && $no_vrsave) { + " or $ra,$ra,$ra"; + } else { + " mtspr $idx,$ra"; + } +}; +my $mfspr = sub { + my ($f,$rd,$idx) = @_; + if ($idx == 256 && $no_vrsave) { + " li $rd,-1"; + } else { + " mfspr $rd,$idx"; + } +}; + +# PowerISA 2.06 stuff +sub vsxmem_op { + my ($f, $vrt, $ra, $rb, $op) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); +} +# made-up unaligned memory reference AltiVec/VMX instructions +my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x +my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x +my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx +my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx +my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x +my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x + +# PowerISA 2.07 stuff +sub vcrypto_op { + my ($f, $vrt, $vra, $vrb, $op) = @_; + " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; +} +my $vcipher = sub { vcrypto_op(@_, 1288); }; +my $vcipherlast = sub { vcrypto_op(@_, 1289); }; +my $vncipher = sub { vcrypto_op(@_, 1352); }; +my $vncipherlast= sub { vcrypto_op(@_, 1353); }; +my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; +my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; +my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; +my $vpmsumb = sub { vcrypto_op(@_, 1032); }; +my $vpmsumd = sub { vcrypto_op(@_, 1224); }; +my $vpmsubh = sub { vcrypto_op(@_, 1096); }; +my $vpmsumw = sub { vcrypto_op(@_, 1160); }; +my $vaddudm = sub { vcrypto_op(@_, 192); }; + +my $mtsle = sub { + my ($f, $arg) = @_; + " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); +}; + +# PowerISA 3.0 stuff +my $maddhdu = sub { + my ($f, $rt, $ra, $rb, $rc) = @_; + " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|49; +}; +my $maddld = sub { + my ($f, $rt, $ra, $rb, $rc) = @_; + " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|51; +}; + +my $darn = sub { + my ($f, $rt, $l) = @_; + " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1); +}; + +while($line=<>) { + + $line =~ s|[#!;].*$||; # get rid of asm-style comments... + $line =~ s|/\*.*\*/||; # ... and C-style comments... + $line =~ s|^\s+||; # ... and skip white spaces in beginning... + $line =~ s|\s+$||; # ... 
and at the end + + { + $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel + $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); + } + + { + $line =~ s|(^[\.\w]+)\:\s*||; + my $label = $1; + if ($label) { + printf "%s:",($GLOBALS{$label} or $label); + printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/); + } + } + + { + $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; + my $c = $1; $c = "\t" if ($c eq ""); + my $mnemonic = $2; + my $f = $3; + my $opcode = eval("\$$mnemonic"); + $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); + if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } + elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } + } + + print $line if ($line); + print "\n"; +} + +close STDOUT; diff --git a/openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl b/openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl new file mode 100644 index 0000000..bfdada8 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl @@ -0,0 +1,1702 @@ +#! /usr/bin/env perl +# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + + +# Specific modes implementations for SPARC Architecture 2011. There +# is T4 dependency though, an ASI value that is not specified in the +# Architecture Manual. But as SPARC universe is rather monocultural, +# we imply that processor capable of executing crypto instructions +# can handle the ASI in question as well. This means that we ought to +# keep eyes open when new processors emerge... +# +# As for above mentioned ASI. It's so called "block initializing +# store" which cancels "read" in "read-update-write" on cache lines. +# This is "cooperative" optimization, as it reduces overall pressure +# on memory interface. Benefits can't be observed/quantified with +# usual benchmarks, on the contrary you can notice that single-thread +# performance for parallelizable modes is ~1.5% worse for largest +# block sizes [though few percent better for not so long ones]. All +# this based on suggestions from David Miller. + +$::bias="STACK_BIAS"; +$::frame="STACK_FRAME"; +$::size_t_cc="SIZE_T_CC"; + +sub asm_init { # to be called with @ARGV as argument + for (@_) { $::abibits=64 if (/\-m64/ || /\-xarch\=v9/); } + if ($::abibits==64) { $::bias=2047; $::frame=192; $::size_t_cc="%xcc"; } + else { $::bias=0; $::frame=112; $::size_t_cc="%icc"; } +} + +# unified interface +my ($inp,$out,$len,$key,$ivec)=map("%i$_",(0..5)); +# local variables +my ($ileft,$iright,$ooff,$omask,$ivoff,$blk_init)=map("%l$_",(0..7)); + +sub alg_cbc_encrypt_implement { +my ($alg,$bits) = @_; + +$::code.=<<___; +.globl ${alg}${bits}_t4_cbc_encrypt +.align 32 +${alg}${bits}_t4_cbc_encrypt: + save %sp, -$::frame, %sp + cmp $len, 0 + be,pn $::size_t_cc, .L${bits}_cbc_enc_abort + srln $len, 0, $len ! needed on v8+, "nop" on v9 + sub $inp, $out, $blk_init ! $inp!=$out +___ +$::code.=<<___ if (!$::evp); + andcc $ivec, 7, $ivoff + alignaddr $ivec, %g0, $ivec + + ldd [$ivec + 0], %f0 ! 
load ivec + bz,pt %icc, 1f + ldd [$ivec + 8], %f2 + ldd [$ivec + 16], %f4 + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 +1: +___ +$::code.=<<___ if ($::evp); + ld [$ivec + 0], %f0 + ld [$ivec + 4], %f1 + ld [$ivec + 8], %f2 + ld [$ivec + 12], %f3 +___ +$::code.=<<___; + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + call _${alg}${bits}_load_enckey + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 64, $iright + mov 0xff, $omask + sub $iright, $ileft, $iright + and $out, 7, $ooff + cmp $len, 127 + movrnz $ooff, 0, $blk_init ! if ( $out&7 || + movleu $::size_t_cc, 0, $blk_init ! $len<128 || + brnz,pn $blk_init, .L${bits}cbc_enc_blk ! $inp==$out) + srl $omask, $ooff, $omask + + alignaddrl $out, %g0, $out + srlx $len, 4, $len + prefetch [$out], 22 + +.L${bits}_cbc_enc_loop: + ldx [$inp + 0], %o0 + brz,pt $ileft, 4f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +4: + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + movxtod %o0, %f12 + movxtod %o1, %f14 + + fxor %f12, %f0, %f0 ! ^= ivec + fxor %f14, %f2, %f2 + prefetch [$out + 63], 22 + prefetch [$inp + 16+63], 20 + call _${alg}${bits}_encrypt_1x + add $inp, 16, $inp + + brnz,pn $ooff, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + brnz,pt $len, .L${bits}_cbc_enc_loop + add $out, 16, $out +___ +$::code.=<<___ if ($::evp); + st %f0, [$ivec + 0] + st %f1, [$ivec + 4] + st %f2, [$ivec + 8] + st %f3, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, 3f + nop + + std %f0, [$ivec + 0] ! write out ivec + std %f2, [$ivec + 8] +___ +$::code.=<<___; +.L${bits}_cbc_enc_abort: + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f4 ! handle unaligned output + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + + stda %f4, [$out + $omask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $omask, $omask + stda %f8, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_cbc_enc_loop+4 + orn %g0, $omask, $omask +___ +$::code.=<<___ if ($::evp); + st %f0, [$ivec + 0] + st %f1, [$ivec + 4] + st %f2, [$ivec + 8] + st %f3, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, 3f + nop + + std %f0, [$ivec + 0] ! write out ivec + std %f2, [$ivec + 8] + ret + restore + +.align 16 +3: alignaddrl $ivec, $ivoff, %g0 ! handle unaligned ivec + mov 0xff, $omask + srl $omask, $ivoff, $omask + faligndata %f0, %f0, %f4 + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + stda %f4, [$ivec + $omask]0xc0 + std %f6, [$ivec + 8] + add $ivec, 16, $ivec + orn %g0, $omask, $omask + stda %f8, [$ivec + $omask]0xc0 +___ +$::code.=<<___; + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}cbc_enc_blk: + add $out, $len, $blk_init + and $blk_init, 63, $blk_init ! tail + sub $len, $blk_init, $len + add $blk_init, 15, $blk_init ! round up to 16n + srlx $len, 4, $len + srl $blk_init, 4, $blk_init + +.L${bits}_cbc_enc_blk_loop: + ldx [$inp + 0], %o0 + brz,pt $ileft, 5f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +5: + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + movxtod %o0, %f12 + movxtod %o1, %f14 + + fxor %f12, %f0, %f0 ! 
^= ivec + fxor %f14, %f2, %f2 + prefetch [$inp + 16+63], 20 + call _${alg}${bits}_encrypt_1x + add $inp, 16, $inp + sub $len, 1, $len + + stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + brnz,pt $len, .L${bits}_cbc_enc_blk_loop + add $out, 8, $out + + membar #StoreLoad|#StoreStore + brnz,pt $blk_init, .L${bits}_cbc_enc_loop + mov $blk_init, $len +___ +$::code.=<<___ if ($::evp); + st %f0, [$ivec + 0] + st %f1, [$ivec + 4] + st %f2, [$ivec + 8] + st %f3, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, 3b + nop + + std %f0, [$ivec + 0] ! write out ivec + std %f2, [$ivec + 8] +___ +$::code.=<<___; + ret + restore +.type ${alg}${bits}_t4_cbc_encrypt,#function +.size ${alg}${bits}_t4_cbc_encrypt,.-${alg}${bits}_t4_cbc_encrypt +___ +} + +sub alg_cbc_decrypt_implement { +my ($alg,$bits) = @_; + +$::code.=<<___; +.globl ${alg}${bits}_t4_cbc_decrypt +.align 32 +${alg}${bits}_t4_cbc_decrypt: + save %sp, -$::frame, %sp + cmp $len, 0 + be,pn $::size_t_cc, .L${bits}_cbc_dec_abort + srln $len, 0, $len ! needed on v8+, "nop" on v9 + sub $inp, $out, $blk_init ! $inp!=$out +___ +$::code.=<<___ if (!$::evp); + andcc $ivec, 7, $ivoff + alignaddr $ivec, %g0, $ivec + + ldd [$ivec + 0], %f12 ! load ivec + bz,pt %icc, 1f + ldd [$ivec + 8], %f14 + ldd [$ivec + 16], %f0 + faligndata %f12, %f14, %f12 + faligndata %f14, %f0, %f14 +1: +___ +$::code.=<<___ if ($::evp); + ld [$ivec + 0], %f12 ! load ivec + ld [$ivec + 4], %f13 + ld [$ivec + 8], %f14 + ld [$ivec + 12], %f15 +___ +$::code.=<<___; + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + call _${alg}${bits}_load_deckey + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 64, $iright + mov 0xff, $omask + sub $iright, $ileft, $iright + and $out, 7, $ooff + cmp $len, 255 + movrnz $ooff, 0, $blk_init ! if ( $out&7 || + movleu $::size_t_cc, 0, $blk_init ! $len<256 || + brnz,pn $blk_init, .L${bits}cbc_dec_blk ! $inp==$out) + srl $omask, $ooff, $omask + + andcc $len, 16, %g0 ! is number of blocks even? + srlx $len, 4, $len + alignaddrl $out, %g0, $out + bz %icc, .L${bits}_cbc_dec_loop2x + prefetch [$out], 22 +.L${bits}_cbc_dec_loop: + ldx [$inp + 0], %o0 + brz,pt $ileft, 4f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +4: + xor %g4, %o0, %o2 ! ^= rk[0] + xor %g5, %o1, %o3 + movxtod %o2, %f0 + movxtod %o3, %f2 + + prefetch [$out + 63], 22 + prefetch [$inp + 16+63], 20 + call _${alg}${bits}_decrypt_1x + add $inp, 16, $inp + + fxor %f12, %f0, %f0 ! ^= ivec + fxor %f14, %f2, %f2 + movxtod %o0, %f12 + movxtod %o1, %f14 + + brnz,pn $ooff, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + brnz,pt $len, .L${bits}_cbc_dec_loop2x + add $out, 16, $out +___ +$::code.=<<___ if ($::evp); + st %f12, [$ivec + 0] + st %f13, [$ivec + 4] + st %f14, [$ivec + 8] + st %f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec + nop + + std %f12, [$ivec + 0] ! write out ivec + std %f14, [$ivec + 8] +___ +$::code.=<<___; +.L${bits}_cbc_dec_abort: + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f4 ! handle unaligned output + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + + stda %f4, [$out + $omask]0xc0 ! 
partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $omask, $omask + stda %f8, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_cbc_dec_loop2x+4 + orn %g0, $omask, $omask +___ +$::code.=<<___ if ($::evp); + st %f12, [$ivec + 0] + st %f13, [$ivec + 4] + st %f14, [$ivec + 8] + st %f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec + nop + + std %f12, [$ivec + 0] ! write out ivec + std %f14, [$ivec + 8] +___ +$::code.=<<___; + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}_cbc_dec_loop2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 4f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +4: + xor %g4, %o0, %o4 ! ^= rk[0] + xor %g5, %o1, %o5 + movxtod %o4, %f0 + movxtod %o5, %f2 + xor %g4, %o2, %o4 + xor %g5, %o3, %o5 + movxtod %o4, %f4 + movxtod %o5, %f6 + + prefetch [$out + 63], 22 + prefetch [$inp + 32+63], 20 + call _${alg}${bits}_decrypt_2x + add $inp, 32, $inp + + movxtod %o0, %f8 + movxtod %o1, %f10 + fxor %f12, %f0, %f0 ! ^= ivec + fxor %f14, %f2, %f2 + movxtod %o2, %f12 + movxtod %o3, %f14 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + brnz,pn $ooff, 2f + sub $len, 2, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + std %f4, [$out + 16] + std %f6, [$out + 24] + brnz,pt $len, .L${bits}_cbc_dec_loop2x + add $out, 32, $out +___ +$::code.=<<___ if ($::evp); + st %f12, [$ivec + 0] + st %f13, [$ivec + 4] + st %f14, [$ivec + 8] + st %f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec + nop + + std %f12, [$ivec + 0] ! write out ivec + std %f14, [$ivec + 8] +___ +$::code.=<<___; + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f8 ! handle unaligned output + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 + faligndata %f4, %f6, %f4 + faligndata %f6, %f6, %f6 + stda %f8, [$out + $omask]0xc0 ! partial store + std %f0, [$out + 8] + std %f2, [$out + 16] + std %f4, [$out + 24] + add $out, 32, $out + orn %g0, $omask, $omask + stda %f6, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_cbc_dec_loop2x+4 + orn %g0, $omask, $omask +___ +$::code.=<<___ if ($::evp); + st %f12, [$ivec + 0] + st %f13, [$ivec + 4] + st %f14, [$ivec + 8] + st %f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec + nop + + std %f12, [$ivec + 0] ! write out ivec + std %f14, [$ivec + 8] + ret + restore + +.align 16 +.L${bits}_cbc_dec_unaligned_ivec: + alignaddrl $ivec, $ivoff, %g0 ! handle unaligned ivec + mov 0xff, $omask + srl $omask, $ivoff, $omask + faligndata %f12, %f12, %f0 + faligndata %f12, %f14, %f2 + faligndata %f14, %f14, %f4 + stda %f0, [$ivec + $omask]0xc0 + std %f2, [$ivec + 8] + add $ivec, 16, $ivec + orn %g0, $omask, $omask + stda %f4, [$ivec + $omask]0xc0 +___ +$::code.=<<___; + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}cbc_dec_blk: + add $out, $len, $blk_init + and $blk_init, 63, $blk_init ! tail + sub $len, $blk_init, $len + add $blk_init, 15, $blk_init ! 
round up to 16n + srlx $len, 4, $len + srl $blk_init, 4, $blk_init + sub $len, 1, $len + add $blk_init, 1, $blk_init + +.L${bits}_cbc_dec_blk_loop2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 5f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +5: + xor %g4, %o0, %o4 ! ^= rk[0] + xor %g5, %o1, %o5 + movxtod %o4, %f0 + movxtod %o5, %f2 + xor %g4, %o2, %o4 + xor %g5, %o3, %o5 + movxtod %o4, %f4 + movxtod %o5, %f6 + + prefetch [$inp + 32+63], 20 + call _${alg}${bits}_decrypt_2x + add $inp, 32, $inp + subcc $len, 2, $len + + movxtod %o0, %f8 + movxtod %o1, %f10 + fxor %f12, %f0, %f0 ! ^= ivec + fxor %f14, %f2, %f2 + movxtod %o2, %f12 + movxtod %o3, %f14 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + bgu,pt $::size_t_cc, .L${bits}_cbc_dec_blk_loop2x + add $out, 8, $out + + add $blk_init, $len, $len + andcc $len, 1, %g0 ! is number of blocks even? + membar #StoreLoad|#StoreStore + bnz,pt %icc, .L${bits}_cbc_dec_loop + srl $len, 0, $len + brnz,pn $len, .L${bits}_cbc_dec_loop2x + nop +___ +$::code.=<<___ if ($::evp); + st %f12, [$ivec + 0] ! write out ivec + st %f13, [$ivec + 4] + st %f14, [$ivec + 8] + st %f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, 3b + nop + + std %f12, [$ivec + 0] ! write out ivec + std %f14, [$ivec + 8] +___ +$::code.=<<___; + ret + restore +.type ${alg}${bits}_t4_cbc_decrypt,#function +.size ${alg}${bits}_t4_cbc_decrypt,.-${alg}${bits}_t4_cbc_decrypt +___ +} + +sub alg_ctr32_implement { +my ($alg,$bits) = @_; + +$::code.=<<___; +.globl ${alg}${bits}_t4_ctr32_encrypt +.align 32 +${alg}${bits}_t4_ctr32_encrypt: + save %sp, -$::frame, %sp + srln $len, 0, $len ! needed on v8+, "nop" on v9 + + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + call _${alg}${bits}_load_enckey + sllx $len, 4, $len + + ld [$ivec + 0], %l4 ! counter + ld [$ivec + 4], %l5 + ld [$ivec + 8], %l6 + ld [$ivec + 12], %l7 + + sllx %l4, 32, %o5 + or %l5, %o5, %o5 + sllx %l6, 32, %g1 + xor %o5, %g4, %g4 ! ^= rk[0] + xor %g1, %g5, %g5 + movxtod %g4, %f14 ! most significant 64 bits + + sub $inp, $out, $blk_init ! $inp!=$out + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 64, $iright + mov 0xff, $omask + sub $iright, $ileft, $iright + and $out, 7, $ooff + cmp $len, 255 + movrnz $ooff, 0, $blk_init ! if ( $out&7 || + movleu $::size_t_cc, 0, $blk_init ! $len<256 || + brnz,pn $blk_init, .L${bits}_ctr32_blk ! $inp==$out) + srl $omask, $ooff, $omask + + andcc $len, 16, %g0 ! is number of blocks even? + alignaddrl $out, %g0, $out + bz %icc, .L${bits}_ctr32_loop2x + srlx $len, 4, $len +.L${bits}_ctr32_loop: + ldx [$inp + 0], %o0 + brz,pt $ileft, 4f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +4: + xor %g5, %l7, %g1 ! ^= rk[0] + add %l7, 1, %l7 + movxtod %g1, %f2 + srl %l7, 0, %l7 ! 
clruw + prefetch [$out + 63], 22 + prefetch [$inp + 16+63], 20 +___ +$::code.=<<___ if ($alg eq "aes"); + aes_eround01 %f16, %f14, %f2, %f4 + aes_eround23 %f18, %f14, %f2, %f2 +___ +$::code.=<<___ if ($alg eq "cmll"); + camellia_f %f16, %f2, %f14, %f2 + camellia_f %f18, %f14, %f2, %f0 +___ +$::code.=<<___; + call _${alg}${bits}_encrypt_1x+8 + add $inp, 16, $inp + + movxtod %o0, %f10 + movxtod %o1, %f12 + fxor %f10, %f0, %f0 ! ^= inp + fxor %f12, %f2, %f2 + + brnz,pn $ooff, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + brnz,pt $len, .L${bits}_ctr32_loop2x + add $out, 16, $out + + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f4 ! handle unaligned output + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + stda %f4, [$out + $omask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $omask, $omask + stda %f8, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_ctr32_loop2x+4 + orn %g0, $omask, $omask + + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}_ctr32_loop2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 4f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +4: + xor %g5, %l7, %g1 ! ^= rk[0] + add %l7, 1, %l7 + movxtod %g1, %f2 + srl %l7, 0, %l7 ! clruw + xor %g5, %l7, %g1 + add %l7, 1, %l7 + movxtod %g1, %f6 + srl %l7, 0, %l7 ! clruw + prefetch [$out + 63], 22 + prefetch [$inp + 32+63], 20 +___ +$::code.=<<___ if ($alg eq "aes"); + aes_eround01 %f16, %f14, %f2, %f8 + aes_eround23 %f18, %f14, %f2, %f2 + aes_eround01 %f16, %f14, %f6, %f10 + aes_eround23 %f18, %f14, %f6, %f6 +___ +$::code.=<<___ if ($alg eq "cmll"); + camellia_f %f16, %f2, %f14, %f2 + camellia_f %f16, %f6, %f14, %f6 + camellia_f %f18, %f14, %f2, %f0 + camellia_f %f18, %f14, %f6, %f4 +___ +$::code.=<<___; + call _${alg}${bits}_encrypt_2x+16 + add $inp, 32, $inp + + movxtod %o0, %f8 + movxtod %o1, %f10 + movxtod %o2, %f12 + fxor %f8, %f0, %f0 ! ^= inp + movxtod %o3, %f8 + fxor %f10, %f2, %f2 + fxor %f12, %f4, %f4 + fxor %f8, %f6, %f6 + + brnz,pn $ooff, 2f + sub $len, 2, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + std %f4, [$out + 16] + std %f6, [$out + 24] + brnz,pt $len, .L${bits}_ctr32_loop2x + add $out, 32, $out + + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f8 ! handle unaligned output + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 + faligndata %f4, %f6, %f4 + faligndata %f6, %f6, %f6 + + stda %f8, [$out + $omask]0xc0 ! partial store + std %f0, [$out + 8] + std %f2, [$out + 16] + std %f4, [$out + 24] + add $out, 32, $out + orn %g0, $omask, $omask + stda %f6, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_ctr32_loop2x+4 + orn %g0, $omask, $omask + + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}_ctr32_blk: + add $out, $len, $blk_init + and $blk_init, 63, $blk_init ! tail + sub $len, $blk_init, $len + add $blk_init, 15, $blk_init ! 
round up to 16n + srlx $len, 4, $len + srl $blk_init, 4, $blk_init + sub $len, 1, $len + add $blk_init, 1, $blk_init + +.L${bits}_ctr32_blk_loop2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 5f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +5: + xor %g5, %l7, %g1 ! ^= rk[0] + add %l7, 1, %l7 + movxtod %g1, %f2 + srl %l7, 0, %l7 ! clruw + xor %g5, %l7, %g1 + add %l7, 1, %l7 + movxtod %g1, %f6 + srl %l7, 0, %l7 ! clruw + prefetch [$inp + 32+63], 20 +___ +$::code.=<<___ if ($alg eq "aes"); + aes_eround01 %f16, %f14, %f2, %f8 + aes_eround23 %f18, %f14, %f2, %f2 + aes_eround01 %f16, %f14, %f6, %f10 + aes_eround23 %f18, %f14, %f6, %f6 +___ +$::code.=<<___ if ($alg eq "cmll"); + camellia_f %f16, %f2, %f14, %f2 + camellia_f %f16, %f6, %f14, %f6 + camellia_f %f18, %f14, %f2, %f0 + camellia_f %f18, %f14, %f6, %f4 +___ +$::code.=<<___; + call _${alg}${bits}_encrypt_2x+16 + add $inp, 32, $inp + subcc $len, 2, $len + + movxtod %o0, %f8 + movxtod %o1, %f10 + movxtod %o2, %f12 + fxor %f8, %f0, %f0 ! ^= inp + movxtod %o3, %f8 + fxor %f10, %f2, %f2 + fxor %f12, %f4, %f4 + fxor %f8, %f6, %f6 + + stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + bgu,pt $::size_t_cc, .L${bits}_ctr32_blk_loop2x + add $out, 8, $out + + add $blk_init, $len, $len + andcc $len, 1, %g0 ! is number of blocks even? + membar #StoreLoad|#StoreStore + bnz,pt %icc, .L${bits}_ctr32_loop + srl $len, 0, $len + brnz,pn $len, .L${bits}_ctr32_loop2x + nop + + ret + restore +.type ${alg}${bits}_t4_ctr32_encrypt,#function +.size ${alg}${bits}_t4_ctr32_encrypt,.-${alg}${bits}_t4_ctr32_encrypt +___ +} + +sub alg_xts_implement { +my ($alg,$bits,$dir) = @_; +my ($inp,$out,$len,$key1,$key2,$ivec)=map("%i$_",(0..5)); +my $rem=$ivec; + +$::code.=<<___; +.globl ${alg}${bits}_t4_xts_${dir}crypt +.align 32 +${alg}${bits}_t4_xts_${dir}crypt: + save %sp, -$::frame-16, %sp + srln $len, 0, $len ! needed on v8+, "nop" on v9 + + mov $ivec, %o0 + add %fp, $::bias-16, %o1 + call ${alg}_t4_encrypt + mov $key2, %o2 + + add %fp, $::bias-16, %l7 + ldxa [%l7]0x88, %g2 + add %fp, $::bias-8, %l7 + ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak + + sethi %hi(0x76543210), %l7 + or %l7, %lo(0x76543210), %l7 + bmask %l7, %g0, %g0 ! byte swap mask + + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + call _${alg}${bits}_load_${dir}ckey + and $len, 15, $rem + and $len, -16, $len +___ +$code.=<<___ if ($dir eq "de"); + mov 0, %l7 + movrnz $rem, 16, %l7 + sub $len, %l7, $len +___ +$code.=<<___; + + sub $inp, $out, $blk_init ! $inp!=$out + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 64, $iright + mov 0xff, $omask + sub $iright, $ileft, $iright + and $out, 7, $ooff + cmp $len, 255 + movrnz $ooff, 0, $blk_init ! if ( $out&7 || + movleu $::size_t_cc, 0, $blk_init ! $len<256 || + brnz,pn $blk_init, .L${bits}_xts_${dir}blk ! $inp==$out) + srl $omask, $ooff, $omask + + andcc $len, 16, %g0 ! is number of blocks even? 
+___ +$code.=<<___ if ($dir eq "de"); + brz,pn $len, .L${bits}_xts_${dir}steal +___ +$code.=<<___; + alignaddrl $out, %g0, $out + bz %icc, .L${bits}_xts_${dir}loop2x + srlx $len, 4, $len +.L${bits}_xts_${dir}loop: + ldx [$inp + 0], %o0 + brz,pt $ileft, 4f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +4: + movxtod %g2, %f12 + movxtod %g3, %f14 + bshuffle %f12, %f12, %f12 + bshuffle %f14, %f14, %f14 + + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + movxtod %o0, %f0 + movxtod %o1, %f2 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + + prefetch [$out + 63], 22 + prefetch [$inp + 16+63], 20 + call _${alg}${bits}_${dir}crypt_1x + add $inp, 16, $inp + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %g2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %g3 + xor %l7, %g2, %g2 + + brnz,pn $ooff, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + brnz,pt $len, .L${bits}_xts_${dir}loop2x + add $out, 16, $out + + brnz,pn $rem, .L${bits}_xts_${dir}steal + nop + + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f4 ! handle unaligned output + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + stda %f4, [$out + $omask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $omask, $omask + stda %f8, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_xts_${dir}loop2x+4 + orn %g0, $omask, $omask + + brnz,pn $rem, .L${bits}_xts_${dir}steal + nop + + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}_xts_${dir}loop2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 4f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +4: + movxtod %g2, %f12 + movxtod %g3, %f14 + bshuffle %f12, %f12, %f12 + bshuffle %f14, %f14, %f14 + + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %g2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %g3 + xor %l7, %g2, %g2 + + movxtod %g2, %f8 + movxtod %g3, %f10 + bshuffle %f8, %f8, %f8 + bshuffle %f10, %f10, %f10 + + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + xor %g4, %o2, %o2 ! ^= rk[0] + xor %g5, %o3, %o3 + movxtod %o0, %f0 + movxtod %o1, %f2 + movxtod %o2, %f4 + movxtod %o3, %f6 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + fxor %f8, %f4, %f4 ! ^= tweak[0] + fxor %f10, %f6, %f6 + + prefetch [$out + 63], 22 + prefetch [$inp + 32+63], 20 + call _${alg}${bits}_${dir}crypt_2x + add $inp, 32, $inp + + movxtod %g2, %f8 + movxtod %g3, %f10 + + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %g2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %g3 + xor %l7, %g2, %g2 + + bshuffle %f8, %f8, %f8 + bshuffle %f10, %f10, %f10 + + fxor %f12, %f0, %f0 ! 
^= tweak[0] + fxor %f14, %f2, %f2 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + brnz,pn $ooff, 2f + sub $len, 2, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + std %f4, [$out + 16] + std %f6, [$out + 24] + brnz,pt $len, .L${bits}_xts_${dir}loop2x + add $out, 32, $out + + fsrc2 %f4, %f0 + fsrc2 %f6, %f2 + brnz,pn $rem, .L${bits}_xts_${dir}steal + nop + + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f8 ! handle unaligned output + faligndata %f0, %f2, %f10 + faligndata %f2, %f4, %f12 + faligndata %f4, %f6, %f14 + faligndata %f6, %f6, %f0 + + stda %f8, [$out + $omask]0xc0 ! partial store + std %f10, [$out + 8] + std %f12, [$out + 16] + std %f14, [$out + 24] + add $out, 32, $out + orn %g0, $omask, $omask + stda %f0, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_xts_${dir}loop2x+4 + orn %g0, $omask, $omask + + fsrc2 %f4, %f0 + fsrc2 %f6, %f2 + brnz,pn $rem, .L${bits}_xts_${dir}steal + nop + + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}_xts_${dir}blk: + add $out, $len, $blk_init + and $blk_init, 63, $blk_init ! tail + sub $len, $blk_init, $len + add $blk_init, 15, $blk_init ! round up to 16n + srlx $len, 4, $len + srl $blk_init, 4, $blk_init + sub $len, 1, $len + add $blk_init, 1, $blk_init + +.L${bits}_xts_${dir}blk2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 5f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +5: + movxtod %g2, %f12 + movxtod %g3, %f14 + bshuffle %f12, %f12, %f12 + bshuffle %f14, %f14, %f14 + + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %g2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %g3 + xor %l7, %g2, %g2 + + movxtod %g2, %f8 + movxtod %g3, %f10 + bshuffle %f8, %f8, %f8 + bshuffle %f10, %f10, %f10 + + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + xor %g4, %o2, %o2 ! ^= rk[0] + xor %g5, %o3, %o3 + movxtod %o0, %f0 + movxtod %o1, %f2 + movxtod %o2, %f4 + movxtod %o3, %f6 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + fxor %f8, %f4, %f4 ! ^= tweak[0] + fxor %f10, %f6, %f6 + + prefetch [$inp + 32+63], 20 + call _${alg}${bits}_${dir}crypt_2x + add $inp, 32, $inp + + movxtod %g2, %f8 + movxtod %g3, %f10 + + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %g2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %g3 + xor %l7, %g2, %g2 + + bshuffle %f8, %f8, %f8 + bshuffle %f10, %f10, %f10 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + subcc $len, 2, $len + stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + bgu,pt $::size_t_cc, .L${bits}_xts_${dir}blk2x + add $out, 8, $out + + add $blk_init, $len, $len + andcc $len, 1, %g0 ! is number of blocks even? 
+ membar #StoreLoad|#StoreStore + bnz,pt %icc, .L${bits}_xts_${dir}loop + srl $len, 0, $len + brnz,pn $len, .L${bits}_xts_${dir}loop2x + nop + + fsrc2 %f4, %f0 + fsrc2 %f6, %f2 + brnz,pn $rem, .L${bits}_xts_${dir}steal + nop + + ret + restore +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +___ +$code.=<<___ if ($dir eq "en"); +.align 32 +.L${bits}_xts_${dir}steal: + std %f0, [%fp + $::bias-16] ! copy of output + std %f2, [%fp + $::bias-8] + + srl $ileft, 3, $ileft + add %fp, $::bias-16, %l7 + add $inp, $ileft, $inp ! original $inp+$len&-15 + add $out, $ooff, $out ! original $out+$len&-15 + mov 0, $ileft + nop ! align + +.L${bits}_xts_${dir}stealing: + ldub [$inp + $ileft], %o0 + ldub [%l7 + $ileft], %o1 + dec $rem + stb %o0, [%l7 + $ileft] + stb %o1, [$out + $ileft] + brnz $rem, .L${bits}_xts_${dir}stealing + inc $ileft + + mov %l7, $inp + sub $out, 16, $out + mov 0, $ileft + sub $out, $ooff, $out + ba .L${bits}_xts_${dir}loop ! one more time + mov 1, $len ! $rem is 0 +___ +$code.=<<___ if ($dir eq "de"); +.align 32 +.L${bits}_xts_${dir}steal: + ldx [$inp + 0], %o0 + brz,pt $ileft, 8f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +8: + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %o2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %o3 + xor %l7, %o2, %o2 + + movxtod %o2, %f12 + movxtod %o3, %f14 + bshuffle %f12, %f12, %f12 + bshuffle %f14, %f14, %f14 + + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + movxtod %o0, %f0 + movxtod %o1, %f2 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + + call _${alg}${bits}_${dir}crypt_1x + add $inp, 16, $inp + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + + std %f0, [%fp + $::bias-16] + std %f2, [%fp + $::bias-8] + + srl $ileft, 3, $ileft + add %fp, $::bias-16, %l7 + add $inp, $ileft, $inp ! original $inp+$len&-15 + add $out, $ooff, $out ! original $out+$len&-15 + mov 0, $ileft + add $out, 16, $out + nop ! align + +.L${bits}_xts_${dir}stealing: + ldub [$inp + $ileft], %o0 + ldub [%l7 + $ileft], %o1 + dec $rem + stb %o0, [%l7 + $ileft] + stb %o1, [$out + $ileft] + brnz $rem, .L${bits}_xts_${dir}stealing + inc $ileft + + mov %l7, $inp + sub $out, 16, $out + mov 0, $ileft + sub $out, $ooff, $out + ba .L${bits}_xts_${dir}loop ! one more time + mov 1, $len ! $rem is 0 +___ +$code.=<<___; + ret + restore +.type ${alg}${bits}_t4_xts_${dir}crypt,#function +.size ${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt +___ +} + +# Purpose of these subroutines is to explicitly encode VIS instructions, +# so that one can compile the module without having to specify VIS +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# Idea is to reserve for option to produce "universal" binary and let +# programmer detect if current CPU is VIS capable at run-time. 
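# A worked instance of the encoding below (illustrative only): with the
# %visopf table in unvis(), "faligndata %f0,%f2,%f4" maps to rs1=0, rs2=2,
# rd=4, opf=0x048, i.e. it is emitted as
#	.word 0x81b00000|4<<25|0<<14|0x048<<5|2	! faligndata %f0,%f2,%f4
# so the module assembles even when the assembler knows nothing about VIS.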
+sub unvis { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my ($ref,$opf); +my %visopf = ( "faligndata" => 0x048, + "bshuffle" => 0x04c, + "fnot2" => 0x066, + "fxor" => 0x06c, + "fsrc2" => 0x078 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unvis3 { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); +my ($ref,$opf); +my %visopf = ( "addxc" => 0x011, + "addxccc" => 0x013, + "umulxhi" => 0x016, + "alignaddr" => 0x018, + "bmask" => 0x019, + "alignaddrl" => 0x01a ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%([goli])([0-9])/); + $_=$bias{$1}+$2; + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unaes_round { # 4-argument instructions +my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_; +my ($ref,$opf); +my %aesopf = ( "aes_eround01" => 0, + "aes_eround23" => 1, + "aes_dround01" => 2, + "aes_dround23" => 3, + "aes_eround01_l"=> 4, + "aes_eround23_l"=> 5, + "aes_dround01_l"=> 6, + "aes_dround23_l"=> 7, + "aes_kexpand1" => 8 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd"; + + if (defined($opf=$aesopf{$mnemonic})) { + $rs3 = ($rs3 =~ /%f([0-6]*[02468])/) ? (($1|$1>>5)&31) : $rs3; + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unaes_kexpand { # 3-argument instructions +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my ($ref,$opf); +my %aesopf = ( "aes_kexpand0" => 0x130, + "aes_kexpand2" => 0x131 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if (defined($opf=$aesopf{$mnemonic})) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub uncamellia_f { # 4-argument instructions +my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_; +my ($ref,$opf); + + $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd"; + + if (1) { + $rs3 = ($rs3 =~ /%f([0-6]*[02468])/) ? 
(($1|$1>>5)&31) : $rs3; + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|0xc<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub uncamellia3 { # 3-argument instructions +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my ($ref,$opf); +my %cmllopf = ( "camellia_fl" => 0x13c, + "camellia_fli" => 0x13d ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if (defined($opf=$cmllopf{$mnemonic})) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unmovxtox { # 2-argument instructions +my ($mnemonic,$rs,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24, "f" => 0 ); +my ($ref,$opf); +my %movxopf = ( "movdtox" => 0x110, + "movstouw" => 0x111, + "movstosw" => 0x113, + "movxtod" => 0x118, + "movwtos" => 0x119 ); + + $ref = "$mnemonic\t$rs,$rd"; + + if (defined($opf=$movxopf{$mnemonic})) { + foreach ($rs,$rd) { + return $ref if (!/%([fgoli])([0-9]{1,2})/); + $_=$bias{$1}+$2; + if ($2>=32) { + return $ref if ($2&1); + # re-encode for upper double register addressing + $_=($2|$2>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x36<<19|$opf<<5|$rs, + $ref; + } else { + return $ref; + } +} + +sub undes { +my ($mnemonic)=shift; +my @args=@_; +my ($ref,$opf); +my %desopf = ( "des_round" => 0b1001, + "des_ip" => 0b100110100, + "des_iip" => 0b100110101, + "des_kexpand" => 0b100110110 ); + + $ref = "$mnemonic\t".join(",",@_); + + if (defined($opf=$desopf{$mnemonic})) { # 4-arg + if ($mnemonic eq "des_round") { + foreach (@args[0..3]) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + return sprintf ".word\t0x%08x !%s", + 2<<30|0b011001<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<9|$args[3]<<25, + $ref; + } elsif ($mnemonic eq "des_kexpand") { # 3-arg + foreach (@args[0..2]) { + return $ref if (!/(%f)?([0-9]{1,2})/); + $_=$2; + if ($2>=32) { + return $ref if ($2&1); + # re-encode for upper double register addressing + $_=($2|$2>>5)&31; + } + } + return sprintf ".word\t0x%08x !%s", + 2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<25, + $ref; + } else { # 2-arg + foreach (@args[0..1]) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($2&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + return sprintf ".word\t0x%08x !%s", + 2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]<<25, + $ref; + } + } else { + return $ref; + } +} + +sub emit_assembler { + foreach (split("\n",$::code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/\b(f[a-z]+2[sd]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})\s*$/$1\t%f0,$2,$3/go; + + s/\b(aes_[edk][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/ + &unaes_round($1,$2,$3,$4,$5) + /geo or + s/\b(aes_kexpand[02])\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &unaes_kexpand($1,$2,$3,$4) + /geo or + s/\b(camellia_f)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/ + &uncamellia_f($1,$2,$3,$4,$5) + /geo or + 
s/\b(camellia_[^s]+)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &uncamellia3($1,$2,$3,$4) + /geo or + s/\b(des_\w+)\s+(%f[0-9]{1,2}),\s*([%fx0-9]+)(?:,\s*(%f[0-9]{1,2})(?:,\s*(%f[0-9]{1,2}))?)?/ + &undes($1,$2,$3,$4,$5) + /geo or + s/\b(mov[ds]to\w+)\s+(%f[0-9]{1,2}),\s*(%[goli][0-7])/ + &unmovxtox($1,$2,$3) + /geo or + s/\b(mov[xw]to[ds])\s+(%[goli][0-7]),\s*(%f[0-9]{1,2})/ + &unmovxtox($1,$2,$3) + /geo or + s/\b([fb][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &unvis($1,$2,$3,$4) + /geo or + s/\b(umulxhi|bmask|addxc[c]{0,2}|alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ + &unvis3($1,$2,$3,$4) + /geo; + + print $_,"\n"; + } +} + +1; diff --git a/openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl b/openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl new file mode 100755 index 0000000..6eaefcf --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl @@ -0,0 +1,1186 @@ +#! /usr/bin/env perl +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + + +# Ascetic x86_64 AT&T to MASM/NASM assembler translator by . +# +# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T +# format is way easier to parse. Because it's simpler to "gear" from +# Unix ABI to Windows one [see cross-reference "card" at the end of +# file]. Because Linux targets were available first... +# +# In addition the script also "distills" code suitable for GNU +# assembler, so that it can be compiled with more rigid assemblers, +# such as Solaris /usr/ccs/bin/as. +# +# This translator is not designed to convert *arbitrary* assembler +# code from AT&T format to MASM one. It's designed to convert just +# enough to provide for dual-ABI OpenSSL modules development... +# There *are* limitations and you might have to modify your assembler +# code or this script to achieve the desired result... +# +# Currently recognized limitations: +# +# - can't use multiple ops per line; +# +# Dual-ABI styling rules. +# +# 1. Adhere to Unix register and stack layout [see cross-reference +# ABI "card" at the end for explanation]. +# 2. Forget about "red zone," stick to more traditional blended +# stack frame allocation. If volatile storage is actually required +# that is. If not, just leave the stack as is. +# 3. Functions tagged with ".type name,@function" get crafted with +# unified Win64 prologue and epilogue automatically. If you want +# to take care of ABI differences yourself, tag functions as +# ".type name,@abi-omnipotent" instead. +# 4. To optimize the Win64 prologue you can specify number of input +# arguments as ".type name,@function,N." Keep in mind that if N is +# larger than 6, then you *have to* write "abi-omnipotent" code, +# because >6 cases can't be addressed with unified prologue. +# 5. Name local labels as .L*, do *not* use dynamic labels such as 1: +# (sorry about latter). +# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is +# required to identify the spots, where to inject Win64 epilogue! +# But on the pros, it's then prefixed with rep automatically:-) +# 7. Stick to explicit ip-relative addressing. If you have to use +# GOTPCREL addressing, stick to mov symbol@GOTPCREL(%rip),%r??. +# Both are recognized and translated to proper Win64 addressing +# modes. 
To support legacy code a synthetic directive, .picmeup, +# is implemented. It puts address of the *next* instruction into +# target register, e.g.: +# +# .picmeup %rax +# lea .Label-.(%rax),%rax +# +# 8. In order to provide for structured exception handling unified +# Win64 prologue copies %rsp value to %rax. For further details +# see SEH paragraph at the end. +# 9. .init segment is allowed to contain calls to functions only. +# a. If function accepts more than 4 arguments *and* >4th argument +# is declared as non 64-bit value, do clear its upper part. + + +use strict; + +my $flavour = shift; +my $output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +open STDOUT,">$output" || die "can't open $output: $!" + if (defined($output)); + +my $gas=1; $gas=0 if ($output =~ /\.asm$/); +my $elf=1; $elf=0 if (!$gas); +my $win64=0; +my $prefix=""; +my $decor=".L"; + +my $masmref=8 + 50727*2**-32; # 8.00.50727 shipped with VS2005 +my $masm=0; +my $PTR=" PTR"; + +my $nasmref=2.03; +my $nasm=0; + +if ($flavour eq "mingw64") { $gas=1; $elf=0; $win64=1; + $prefix=`echo __USER_LABEL_PREFIX__ | $ENV{CC} -E -P -`; + $prefix =~ s|\R$||; # Better chomp + } +elsif ($flavour eq "macosx") { $gas=1; $elf=0; $prefix="_"; $decor="L\$"; } +elsif ($flavour eq "masm") { $gas=0; $elf=0; $masm=$masmref; $win64=1; $decor="\$L\$"; } +elsif ($flavour eq "nasm") { $gas=0; $elf=0; $nasm=$nasmref; $win64=1; $decor="\$L\$"; $PTR=""; } +elsif (!$gas) +{ if ($ENV{ASM} =~ m/nasm/ && `nasm -v` =~ m/version ([0-9]+)\.([0-9]+)/i) + { $nasm = $1 + $2*0.01; $PTR=""; } + elsif (`ml64 2>&1` =~ m/Version ([0-9]+)\.([0-9]+)(\.([0-9]+))?/) + { $masm = $1 + $2*2**-16 + $4*2**-32; } + die "no assembler found on %PATH" if (!($nasm || $masm)); + $win64=1; + $elf=0; + $decor="\$L\$"; +} + +my $current_segment; +my $current_function; +my %globals; + +{ package opcode; # pick up opcodes + sub re { + my ($class, $line) = @_; + my $self = {}; + my $ret; + + if ($$line =~ /^([a-z][a-z0-9]*)/i) { + bless $self,$class; + $self->{op} = $1; + $ret = $self; + $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + + undef $self->{sz}; + if ($self->{op} =~ /^(movz)x?([bw]).*/) { # movz is pain... + $self->{op} = $1; + $self->{sz} = $2; + } elsif ($self->{op} =~ /call|jmp/) { + $self->{sz} = ""; + } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op|insrw)/) { # SSEn + $self->{sz} = ""; + } elsif ($self->{op} =~ /^v/) { # VEX + $self->{sz} = ""; + } elsif ($self->{op} =~ /mov[dq]/ && $$line =~ /%xmm/) { + $self->{sz} = ""; + } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { + $self->{op} = $1; + $self->{sz} = $2; + } + } + $ret; + } + sub size { + my ($self, $sz) = @_; + $self->{sz} = $sz if (defined($sz) && !defined($self->{sz})); + $self->{sz}; + } + sub out { + my $self = shift; + if ($gas) { + if ($self->{op} eq "movz") { # movz is pain... + sprintf "%s%s%s",$self->{op},$self->{sz},shift; + } elsif ($self->{op} =~ /^set/) { + "$self->{op}"; + } elsif ($self->{op} eq "ret") { + my $epilogue = ""; + if ($win64 && $current_function->{abi} eq "svr4") { + $epilogue = "movq 8(%rsp),%rdi\n\t" . + "movq 16(%rsp),%rsi\n\t"; + } + $epilogue . ".byte 0xf3,0xc3"; + } elsif ($self->{op} eq "call" && !$elf && $current_segment eq ".init") { + ".p2align\t3\n\t.quad"; + } else { + "$self->{op}$self->{sz}"; + } + } else { + $self->{op} =~ s/^movz/movzx/; + if ($self->{op} eq "ret") { + $self->{op} = ""; + if ($win64 && $current_function->{abi} eq "svr4") { + $self->{op} = "mov rdi,QWORD$PTR\[8+rsp\]\t;WIN64 epilogue\n\t". 
+ "mov rsi,QWORD$PTR\[16+rsp\]\n\t"; + } + $self->{op} .= "DB\t0F3h,0C3h\t\t;repret"; + } elsif ($self->{op} =~ /^(pop|push)f/) { + $self->{op} .= $self->{sz}; + } elsif ($self->{op} eq "call" && $current_segment eq ".CRT\$XCU") { + $self->{op} = "\tDQ"; + } + $self->{op}; + } + } + sub mnemonic { + my ($self, $op) = @_; + $self->{op}=$op if (defined($op)); + $self->{op}; + } +} +{ package const; # pick up constants, which start with $ + sub re { + my ($class, $line) = @_; + my $self = {}; + my $ret; + + if ($$line =~ /^\$([^,]+)/) { + bless $self, $class; + $self->{value} = $1; + $ret = $self; + $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + } + $ret; + } + sub out { + my $self = shift; + + $self->{value} =~ s/\b(0b[0-1]+)/oct($1)/eig; + if ($gas) { + # Solaris /usr/ccs/bin/as can't handle multiplications + # in $self->{value} + my $value = $self->{value}; + no warnings; # oct might complain about overflow, ignore here... + $value =~ s/(?{value} = $value; + } + sprintf "\$%s",$self->{value}; + } else { + my $value = $self->{value}; + $value =~ s/0x([0-9a-f]+)/0$1h/ig if ($masm); + sprintf "%s",$value; + } + } +} +{ package ea; # pick up effective addresses: expr(%reg,%reg,scale) + sub re { + my ($class, $line, $opcode) = @_; + my $self = {}; + my $ret; + + # optional * ----vvv--- appears in indirect jmp/call + if ($$line =~ /^(\*?)([^\(,]*)\(([%\w,]+)\)/) { + bless $self, $class; + $self->{asterisk} = $1; + $self->{label} = $2; + ($self->{base},$self->{index},$self->{scale})=split(/,/,$3); + $self->{scale} = 1 if (!defined($self->{scale})); + $ret = $self; + $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + + if ($win64 && $self->{label} =~ s/\@GOTPCREL//) { + die if ($opcode->mnemonic() ne "mov"); + $opcode->mnemonic("lea"); + } + $self->{base} =~ s/^%//; + $self->{index} =~ s/^%// if (defined($self->{index})); + $self->{opcode} = $opcode; + } + $ret; + } + sub size {} + sub out { + my ($self, $sz) = @_; + + $self->{label} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; + $self->{label} =~ s/\.L/$decor/g; + + # Silently convert all EAs to 64-bit. This is required for + # elder GNU assembler and results in more compact code, + # *but* most importantly AES module depends on this feature! + $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; + $self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; + + # Solaris /usr/ccs/bin/as can't handle multiplications + # in $self->{label}... + use integer; + $self->{label} =~ s/(?{label} =~ s/\b([0-9]+\s*[\*\/\%]\s*[0-9]+)\b/eval($1)/eg; + + # Some assemblers insist on signed presentation of 32-bit + # offsets, but sign extension is a tricky business in perl... 
+ if ((1<<31)<<1) { + $self->{label} =~ s/\b([0-9]+)\b/$1<<32>>32/eg; + } else { + $self->{label} =~ s/\b([0-9]+)\b/$1>>0/eg; + } + + if (!$self->{label} && $self->{index} && $self->{scale}==1 && + $self->{base} =~ /(rbp|r13)/) { + $self->{base} = $self->{index}; $self->{index} = $1; + } + + if ($gas) { + $self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64"); + + if (defined($self->{index})) { + sprintf "%s%s(%s,%%%s,%d)",$self->{asterisk}, + $self->{label}, + $self->{base}?"%$self->{base}":"", + $self->{index},$self->{scale}; + } else { + sprintf "%s%s(%%%s)", $self->{asterisk},$self->{label},$self->{base}; + } + } else { + my %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", + l=>"DWORD$PTR", d=>"DWORD$PTR", + q=>"QWORD$PTR", o=>"OWORD$PTR", + x=>"XMMWORD$PTR", y=>"YMMWORD$PTR", z=>"ZMMWORD$PTR" ); + + $self->{label} =~ s/\./\$/g; + $self->{label} =~ s/(?{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/); + + my $mnemonic = $self->{opcode}->mnemonic(); + ($self->{asterisk}) && ($sz="q") || + ($mnemonic =~ /^v?mov([qd])$/) && ($sz=$1) || + ($mnemonic =~ /^v?pinsr([qdwb])$/) && ($sz=$1) || + ($mnemonic =~ /^vpbroadcast([qdwb])$/) && ($sz=$1) || + ($mnemonic =~ /^v(?!perm)[a-z]+[fi]128$/) && ($sz="x"); + + if (defined($self->{index})) { + sprintf "%s[%s%s*%d%s]",$szmap{$sz}, + $self->{label}?"$self->{label}+":"", + $self->{index},$self->{scale}, + $self->{base}?"+$self->{base}":""; + } elsif ($self->{base} eq "rip") { + sprintf "%s[%s]",$szmap{$sz},$self->{label}; + } else { + sprintf "%s[%s%s]",$szmap{$sz}, + $self->{label}?"$self->{label}+":"", + $self->{base}; + } + } + } +} +{ package register; # pick up registers, which start with %. + sub re { + my ($class, $line, $opcode) = @_; + my $self = {}; + my $ret; + + # optional * ----vvv--- appears in indirect jmp/call + if ($$line =~ /^(\*?)%(\w+)/) { + bless $self,$class; + $self->{asterisk} = $1; + $self->{value} = $2; + $opcode->size($self->size()); + $ret = $self; + $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + } + $ret; + } + sub size { + my $self = shift; + my $ret; + + if ($self->{value} =~ /^r[\d]+b$/i) { $ret="b"; } + elsif ($self->{value} =~ /^r[\d]+w$/i) { $ret="w"; } + elsif ($self->{value} =~ /^r[\d]+d$/i) { $ret="l"; } + elsif ($self->{value} =~ /^r[\w]+$/i) { $ret="q"; } + elsif ($self->{value} =~ /^[a-d][hl]$/i){ $ret="b"; } + elsif ($self->{value} =~ /^[\w]{2}l$/i) { $ret="b"; } + elsif ($self->{value} =~ /^[\w]{2}$/i) { $ret="w"; } + elsif ($self->{value} =~ /^e[a-z]{2}$/i){ $ret="l"; } + + $ret; + } + sub out { + my $self = shift; + if ($gas) { sprintf "%s%%%s",$self->{asterisk},$self->{value}; } + else { $self->{value}; } + } +} +{ package label; # pick up labels, which end with : + sub re { + my ($class, $line) = @_; + my $self = {}; + my $ret; + + if ($$line =~ /(^[\.\w]+)\:/) { + bless $self,$class; + $self->{value} = $1; + $ret = $self; + $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + + $self->{value} =~ s/^\.L/$decor/; + } + $ret; + } + sub out { + my $self = shift; + + if ($gas) { + my $func = ($globals{$self->{value}} or $self->{value}) . 
":"; + if ($win64 && + $current_function->{name} eq $self->{value} && + $current_function->{abi} eq "svr4") { + $func .= "\n"; + $func .= " movq %rdi,8(%rsp)\n"; + $func .= " movq %rsi,16(%rsp)\n"; + $func .= " movq %rsp,%rax\n"; + $func .= "${decor}SEH_begin_$current_function->{name}:\n"; + my $narg = $current_function->{narg}; + $narg=6 if (!defined($narg)); + $func .= " movq %rcx,%rdi\n" if ($narg>0); + $func .= " movq %rdx,%rsi\n" if ($narg>1); + $func .= " movq %r8,%rdx\n" if ($narg>2); + $func .= " movq %r9,%rcx\n" if ($narg>3); + $func .= " movq 40(%rsp),%r8\n" if ($narg>4); + $func .= " movq 48(%rsp),%r9\n" if ($narg>5); + } + $func; + } elsif ($self->{value} ne "$current_function->{name}") { + # Make all labels in masm global. + $self->{value} .= ":" if ($masm); + $self->{value} . ":"; + } elsif ($win64 && $current_function->{abi} eq "svr4") { + my $func = "$current_function->{name}" . + ($nasm ? ":" : "\tPROC $current_function->{scope}") . + "\n"; + $func .= " mov QWORD$PTR\[8+rsp\],rdi\t;WIN64 prologue\n"; + $func .= " mov QWORD$PTR\[16+rsp\],rsi\n"; + $func .= " mov rax,rsp\n"; + $func .= "${decor}SEH_begin_$current_function->{name}:"; + $func .= ":" if ($masm); + $func .= "\n"; + my $narg = $current_function->{narg}; + $narg=6 if (!defined($narg)); + $func .= " mov rdi,rcx\n" if ($narg>0); + $func .= " mov rsi,rdx\n" if ($narg>1); + $func .= " mov rdx,r8\n" if ($narg>2); + $func .= " mov rcx,r9\n" if ($narg>3); + $func .= " mov r8,QWORD$PTR\[40+rsp\]\n" if ($narg>4); + $func .= " mov r9,QWORD$PTR\[48+rsp\]\n" if ($narg>5); + $func .= "\n"; + } else { + "$current_function->{name}". + ($nasm ? ":" : "\tPROC $current_function->{scope}"); + } + } +} +{ package expr; # pick up expressions + sub re { + my ($class, $line, $opcode) = @_; + my $self = {}; + my $ret; + + if ($$line =~ /(^[^,]+)/) { + bless $self,$class; + $self->{value} = $1; + $ret = $self; + $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + + $self->{value} =~ s/\@PLT// if (!$elf); + $self->{value} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; + $self->{value} =~ s/\.L/$decor/g; + $self->{opcode} = $opcode; + } + $ret; + } + sub out { + my $self = shift; + if ($nasm && $self->{opcode}->mnemonic()=~m/^j(?![re]cxz)/) { + "NEAR ".$self->{value}; + } else { + $self->{value}; + } + } +} +{ package directive; # pick up directives, which start with . + sub re { + my ($class, $line) = @_; + my $self = {}; + my $ret; + my $dir; + my %opcode = # lea 2f-1f(%rip),%dst; 1: nop; 2: + ( "%rax"=>0x01058d48, "%rcx"=>0x010d8d48, + "%rdx"=>0x01158d48, "%rbx"=>0x011d8d48, + "%rsp"=>0x01258d48, "%rbp"=>0x012d8d48, + "%rsi"=>0x01358d48, "%rdi"=>0x013d8d48, + "%r8" =>0x01058d4c, "%r9" =>0x010d8d4c, + "%r10"=>0x01158d4c, "%r11"=>0x011d8d4c, + "%r12"=>0x01258d4c, "%r13"=>0x012d8d4c, + "%r14"=>0x01358d4c, "%r15"=>0x013d8d4c ); + + if ($$line =~ /^\s*(\.\w+)/) { + bless $self,$class; + $dir = $1; + $ret = $self; + undef $self->{value}; + $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + + SWITCH: for ($dir) { + /\.picmeup/ && do { if ($$line =~ /(%r[\w]+)/i) { + $dir="\t.long"; + $$line=sprintf "0x%x,0x90000000",$opcode{$1}; + } + last; + }; + /\.global|\.globl|\.extern/ + && do { $globals{$$line} = $prefix . 
$$line; + $$line = $globals{$$line} if ($prefix); + last; + }; + /\.type/ && do { my ($sym,$type,$narg) = split(',',$$line); + if ($type eq "\@function") { + undef $current_function; + $current_function->{name} = $sym; + $current_function->{abi} = "svr4"; + $current_function->{narg} = $narg; + $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE"; + } elsif ($type eq "\@abi-omnipotent") { + undef $current_function; + $current_function->{name} = $sym; + $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE"; + } + $$line =~ s/\@abi\-omnipotent/\@function/; + $$line =~ s/\@function.*/\@function/; + last; + }; + /\.asciz/ && do { if ($$line =~ /^"(.*)"$/) { + $dir = ".byte"; + $$line = join(",",unpack("C*",$1),0); + } + last; + }; + /\.rva|\.long|\.quad/ + && do { $$line =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; + $$line =~ s/\.L/$decor/g; + last; + }; + } + + if ($gas) { + $self->{value} = $dir . "\t" . $$line; + + if ($dir =~ /\.extern/) { + $self->{value} = ""; # swallow extern + } elsif (!$elf && $dir =~ /\.type/) { + $self->{value} = ""; + $self->{value} = ".def\t" . ($globals{$1} or $1) . ";\t" . + (defined($globals{$1})?".scl 2;":".scl 3;") . + "\t.type 32;\t.endef" + if ($win64 && $$line =~ /([^,]+),\@function/); + } elsif (!$elf && $dir =~ /\.size/) { + $self->{value} = ""; + if (defined($current_function)) { + $self->{value} .= "${decor}SEH_end_$current_function->{name}:" + if ($win64 && $current_function->{abi} eq "svr4"); + undef $current_function; + } + } elsif (!$elf && $dir =~ /\.align/) { + $self->{value} = ".p2align\t" . (log($$line)/log(2)); + } elsif ($dir eq ".section") { + $current_segment=$$line; + if (!$elf && $current_segment eq ".init") { + if ($flavour eq "macosx") { $self->{value} = ".mod_init_func"; } + elsif ($flavour eq "mingw64") { $self->{value} = ".section\t.ctors"; } + } + } elsif ($dir =~ /\.(text|data)/) { + $current_segment=".$1"; + } elsif ($dir =~ /\.hidden/) { + if ($flavour eq "macosx") { $self->{value} = ".private_extern\t$prefix$$line"; } + elsif ($flavour eq "mingw64") { $self->{value} = ""; } + } elsif ($dir =~ /\.comm/) { + $self->{value} = "$dir\t$prefix$$line"; + $self->{value} =~ s|,([0-9]+),([0-9]+)$|",$1,".log($2)/log(2)|e if ($flavour eq "macosx"); + } + $$line = ""; + return $self; + } + + # non-gas case or nasm/masm + SWITCH: for ($dir) { + /\.text/ && do { my $v=undef; + if ($nasm) { + $v="section .text code align=64\n"; + } else { + $v="$current_segment\tENDS\n" if ($current_segment); + $current_segment = ".text\$"; + $v.="$current_segment\tSEGMENT "; + $v.=$masm>=$masmref ? "ALIGN(256)" : "PAGE"; + $v.=" 'CODE'"; + } + $self->{value} = $v; + last; + }; + /\.data/ && do { my $v=undef; + if ($nasm) { + $v="section .data data align=8\n"; + } else { + $v="$current_segment\tENDS\n" if ($current_segment); + $current_segment = "_DATA"; + $v.="$current_segment\tSEGMENT"; + } + $self->{value} = $v; + last; + }; + /\.section/ && do { my $v=undef; + $$line =~ s/([^,]*).*/$1/; + $$line = ".CRT\$XCU" if ($$line eq ".init"); + if ($nasm) { + $v="section $$line"; + if ($$line=~/\.([px])data/) { + $v.=" rdata align="; + $v.=$1 eq "p"? 4 : 8; + } elsif ($$line=~/\.CRT\$/i) { + $v.=" rdata align=8"; + } + } else { + $v="$current_segment\tENDS\n" if ($current_segment); + $v.="$$line\tSEGMENT"; + if ($$line=~/\.([px])data/) { + $v.=" READONLY"; + $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref); + } elsif ($$line=~/\.CRT\$/i) { + $v.=" READONLY "; + $v.=$masm>=$masmref ? 
"ALIGN(8)" : "DWORD"; + } + } + $current_segment = $$line; + $self->{value} = $v; + last; + }; + /\.extern/ && do { $self->{value} = "EXTERN\t".$$line; + $self->{value} .= ":NEAR" if ($masm); + last; + }; + /\.globl|.global/ + && do { $self->{value} = $masm?"PUBLIC":"global"; + $self->{value} .= "\t".$$line; + last; + }; + /\.size/ && do { if (defined($current_function)) { + undef $self->{value}; + if ($current_function->{abi} eq "svr4") { + $self->{value}="${decor}SEH_end_$current_function->{name}:"; + $self->{value}.=":\n" if($masm); + } + $self->{value}.="$current_function->{name}\tENDP" if($masm && $current_function->{name}); + undef $current_function; + } + last; + }; + /\.align/ && do { my $max = ($masm && $masm>=$masmref) ? 256 : 4096; + $self->{value} = "ALIGN\t".($$line>$max?$max:$$line); + last; + }; + /\.(value|long|rva|quad)/ + && do { my $sz = substr($1,0,1); + my @arr = split(/,\s*/,$$line); + my $last = pop(@arr); + my $conv = sub { my $var=shift; + $var=~s/^(0b[0-1]+)/oct($1)/eig; + $var=~s/^0x([0-9a-f]+)/0$1h/ig if ($masm); + if ($sz eq "D" && ($current_segment=~/.[px]data/ || $dir eq ".rva")) + { $var=~s/([_a-z\$\@][_a-z0-9\$\@]*)/$nasm?"$1 wrt ..imagebase":"imagerel $1"/egi; } + $var; + }; + + $sz =~ tr/bvlrq/BWDDQ/; + $self->{value} = "\tD$sz\t"; + for (@arr) { $self->{value} .= &$conv($_).","; } + $self->{value} .= &$conv($last); + last; + }; + /\.byte/ && do { my @str=split(/,\s*/,$$line); + map(s/(0b[0-1]+)/oct($1)/eig,@str); + map(s/0x([0-9a-f]+)/0$1h/ig,@str) if ($masm); + while ($#str>15) { + $self->{value}.="DB\t" + .join(",",@str[0..15])."\n"; + foreach (0..15) { shift @str; } + } + $self->{value}.="DB\t" + .join(",",@str) if (@str); + last; + }; + /\.comm/ && do { my @str=split(/,\s*/,$$line); + my $v=undef; + if ($nasm) { + $v.="common $prefix@str[0] @str[1]"; + } else { + $v="$current_segment\tENDS\n" if ($current_segment); + $current_segment = "_DATA"; + $v.="$current_segment\tSEGMENT\n"; + $v.="COMM @str[0]:DWORD:".@str[1]/4; + } + $self->{value} = $v; + last; + }; + } + $$line = ""; + } + + $ret; + } + sub out { + my $self = shift; + $self->{value}; + } +} + +sub rex { + my $opcode=shift; + my ($dst,$src,$rex)=@_; + + $rex|=0x04 if($dst>=8); + $rex|=0x01 if($src>=8); + push @$opcode,($rex|0x40) if ($rex); +} + +# Upon initial x86_64 introduction SSE>2 extensions were not introduced +# yet. In order not to be bothered by tracing exact assembler versions, +# but at the same time to provide a bare security minimum of AES-NI, we +# hard-code some instructions. Extensions past AES-NI on the other hand +# are traced by examining assembler version in individual perlasm +# modules... 
+ +my %regrm = ( "%eax"=>0, "%ecx"=>1, "%edx"=>2, "%ebx"=>3, + "%esp"=>4, "%ebp"=>5, "%esi"=>6, "%edi"=>7 ); + +my $movq = sub { # elderly gas can't handle inter-register movq + my $arg = shift; + my @opcode=(0x66); + if ($arg =~ /%xmm([0-9]+),\s*%r(\w+)/) { + my ($src,$dst)=($1,$2); + if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } + rex(\@opcode,$src,$dst,0x8); + push @opcode,0x0f,0x7e; + push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M + @opcode; + } elsif ($arg =~ /%r(\w+),\s*%xmm([0-9]+)/) { + my ($src,$dst)=($2,$1); + if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } + rex(\@opcode,$src,$dst,0x8); + push @opcode,0x0f,0x6e; + push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M + @opcode; + } else { + (); + } +}; + +my $pextrd = sub { + if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*(%\w+)/) { + my @opcode=(0x66); + my $imm=$1; + my $src=$2; + my $dst=$3; + if ($dst =~ /%r([0-9]+)d/) { $dst = $1; } + elsif ($dst =~ /%e/) { $dst = $regrm{$dst}; } + rex(\@opcode,$src,$dst); + push @opcode,0x0f,0x3a,0x16; + push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M + push @opcode,$imm; + @opcode; + } else { + (); + } +}; + +my $pinsrd = sub { + if (shift =~ /\$([0-9]+),\s*(%\w+),\s*%xmm([0-9]+)/) { + my @opcode=(0x66); + my $imm=$1; + my $src=$2; + my $dst=$3; + if ($src =~ /%r([0-9]+)/) { $src = $1; } + elsif ($src =~ /%e/) { $src = $regrm{$src}; } + rex(\@opcode,$dst,$src); + push @opcode,0x0f,0x3a,0x22; + push @opcode,0xc0|(($dst&7)<<3)|($src&7); # ModR/M + push @opcode,$imm; + @opcode; + } else { + (); + } +}; + +my $pshufb = sub { + if (shift =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x66); + rex(\@opcode,$2,$1); + push @opcode,0x0f,0x38,0x00; + push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M + @opcode; + } else { + (); + } +}; + +my $palignr = sub { + if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x66); + rex(\@opcode,$3,$2); + push @opcode,0x0f,0x3a,0x0f; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + push @opcode,$1; + @opcode; + } else { + (); + } +}; + +my $pclmulqdq = sub { + if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x66); + rex(\@opcode,$3,$2); + push @opcode,0x0f,0x3a,0x44; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + my $c=$1; + push @opcode,$c=~/^0/?oct($c):$c; + @opcode; + } else { + (); + } +}; + +my $rdrand = sub { + if (shift =~ /%[er](\w+)/) { + my @opcode=(); + my $dst=$1; + if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } + rex(\@opcode,0,$dst,8); + push @opcode,0x0f,0xc7,0xf0|($dst&7); + @opcode; + } else { + (); + } +}; + +my $rdseed = sub { + if (shift =~ /%[er](\w+)/) { + my @opcode=(); + my $dst=$1; + if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } + rex(\@opcode,0,$dst,8); + push @opcode,0x0f,0xc7,0xf8|($dst&7); + @opcode; + } else { + (); + } +}; + +sub rxb { + my $opcode=shift; + my ($dst,$src1,$src2,$rxb)=@_; + + $rxb|=0x7<<5; + $rxb&=~(0x04<<5) if($dst>=8); + $rxb&=~(0x01<<5) if($src1>=8); + $rxb&=~(0x02<<5) if($src2>=8); + push @$opcode,$rxb; +} + +my $vprotd = sub { + if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x8f); + rxb(\@opcode,$3,$2,-1,0x08); + push @opcode,0x78,0xc2; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + my $c=$1; + push @opcode,$c=~/^0/?oct($c):$c; + @opcode; + } else { + (); + } +}; + +my $vprotq = sub { + if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x8f); + rxb(\@opcode,$3,$2,-1,0x08); + push @opcode,0x78,0xc3; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + my $c=$1; + push 
@opcode,$c=~/^0/?oct($c):$c; + @opcode; + } else { + (); + } +}; + +my $endbranch = sub { + (0xf3,0x0f,0x1e,0xfa); +}; + +if ($nasm) { + print <<___; +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +___ +} elsif ($masm) { + print <<___; +OPTION DOTNAME +___ +} +while(defined(my $line=<>)) { + + $line =~ s|\R$||; # Better chomp + + $line =~ s|[#!].*$||; # get rid of asm-style comments... + $line =~ s|/\*.*\*/||; # ... and C-style comments... + $line =~ s|^\s+||; # ... and skip white spaces in beginning + $line =~ s|\s+$||; # ... and at the end + + if (my $label=label->re(\$line)) { print $label->out(); } + + if (my $directive=directive->re(\$line)) { + printf "%s",$directive->out(); + } elsif (my $opcode=opcode->re(\$line)) { + my $asm = eval("\$".$opcode->mnemonic()); + + if ((ref($asm) eq 'CODE') && scalar(my @bytes=&$asm($line))) { + print $gas?".byte\t":"DB\t",join(',',@bytes),"\n"; + next; + } + + my @args; + ARGUMENT: while (1) { + my $arg; + + ($arg=register->re(\$line, $opcode))|| + ($arg=const->re(\$line)) || + ($arg=ea->re(\$line, $opcode)) || + ($arg=expr->re(\$line, $opcode)) || + last ARGUMENT; + + push @args,$arg; + + last ARGUMENT if ($line !~ /^,/); + + $line =~ s/^,\s*//; + } # ARGUMENT: + + if ($#args>=0) { + my $insn; + my $sz=$opcode->size(); + + if ($gas) { + $insn = $opcode->out($#args>=1?$args[$#args]->size():$sz); + @args = map($_->out($sz),@args); + printf "\t%s\t%s",$insn,join(",",@args); + } else { + $insn = $opcode->out(); + foreach (@args) { + my $arg = $_->out(); + # $insn.=$sz compensates for movq, pinsrw, ... + if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; } + if ($arg =~ /^ymm[0-9]+$/) { $insn.=$sz; $sz="y" if(!$sz); last; } + if ($arg =~ /^zmm[0-9]+$/) { $insn.=$sz; $sz="z" if(!$sz); last; } + if ($arg =~ /^mm[0-9]+$/) { $insn.=$sz; $sz="q" if(!$sz); last; } + } + @args = reverse(@args); + undef $sz if ($nasm && $opcode->mnemonic() eq "lea"); + printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args)); + } + } else { + printf "\t%s",$opcode->out(); + } + } + + print $line,"\n"; +} + +print "\n$current_segment\tENDS\n" if ($current_segment && $masm); +print "END\n" if ($masm); + +close STDOUT; + + ################################################# +# Cross-reference x86_64 ABI "card" +# +# Unix Win64 +# %rax * * +# %rbx - - +# %rcx #4 #1 +# %rdx #3 #2 +# %rsi #2 - +# %rdi #1 - +# %rbp - - +# %rsp - - +# %r8 #5 #3 +# %r9 #6 #4 +# %r10 * * +# %r11 * * +# %r12 - - +# %r13 - - +# %r14 - - +# %r15 - - +# +# (*) volatile register +# (-) preserved by callee +# (#) Nth argument, volatile +# +# In Unix terms top of stack is argument transfer area for arguments +# which could not be accommodated in registers. Or in other words 7th +# [integer] argument resides at 8(%rsp) upon function entry point. +# 128 bytes above %rsp constitute a "red zone" which is not touched +# by signal handlers and can be used as temporal storage without +# allocating a frame. +# +# In Win64 terms N*8 bytes on top of stack is argument transfer area, +# which belongs to/can be overwritten by callee. N is the number of +# arguments passed to callee, *but* not less than 4! This means that +# upon function entry point 5th argument resides at 40(%rsp), as well +# as that 32 bytes from 8(%rsp) can always be used as temporal +# storage [without allocating a frame]. One can actually argue that +# one can assume a "red zone" above stack pointer under Win64 as well. 
+# Point is that at apparently no occasion Windows kernel would alter +# the area above user stack pointer in true asynchronous manner... +# +# All the above means that if assembler programmer adheres to Unix +# register and stack layout, but disregards the "red zone" existence, +# it's possible to use following prologue and epilogue to "gear" from +# Unix to Win64 ABI in leaf functions with not more than 6 arguments. +# +# omnipotent_function: +# ifdef WIN64 +# movq %rdi,8(%rsp) +# movq %rsi,16(%rsp) +# movq %rcx,%rdi ; if 1st argument is actually present +# movq %rdx,%rsi ; if 2nd argument is actually ... +# movq %r8,%rdx ; if 3rd argument is ... +# movq %r9,%rcx ; if 4th argument ... +# movq 40(%rsp),%r8 ; if 5th ... +# movq 48(%rsp),%r9 ; if 6th ... +# endif +# ... +# ifdef WIN64 +# movq 8(%rsp),%rdi +# movq 16(%rsp),%rsi +# endif +# ret +# + ################################################# +# Win64 SEH, Structured Exception Handling. +# +# Unlike on Unix systems(*) lack of Win64 stack unwinding information +# has undesired side-effect at run-time: if an exception is raised in +# assembler subroutine such as those in question (basically we're +# referring to segmentation violations caused by malformed input +# parameters), the application is briskly terminated without invoking +# any exception handlers, most notably without generating memory dump +# or any user notification whatsoever. This poses a problem. It's +# possible to address it by registering custom language-specific +# handler that would restore processor context to the state at +# subroutine entry point and return "exception is not handled, keep +# unwinding" code. Writing such handler can be a challenge... But it's +# doable, though requires certain coding convention. Consider following +# snippet: +# +# .type function,@function +# function: +# movq %rsp,%rax # copy rsp to volatile register +# pushq %r15 # save non-volatile registers +# pushq %rbx +# pushq %rbp +# movq %rsp,%r11 +# subq %rdi,%r11 # prepare [variable] stack frame +# andq $-64,%r11 +# movq %rax,0(%r11) # check for exceptions +# movq %r11,%rsp # allocate [variable] stack frame +# movq %rax,0(%rsp) # save original rsp value +# magic_point: +# ... +# movq 0(%rsp),%rcx # pull original rsp value +# movq -24(%rcx),%rbp # restore non-volatile registers +# movq -16(%rcx),%rbx +# movq -8(%rcx),%r15 +# movq %rcx,%rsp # restore original rsp +# ret +# .size function,.-function +# +# The key is that up to magic_point copy of original rsp value remains +# in chosen volatile register and no non-volatile register, except for +# rsp, is modified. While past magic_point rsp remains constant till +# the very end of the function. In this case custom language-specific +# exception handler would look like this: +# +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +# { ULONG64 *rsp = (ULONG64 *)context->Rax; +# if (context->Rip >= magic_point) +# { rsp = ((ULONG64 **)context->Rsp)[0]; +# context->Rbp = rsp[-3]; +# context->Rbx = rsp[-2]; +# context->R15 = rsp[-1]; +# } +# context->Rsp = (ULONG64)rsp; +# context->Rdi = rsp[1]; +# context->Rsi = rsp[2]; +# +# memcpy (disp->ContextRecord,context,sizeof(CONTEXT)); +# RtlVirtualUnwind(UNW_FLAG_NHANDLER,disp->ImageBase, +# dips->ControlPc,disp->FunctionEntry,disp->ContextRecord, +# &disp->HandlerData,&disp->EstablisherFrame,NULL); +# return ExceptionContinueSearch; +# } +# +# It's appropriate to implement this handler in assembler, directly in +# function's module. 
In order to do that one has to know members' +# offsets in CONTEXT and DISPATCHER_CONTEXT structures and some constant +# values. Here they are: +# +# CONTEXT.Rax 120 +# CONTEXT.Rcx 128 +# CONTEXT.Rdx 136 +# CONTEXT.Rbx 144 +# CONTEXT.Rsp 152 +# CONTEXT.Rbp 160 +# CONTEXT.Rsi 168 +# CONTEXT.Rdi 176 +# CONTEXT.R8 184 +# CONTEXT.R9 192 +# CONTEXT.R10 200 +# CONTEXT.R11 208 +# CONTEXT.R12 216 +# CONTEXT.R13 224 +# CONTEXT.R14 232 +# CONTEXT.R15 240 +# CONTEXT.Rip 248 +# CONTEXT.Xmm6 512 +# sizeof(CONTEXT) 1232 +# DISPATCHER_CONTEXT.ControlPc 0 +# DISPATCHER_CONTEXT.ImageBase 8 +# DISPATCHER_CONTEXT.FunctionEntry 16 +# DISPATCHER_CONTEXT.EstablisherFrame 24 +# DISPATCHER_CONTEXT.TargetIp 32 +# DISPATCHER_CONTEXT.ContextRecord 40 +# DISPATCHER_CONTEXT.LanguageHandler 48 +# DISPATCHER_CONTEXT.HandlerData 56 +# UNW_FLAG_NHANDLER 0 +# ExceptionContinueSearch 1 +# +# In order to tie the handler to the function one has to compose +# couple of structures: one for .xdata segment and one for .pdata. +# +# UNWIND_INFO structure for .xdata segment would be +# +# function_unwind_info: +# .byte 9,0,0,0 +# .rva handler +# +# This structure designates exception handler for a function with +# zero-length prologue, no stack frame or frame register. +# +# To facilitate composing of .pdata structures, auto-generated "gear" +# prologue copies rsp value to rax and denotes next instruction with +# .LSEH_begin_{function_name} label. This essentially defines the SEH +# styling rule mentioned in the beginning. Position of this label is +# chosen in such manner that possible exceptions raised in the "gear" +# prologue would be accounted to caller and unwound from latter's frame. +# End of function is marked with respective .LSEH_end_{function_name} +# label. To summarize, .pdata segment would contain +# +# .rva .LSEH_begin_function +# .rva .LSEH_end_function +# .rva function_unwind_info +# +# Reference to function_unwind_info from .xdata segment is the anchor. +# In case you wonder why references are 32-bit .rvas and not 64-bit +# .quads. References put into these two segments are required to be +# *relative* to the base address of the current binary module, a.k.a. +# image base. No Win64 module, be it .exe or .dll, can be larger than +# 2GB and thus such relative references can be and are accommodated in +# 32 bits. +# +# Having reviewed the example function code, one can argue that "movq +# %rsp,%rax" above is redundant. It is not! Keep in mind that on Unix +# rax would contain an undefined value. If this "offends" you, use +# another register and refrain from modifying rax till magic_point is +# reached, i.e. as if it was a non-volatile register. If more registers +# are required prior [variable] frame setup is completed, note that +# nobody says that you can have only one "magic point." You can +# "liberate" non-volatile registers by denoting last stack off-load +# instruction and reflecting it in finer grade unwind logic in handler. +# After all, isn't it why it's called *language-specific* handler... +# +# Attentive reader can notice that exceptions would be mishandled in +# auto-generated "gear" epilogue. Well, exception effectively can't +# occur there, because if memory area used by it was subject to +# segmentation violation, then it would be raised upon call to the +# function (and as already mentioned be accounted to caller, which is +# not a problem). If you're still not comfortable, then define tail +# "magic point" just prior ret instruction and have handler treat it... 
+# +# (*) Note that we're talking about run-time, not debug-time. Lack of +# unwind information makes debugging hard on both Windows and +# Unix. "Unlike" referes to the fact that on Unix signal handler +# will always be invoked, core dumped and appropriate exit code +# returned to parent (for user notification). diff --git a/openssl-1.1.0h/crypto/perlasm/x86asm.pl b/openssl-1.1.0h/crypto/perlasm/x86asm.pl new file mode 100644 index 0000000..1ff46c9 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/x86asm.pl @@ -0,0 +1,310 @@ +#! /usr/bin/env perl +# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + + +# require 'x86asm.pl'; +# &asm_init(,"des-586.pl"[,$i386only]); +# &function_begin("foo"); +# ... +# &function_end("foo"); +# &asm_finish + +$out=(); +$i386=0; + +# AUTOLOAD is this context has quite unpleasant side effect, namely +# that typos in function calls effectively go to assembler output, +# but on the pros side we don't have to implement one subroutine per +# each opcode... +sub ::AUTOLOAD +{ my $opcode = $AUTOLOAD; + + die "more than 4 arguments passed to $opcode" if ($#_>3); + + $opcode =~ s/.*:://; + if ($opcode =~ /^push/) { $stack+=4; } + elsif ($opcode =~ /^pop/) { $stack-=4; } + + &generic($opcode,@_) or die "undefined subroutine \&$AUTOLOAD"; +} + +sub ::emit +{ my $opcode=shift; + + if ($#_==-1) { push(@out,"\t$opcode\n"); } + else { push(@out,"\t$opcode\t".join(',',@_)."\n"); } +} + +sub ::LB +{ $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'low byte'"; + $1."l"; +} +sub ::HB +{ $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'high byte'"; + $1."h"; +} +sub ::stack_push{ my $num=$_[0]*4; $stack+=$num; &sub("esp",$num); } +sub ::stack_pop { my $num=$_[0]*4; $stack-=$num; &add("esp",$num); } +sub ::blindpop { &pop($_[0]); $stack+=4; } +sub ::wparam { &DWP($stack+4*$_[0],"esp"); } +sub ::swtmp { &DWP(4*$_[0],"esp"); } + +sub ::bswap +{ if ($i386) # emulate bswap for i386 + { &comment("bswap @_"); + &xchg(&HB(@_),&LB(@_)); + &ror (@_,16); + &xchg(&HB(@_),&LB(@_)); + } + else + { &generic("bswap",@_); } +} +# These are made-up opcodes introduced over the years essentially +# by ignorance, just alias them to real ones... 
+sub ::movb { &mov(@_); } +sub ::xorb { &xor(@_); } +sub ::rotl { &rol(@_); } +sub ::rotr { &ror(@_); } +sub ::exch { &xchg(@_); } +sub ::halt { &hlt; } +sub ::movz { &movzx(@_); } +sub ::pushf { &pushfd; } +sub ::popf { &popfd; } + +# 3 argument instructions +sub ::movq +{ my($p1,$p2,$optimize)=@_; + + if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/) + # movq between mmx registers can sink Intel CPUs + { &::pshufw($p1,$p2,0xe4); } + else + { &::generic("movq",@_); } +} + +# SSE>2 instructions +my %regrm = ( "eax"=>0, "ecx"=>1, "edx"=>2, "ebx"=>3, + "esp"=>4, "ebp"=>5, "esi"=>6, "edi"=>7 ); +sub ::pextrd +{ my($dst,$src,$imm)=@_; + if ("$dst:$src" =~ /(e[a-dsd][ixp]):xmm([0-7])/) + { &::data_byte(0x66,0x0f,0x3a,0x16,0xc0|($2<<3)|$regrm{$1},$imm); } + else + { &::generic("pextrd",@_); } +} + +sub ::pinsrd +{ my($dst,$src,$imm)=@_; + if ("$dst:$src" =~ /xmm([0-7]):(e[a-dsd][ixp])/) + { &::data_byte(0x66,0x0f,0x3a,0x22,0xc0|($1<<3)|$regrm{$2},$imm); } + else + { &::generic("pinsrd",@_); } +} + +sub ::pshufb +{ my($dst,$src)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &data_byte(0x66,0x0f,0x38,0x00,0xc0|($1<<3)|$2); } + else + { &::generic("pshufb",@_); } +} + +sub ::palignr +{ my($dst,$src,$imm)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &::data_byte(0x66,0x0f,0x3a,0x0f,0xc0|($1<<3)|$2,$imm); } + else + { &::generic("palignr",@_); } +} + +sub ::pclmulqdq +{ my($dst,$src,$imm)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &::data_byte(0x66,0x0f,0x3a,0x44,0xc0|($1<<3)|$2,$imm); } + else + { &::generic("pclmulqdq",@_); } +} + +sub ::rdrand +{ my ($dst)=@_; + if ($dst =~ /(e[a-dsd][ixp])/) + { &::data_byte(0x0f,0xc7,0xf0|$regrm{$dst}); } + else + { &::generic("rdrand",@_); } +} + +sub ::rdseed +{ my ($dst)=@_; + if ($dst =~ /(e[a-dsd][ixp])/) + { &::data_byte(0x0f,0xc7,0xf8|$regrm{$dst}); } + else + { &::generic("rdrand",@_); } +} + +sub rxb { + local *opcode=shift; + my ($dst,$src1,$src2,$rxb)=@_; + + $rxb|=0x7<<5; + $rxb&=~(0x04<<5) if($dst>=8); + $rxb&=~(0x01<<5) if($src1>=8); + $rxb&=~(0x02<<5) if($src2>=8); + push @opcode,$rxb; +} + +sub ::vprotd +{ my $args=join(',',@_); + if ($args =~ /xmm([0-7]),xmm([0-7]),([x0-9a-f]+)/) + { my @opcode=(0x8f); + rxb(\@opcode,$1,$2,-1,0x08); + push @opcode,0x78,0xc2; + push @opcode,0xc0|($2&7)|(($1&7)<<3); # ModR/M + my $c=$3; + push @opcode,$c=~/^0/?oct($c):$c; + &::data_byte(@opcode); + } + else + { &::generic("vprotd",@_); } +} + +sub ::endbranch +{ + &::data_byte(0xf3,0x0f,0x1e,0xfb); +} + +# label management +$lbdecor="L"; # local label decoration, set by package +$label="000"; + +sub ::islabel # see is argument is a known label +{ my $i; + foreach $i (values %label) { return $i if ($i eq $_[0]); } + $label{$_[0]}; # can be undef +} + +sub ::label # instantiate a function-scope label +{ if (!defined($label{$_[0]})) + { $label{$_[0]}="${lbdecor}${label}${_[0]}"; $label++; } + $label{$_[0]}; +} + +sub ::LABEL # instantiate a file-scope label +{ $label{$_[0]}=$_[1] if (!defined($label{$_[0]})); + $label{$_[0]}; +} + +sub ::static_label { &::LABEL($_[0],$lbdecor.$_[0]); } + +sub ::set_label_B { push(@out,"@_:\n"); } +sub ::set_label +{ my $label=&::label($_[0]); + &::align($_[1]) if ($_[1]>1); + &::set_label_B($label); + $label; +} + +sub ::wipe_labels # wipes function-scope labels +{ foreach $i (keys %label) + { delete $label{$i} if ($label{$i} =~ /^\Q${lbdecor}\E[0-9]{3}/); } +} + +# subroutine management +sub ::function_begin +{ &function_begin_B(@_); + $stack=4; + &push("ebp"); + &push("ebx"); + &push("esi"); + 
&push("edi"); +} + +sub ::function_end +{ &pop("edi"); + &pop("esi"); + &pop("ebx"); + &pop("ebp"); + &ret(); + &function_end_B(@_); + $stack=0; + &wipe_labels(); +} + +sub ::function_end_A +{ &pop("edi"); + &pop("esi"); + &pop("ebx"); + &pop("ebp"); + &ret(); + $stack+=16; # readjust esp as if we didn't pop anything +} + +sub ::asciz +{ my @str=unpack("C*",shift); + push @str,0; + while ($#str>15) { + &data_byte(@str[0..15]); + foreach (0..15) { shift @str; } + } + &data_byte(@str) if (@str); +} + +sub ::asm_finish +{ &file_end(); + print @out; +} + +sub ::asm_init +{ my ($type,$fn,$cpu)=@_; + + $filename=$fn; + $i386=$cpu; + + $elf=$cpp=$coff=$aout=$macosx=$win32=$netware=$mwerks=$android=0; + if (($type eq "elf")) + { $elf=1; require "x86gas.pl"; } + elsif (($type eq "elf-1")) + { $elf=-1; require "x86gas.pl"; } + elsif (($type eq "a\.out")) + { $aout=1; require "x86gas.pl"; } + elsif (($type eq "coff" or $type eq "gaswin")) + { $coff=1; require "x86gas.pl"; } + elsif (($type eq "win32n")) + { $win32=1; require "x86nasm.pl"; } + elsif (($type eq "nw-nasm")) + { $netware=1; require "x86nasm.pl"; } + #elsif (($type eq "nw-mwasm")) + #{ $netware=1; $mwerks=1; require "x86nasm.pl"; } + elsif (($type eq "win32")) + { $win32=1; require "x86masm.pl"; } + elsif (($type eq "macosx")) + { $aout=1; $macosx=1; require "x86gas.pl"; } + elsif (($type eq "android")) + { $elf=1; $android=1; require "x86gas.pl"; } + else + { print STDERR <<"EOF"; +Pick one target type from + elf - Linux, FreeBSD, Solaris x86, etc. + a.out - DJGPP, elder OpenBSD, etc. + coff - GAS/COFF such as Win32 targets + win32n - Windows 95/Windows NT NASM format + nw-nasm - NetWare NASM format + macosx - Mac OS X +EOF + exit(1); + } + + $pic=0; + for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } + + $filename =~ s/\.pl$//; + &file($filename); +} + +sub ::hidden {} + +1; diff --git a/openssl-1.1.0h/crypto/perlasm/x86gas.pl b/openssl-1.1.0h/crypto/perlasm/x86gas.pl new file mode 100644 index 0000000..2c8fce0 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/x86gas.pl @@ -0,0 +1,265 @@ +#! /usr/bin/env perl +# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. 
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + + +package x86gas; + +*out=\@::out; + +$::lbdecor=$::aout?"L":".L"; # local label decoration +$nmdecor=($::aout or $::coff)?"_":""; # external name decoration + +$initseg=""; + +$align=16; +$align=log($align)/log(2) if ($::aout); +$com_start="#" if ($::aout or $::coff); + +sub opsize() +{ my $reg=shift; + if ($reg =~ m/^%e/o) { "l"; } + elsif ($reg =~ m/^%[a-d][hl]$/o) { "b"; } + elsif ($reg =~ m/^%[yxm]/o) { undef; } + else { "w"; } +} + +# swap arguments; +# expand opcode with size suffix; +# prefix numeric constants with $; +sub ::generic +{ my($opcode,@arg)=@_; + my($suffix,$dst,$src); + + @arg=reverse(@arg); + + for (@arg) + { s/^(\*?)(e?[a-dsixphl]{2})$/$1%$2/o; # gp registers + s/^([xy]?mm[0-7])$/%$1/o; # xmm/mmx registers + s/^(\-?[0-9]+)$/\$$1/o; # constants + s/^(\-?0x[0-9a-f]+)$/\$$1/o; # constants + } + + $dst = $arg[$#arg] if ($#arg>=0); + $src = $arg[$#arg-1] if ($#arg>=1); + if ($dst =~ m/^%/o) { $suffix=&opsize($dst); } + elsif ($src =~ m/^%/o) { $suffix=&opsize($src); } + else { $suffix="l"; } + undef $suffix if ($dst =~ m/^%[xm]/o || $src =~ m/^%[xm]/o); + + if ($#_==0) { &::emit($opcode); } + elsif ($#_==1 && $opcode =~ m/^(call|clflush|j|loop|set)/o) + { &::emit($opcode,@arg); } + else { &::emit($opcode.$suffix,@arg);} + + 1; +} +# +# opcodes not covered by ::generic above, mostly inconsistent namings... +# +sub ::movzx { &::movzb(@_); } +sub ::pushfd { &::pushfl; } +sub ::popfd { &::popfl; } +sub ::cpuid { &::emit(".byte\t0x0f,0xa2"); } +sub ::rdtsc { &::emit(".byte\t0x0f,0x31"); } + +sub ::call { &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); } +sub ::call_ptr { &::generic("call","*$_[0]"); } +sub ::jmp_ptr { &::generic("jmp","*$_[0]"); } + +*::bswap = sub { &::emit("bswap","%$_[0]"); } if (!$::i386); + +sub ::DWP +{ my($addr,$reg1,$reg2,$idx)=@_; + my $ret=""; + + if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; } + + $addr =~ s/^\s+//; + # prepend global references with optional underscore + $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige; + + $reg1 = "%$reg1" if ($reg1); + $reg2 = "%$reg2" if ($reg2); + + $ret .= $addr if (($addr ne "") && ($addr ne 0)); + + if ($reg2) + { $idx!= 0 or $idx=1; + $ret .= "($reg1,$reg2,$idx)"; + } + elsif ($reg1) + { $ret .= "($reg1)"; } + + $ret; +} +sub ::QWP { &::DWP(@_); } +sub ::BP { &::DWP(@_); } +sub ::WP { &::DWP(@_); } +sub ::BC { @_; } +sub ::DWC { @_; } + +sub ::file +{ push(@out,".file\t\"$_[0].s\"\n.text\n"); } + +sub ::function_begin_B +{ my $func=shift; + my $global=($func !~ /^_/); + my $begin="${::lbdecor}_${func}_begin"; + + &::LABEL($func,$global?"$begin":"$nmdecor$func"); + $func=$nmdecor.$func; + + push(@out,".globl\t$func\n") if ($global); + if ($::coff) + { push(@out,".def\t$func;\t.scl\t".(3-$global).";\t.type\t32;\t.endef\n"); } + elsif (($::aout and !$::pic) or $::macosx) + { } + else + { push(@out,".type $func,\@function\n"); } + push(@out,".align\t$align\n"); + push(@out,"$func:\n"); + push(@out,"$begin:\n") if ($global); + $::stack=4; +} + +sub ::function_end_B +{ my $func=shift; + push(@out,".size\t$nmdecor$func,.-".&::LABEL($func)."\n") if ($::elf); + $::stack=0; + &::wipe_labels(); +} + +sub ::comment + { + if (!defined($com_start) or $::elf) + { # Regarding $::elf above... + # GNU and SVR4 as'es use different comment delimiters, + push(@out,"\n"); # so we just skip ELF comments... 
+ return; + } + foreach (@_) + { + if (/^\s*$/) + { push(@out,"\n"); } + else + { push(@out,"\t$com_start $_ $com_end\n"); } + } + } + +sub ::external_label +{ foreach(@_) { &::LABEL($_,$nmdecor.$_); } } + +sub ::public_label +{ push(@out,".globl\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); } + +sub ::file_end +{ if ($::macosx) + { if (%non_lazy_ptr) + { push(@out,".section __IMPORT,__pointers,non_lazy_symbol_pointers\n"); + foreach $i (keys %non_lazy_ptr) + { push(@out,"$non_lazy_ptr{$i}:\n.indirect_symbol\t$i\n.long\t0\n"); } + } + } + if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { + my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16"; + if ($::macosx) { push (@out,"$tmp,2\n"); } + elsif ($::elf) { push (@out,"$tmp,4\n"); } + else { push (@out,"$tmp\n"); } + } + push(@out,$initseg) if ($initseg); +} + +sub ::data_byte { push(@out,".byte\t".join(',',@_)."\n"); } +sub ::data_short{ push(@out,".value\t".join(',',@_)."\n"); } +sub ::data_word { push(@out,".long\t".join(',',@_)."\n"); } + +sub ::align +{ my $val=$_[0]; + if ($::aout) + { $val=int(log($val)/log(2)); + $val.=",0x90"; + } + push(@out,".align\t$val\n"); +} + +sub ::picmeup +{ my($dst,$sym,$base,$reflabel)=@_; + + if (($::pic && ($::elf || $::aout)) || $::macosx) + { if (!defined($base)) + { &::call(&::label("PIC_me_up")); + &::set_label("PIC_me_up"); + &::blindpop($dst); + $base=$dst; + $reflabel=&::label("PIC_me_up"); + } + if ($::macosx) + { my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr"); + &::mov($dst,&::DWP("$indirect-$reflabel",$base)); + $non_lazy_ptr{"$nmdecor$sym"}=$indirect; + } + elsif ($sym eq "OPENSSL_ia32cap_P" && $::elf>0) + { &::lea($dst,&::DWP("$sym-$reflabel",$base)); } + else + { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", + $base)); + &::mov($dst,&::DWP("$sym\@GOT",$dst)); + } + } + else + { &::lea($dst,&::DWP($sym)); } +} + +sub ::initseg +{ my $f=$nmdecor.shift; + + if ($::android) + { $initseg.=<<___; +.section .init_array +.align 4 +.long $f +___ + } + elsif ($::elf) + { $initseg.=<<___; +.section .init + call $f +___ + } + elsif ($::coff) + { $initseg.=<<___; # applies to both Cygwin and Mingw +.section .ctors +.long $f +___ + } + elsif ($::macosx) + { $initseg.=<<___; +.mod_init_func +.align 2 +.long $f +___ + } + elsif ($::aout) + { my $ctor="${nmdecor}_GLOBAL_\$I\$$f"; + $initseg.=".text\n"; + $initseg.=".type $ctor,\@function\n" if ($::pic); + $initseg.=<<___; # OpenBSD way... +.globl $ctor +.align 2 +$ctor: + jmp $f +___ + } +} + +sub ::dataseg +{ push(@out,".data\n"); } + +*::hidden = sub { push(@out,".hidden\t$nmdecor$_[0]\n"); } if ($::elf); + +1; diff --git a/openssl-1.1.0h/crypto/perlasm/x86masm.pl b/openssl-1.1.0h/crypto/perlasm/x86masm.pl new file mode 100644 index 0000000..d352f47 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/x86masm.pl @@ -0,0 +1,207 @@ +#! /usr/bin/env perl +# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + + +package x86masm; + +*out=\@::out; + +$::lbdecor="\$L"; # local label decoration +$nmdecor="_"; # external name decoration + +$initseg=""; +$segment=""; + +sub ::generic +{ my ($opcode,@arg)=@_; + + # fix hexadecimal constants + for (@arg) { s/(?= 0x02030000\n"); + push(@out,"safeseh ".&::LABEL($nm,$nmdecor.$nm)."\n"); + push(@out,"%endif\n"); +} + +1; -- cgit v1.2.3