aboutsummaryrefslogtreecommitdiff
path: root/openssl-1.1.0h/crypto/perlasm
diff options
context:
space:
mode:
Diffstat (limited to 'openssl-1.1.0h/crypto/perlasm')
-rw-r--r--openssl-1.1.0h/crypto/perlasm/README124
-rwxr-xr-xopenssl-1.1.0h/crypto/perlasm/arm-xlate.pl177
-rw-r--r--openssl-1.1.0h/crypto/perlasm/cbc.pl356
-rwxr-xr-xopenssl-1.1.0h/crypto/perlasm/ppc-xlate.pl265
-rw-r--r--openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl1702
-rwxr-xr-xopenssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl1186
-rw-r--r--openssl-1.1.0h/crypto/perlasm/x86asm.pl310
-rw-r--r--openssl-1.1.0h/crypto/perlasm/x86gas.pl265
-rw-r--r--openssl-1.1.0h/crypto/perlasm/x86masm.pl207
-rw-r--r--openssl-1.1.0h/crypto/perlasm/x86nasm.pl186
10 files changed, 4778 insertions, 0 deletions
diff --git a/openssl-1.1.0h/crypto/perlasm/README b/openssl-1.1.0h/crypto/perlasm/README
new file mode 100644
index 0000000..e90bd8e
--- /dev/null
+++ b/openssl-1.1.0h/crypto/perlasm/README
@@ -0,0 +1,124 @@
+The perl scripts in this directory are my 'hack' to generate
+multiple different assembler formats via the one original script.
+
+The way to use this library is to start with adding the path to this directory
+and then include it.
+
+push(@INC,"perlasm","../../perlasm");
+require "x86asm.pl";
+
+The first thing we do is setup the file and type of assembler
+
+&asm_init($ARGV[0],$0);
+
+The first argument is the 'type'. Currently
+'cpp', 'sol', 'a.out', 'elf' or 'win32'.
+Argument 2 is the file name.
+
+The reciprocal function is
+&asm_finish() which should be called at the end.
+
+There are 2 main 'packages'. x86ms.pl, which is the Microsoft assembler,
+and x86unix.pl which is the unix (gas) version.
+
+Functions of interest are:
+&external_label("des_SPtrans"); declare and external variable
+&LB(reg); Low byte for a register
+&HB(reg); High byte for a register
+&BP(off,base,index,scale) Byte pointer addressing
+&DWP(off,base,index,scale) Word pointer addressing
+&stack_push(num) Basically a 'sub esp, num*4' with extra
+&stack_pop(num) inverse of stack_push
+&function_begin(name,extra) Start a function with pushing of
+ edi, esi, ebx and ebp. extra is extra win32
+ external info that may be required.
+&function_begin_B(name,extra) Same as normal function_begin but no pushing.
+&function_end(name) Call at end of function.
+&function_end_A(name) Standard pop and ret, for use inside functions
+&function_end_B(name) Call at end but with poping or 'ret'.
+&swtmp(num) Address on stack temp word.
+&wparam(num) Parameter number num, that was push
+ in C convention. This all works over pushes
+ and pops.
+&comment("hello there") Put in a comment.
+&label("loop") Refer to a label, normally a jmp target.
+&set_label("loop") Set a label at this point.
+&data_word(word) Put in a word of data.
+
+So how does this all hold together? Given
+
+int calc(int len, int *data)
+ {
+ int i,j=0;
+
+ for (i=0; i<len; i++)
+ {
+ j+=other(data[i]);
+ }
+ }
+
+So a very simple version of this function could be coded as
+
+ push(@INC,"perlasm","../../perlasm");
+ require "x86asm.pl";
+
+ &asm_init($ARGV[0],"cacl.pl");
+
+ &external_label("other");
+
+ $tmp1= "eax";
+ $j= "edi";
+ $data= "esi";
+ $i= "ebp";
+
+ &comment("a simple function");
+ &function_begin("calc");
+ &mov( $data, &wparam(1)); # data
+ &xor( $j, $j);
+ &xor( $i, $i);
+
+ &set_label("loop");
+ &cmp( $i, &wparam(0));
+ &jge( &label("end"));
+
+ &mov( $tmp1, &DWP(0,$data,$i,4));
+ &push( $tmp1);
+ &call( "other");
+ &add( $j, "eax");
+ &pop( $tmp1);
+ &inc( $i);
+ &jmp( &label("loop"));
+
+ &set_label("end");
+ &mov( "eax", $j);
+
+ &function_end("calc");
+
+ &asm_finish();
+
+The above example is very very unoptimised but gives an idea of how
+things work.
+
+There is also a cbc mode function generator in cbc.pl
+
+&cbc( $name,
+ $encrypt_function_name,
+ $decrypt_function_name,
+ $true_if_byte_swap_needed,
+ $parameter_number_for_iv,
+ $parameter_number_for_encrypt_flag,
+ $first_parameter_to_pass,
+ $second_parameter_to_pass,
+ $third_parameter_to_pass);
+
+So for example, given
+void BF_encrypt(BF_LONG *data,BF_KEY *key);
+void BF_decrypt(BF_LONG *data,BF_KEY *key);
+void BF_cbc_encrypt(unsigned char *in, unsigned char *out, long length,
+ BF_KEY *ks, unsigned char *iv, int enc);
+
+&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt",1,4,5,3,-1,-1);
+
+&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1);
+&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5);
+
diff --git a/openssl-1.1.0h/crypto/perlasm/arm-xlate.pl b/openssl-1.1.0h/crypto/perlasm/arm-xlate.pl
new file mode 100755
index 0000000..ca2f8b9
--- /dev/null
+++ b/openssl-1.1.0h/crypto/perlasm/arm-xlate.pl
@@ -0,0 +1,177 @@
+#! /usr/bin/env perl
+# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+use strict;
+
+my $flavour = shift;
+my $output = shift;
+open STDOUT,">$output" || die "can't open $output: $!";
+
+$flavour = "linux32" if (!$flavour or $flavour eq "void");
+
+my %GLOBALS;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $arch = sub {
+ if ($flavour =~ /linux/) { ".arch\t".join(',',@_); }
+ else { ""; }
+};
+my $fpu = sub {
+ if ($flavour =~ /linux/) { ".fpu\t".join(',',@_); }
+ else { ""; }
+};
+my $hidden = sub {
+ if ($flavour =~ /ios/) { ".private_extern\t".join(',',@_); }
+ else { ".hidden\t".join(',',@_); }
+};
+my $comm = sub {
+ my @args = split(/,\s*/,shift);
+ my $name = @args[0];
+ my $global = \$GLOBALS{$name};
+ my $ret;
+
+ if ($flavour =~ /ios32/) {
+ $ret = ".comm\t_$name,@args[1]\n";
+ $ret .= ".non_lazy_symbol_pointer\n";
+ $ret .= "$name:\n";
+ $ret .= ".indirect_symbol\t_$name\n";
+ $ret .= ".long\t0";
+ $name = "_$name";
+ } else { $ret = ".comm\t".join(',',@args); }
+
+ $$global = $name;
+ $ret;
+};
+my $globl = sub {
+ my $name = shift;
+ my $global = \$GLOBALS{$name};
+ my $ret;
+
+ SWITCH: for ($flavour) {
+ /ios/ && do { $name = "_$name";
+ last;
+ };
+ }
+
+ $ret = ".globl $name" if (!$ret);
+ $$global = $name;
+ $ret;
+};
+my $global = $globl;
+my $extern = sub {
+ &$globl(@_);
+ return; # return nothing
+};
+my $type = sub {
+ if ($flavour =~ /linux/) { ".type\t".join(',',@_); }
+ elsif ($flavour =~ /ios32/) { if (join(',',@_) =~ /(\w+),%function/) {
+ "#ifdef __thumb2__\n".
+ ".thumb_func $1\n".
+ "#endif";
+ }
+ }
+ else { ""; }
+};
+my $size = sub {
+ if ($flavour =~ /linux/) { ".size\t".join(',',@_); }
+ else { ""; }
+};
+my $inst = sub {
+ if ($flavour =~ /linux/) { ".inst\t".join(',',@_); }
+ else { ".long\t".join(',',@_); }
+};
+my $asciz = sub {
+ my $line = join(",",@_);
+ if ($line =~ /^"(.*)"$/)
+ { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
+ else
+ { ""; }
+};
+
+sub range {
+ my ($r,$sfx,$start,$end) = @_;
+
+ join(",",map("$r$_$sfx",($start..$end)));
+}
+
+sub expand_line {
+ my $line = shift;
+ my @ret = ();
+
+ pos($line)=0;
+
+ while ($line =~ m/\G[^@\/\{\"]*/g) {
+ if ($line =~ m/\G(@|\/\/|$)/gc) {
+ last;
+ }
+ elsif ($line =~ m/\G\{/gc) {
+ my $saved_pos = pos($line);
+ $line =~ s/\G([rdqv])([0-9]+)([^\-]*)\-\1([0-9]+)\3/range($1,$3,$2,$4)/e;
+ pos($line) = $saved_pos;
+ $line =~ m/\G[^\}]*\}/g;
+ }
+ elsif ($line =~ m/\G\"/gc) {
+ $line =~ m/\G[^\"]*\"/g;
+ }
+ }
+
+ $line =~ s/\b(\w+)/$GLOBALS{$1} or $1/ge;
+
+ return $line;
+}
+
+while(my $line=<>) {
+
+ if ($line =~ m/^\s*(#|@|\/\/)/) { print $line; next; }
+
+ $line =~ s|/\*.*\*/||; # get rid of C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning...
+ $line =~ s|\s+$||; # ... and at the end
+
+ {
+ $line =~ s|[\b\.]L(\w{2,})|L$1|g; # common denominator for Locallabel
+ $line =~ s|\bL(\w{2,})|\.L$1|g if ($dotinlocallabels);
+ }
+
+ {
+ $line =~ s|(^[\.\w]+)\:\s*||;
+ my $label = $1;
+ if ($label) {
+ printf "%s:",($GLOBALS{$label} or $label);
+ }
+ }
+
+ if ($line !~ m/^[#@]/) {
+ $line =~ s|^\s*(\.?)(\S+)\s*||;
+ my $c = $1; $c = "\t" if ($c eq "");
+ my $mnemonic = $2;
+ my $opcode;
+ if ($mnemonic =~ m/([^\.]+)\.([^\.]+)/) {
+ $opcode = eval("\$$1_$2");
+ } else {
+ $opcode = eval("\$$mnemonic");
+ }
+
+ my $arg=expand_line($line);
+
+ if (ref($opcode) eq 'CODE') {
+ $line = &$opcode($arg);
+ } elsif ($mnemonic) {
+ $line = $c.$mnemonic;
+ $line.= "\t$arg" if ($arg ne "");
+ }
+ }
+
+ print $line if ($line);
+ print "\n";
+}
+
+close STDOUT;
diff --git a/openssl-1.1.0h/crypto/perlasm/cbc.pl b/openssl-1.1.0h/crypto/perlasm/cbc.pl
new file mode 100644
index 0000000..ad79b24
--- /dev/null
+++ b/openssl-1.1.0h/crypto/perlasm/cbc.pl
@@ -0,0 +1,356 @@
+#! /usr/bin/env perl
+# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# void des_ncbc_encrypt(input, output, length, schedule, ivec, enc)
+# des_cblock (*input);
+# des_cblock (*output);
+# long length;
+# des_key_schedule schedule;
+# des_cblock (*ivec);
+# int enc;
+#
+# calls
+# des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT);
+#
+
+#&cbc("des_ncbc_encrypt","des_encrypt",0);
+#&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt",
+# 1,4,5,3,5,-1);
+#&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",
+# 0,4,5,3,5,-1);
+#&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",
+# 0,6,7,3,4,5);
+#
+# When doing a cipher that needs bigendian order,
+# for encrypt, the iv is kept in bigendian form,
+# while for decrypt, it is kept in little endian.
+sub cbc
+ {
+ local($name,$enc_func,$dec_func,$swap,$iv_off,$enc_off,$p1,$p2,$p3)=@_;
+ # name is the function name
+ # enc_func and dec_func and the functions to call for encrypt/decrypt
+ # swap is true if byte order needs to be reversed
+ # iv_off is parameter number for the iv
+ # enc_off is parameter number for the encrypt/decrypt flag
+ # p1,p2,p3 are the offsets for parameters to be passed to the
+ # underlying calls.
+
+ &function_begin_B($name,"");
+ &comment("");
+
+ $in="esi";
+ $out="edi";
+ $count="ebp";
+
+ &push("ebp");
+ &push("ebx");
+ &push("esi");
+ &push("edi");
+
+ $data_off=4;
+ $data_off+=4 if ($p1 > 0);
+ $data_off+=4 if ($p2 > 0);
+ $data_off+=4 if ($p3 > 0);
+
+ &mov($count, &wparam(2)); # length
+
+ &comment("getting iv ptr from parameter $iv_off");
+ &mov("ebx", &wparam($iv_off)); # Get iv ptr
+
+ &mov($in, &DWP(0,"ebx","",0));# iv[0]
+ &mov($out, &DWP(4,"ebx","",0));# iv[1]
+
+ &push($out);
+ &push($in);
+ &push($out); # used in decrypt for iv[1]
+ &push($in); # used in decrypt for iv[0]
+
+ &mov("ebx", "esp"); # This is the address of tin[2]
+
+ &mov($in, &wparam(0)); # in
+ &mov($out, &wparam(1)); # out
+
+ # We have loaded them all, how lets push things
+ &comment("getting encrypt flag from parameter $enc_off");
+ &mov("ecx", &wparam($enc_off)); # Get enc flag
+ if ($p3 > 0)
+ {
+ &comment("get and push parameter $p3");
+ if ($enc_off != $p3)
+ { &mov("eax", &wparam($p3)); &push("eax"); }
+ else { &push("ecx"); }
+ }
+ if ($p2 > 0)
+ {
+ &comment("get and push parameter $p2");
+ if ($enc_off != $p2)
+ { &mov("eax", &wparam($p2)); &push("eax"); }
+ else { &push("ecx"); }
+ }
+ if ($p1 > 0)
+ {
+ &comment("get and push parameter $p1");
+ if ($enc_off != $p1)
+ { &mov("eax", &wparam($p1)); &push("eax"); }
+ else { &push("ecx"); }
+ }
+ &push("ebx"); # push data/iv
+
+ &cmp("ecx",0);
+ &jz(&label("decrypt"));
+
+ &and($count,0xfffffff8);
+ &mov("eax", &DWP($data_off,"esp","",0)); # load iv[0]
+ &mov("ebx", &DWP($data_off+4,"esp","",0)); # load iv[1]
+
+ &jz(&label("encrypt_finish"));
+
+ #############################################################
+
+ &set_label("encrypt_loop");
+ # encrypt start
+ # "eax" and "ebx" hold iv (or the last cipher text)
+
+ &mov("ecx", &DWP(0,$in,"",0)); # load first 4 bytes
+ &mov("edx", &DWP(4,$in,"",0)); # second 4 bytes
+
+ &xor("eax", "ecx");
+ &xor("ebx", "edx");
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($enc_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0));
+ &mov("ebx", &DWP($data_off+4,"esp","",0));
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP(0,$out,"",0),"eax");
+ &mov(&DWP(4,$out,"",0),"ebx");
+
+ # eax and ebx are the next iv.
+
+ &add($in, 8);
+ &add($out, 8);
+
+ &sub($count, 8);
+ &jnz(&label("encrypt_loop"));
+
+###################################################################3
+ &set_label("encrypt_finish");
+ &mov($count, &wparam(2)); # length
+ &and($count, 7);
+ &jz(&label("finish"));
+ &call(&label("PIC_point"));
+&set_label("PIC_point");
+ &blindpop("edx");
+ &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx"));
+ &mov($count,&DWP(0,"ecx",$count,4));
+ &add($count,"edx");
+ &xor("ecx","ecx");
+ &xor("edx","edx");
+ #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4));
+ &jmp_ptr($count);
+
+&set_label("ej7");
+ &movb(&HB("edx"), &BP(6,$in,"",0));
+ &shl("edx",8);
+&set_label("ej6");
+ &movb(&HB("edx"), &BP(5,$in,"",0));
+&set_label("ej5");
+ &movb(&LB("edx"), &BP(4,$in,"",0));
+&set_label("ej4");
+ &mov("ecx", &DWP(0,$in,"",0));
+ &jmp(&label("ejend"));
+&set_label("ej3");
+ &movb(&HB("ecx"), &BP(2,$in,"",0));
+ &shl("ecx",8);
+&set_label("ej2");
+ &movb(&HB("ecx"), &BP(1,$in,"",0));
+&set_label("ej1");
+ &movb(&LB("ecx"), &BP(0,$in,"",0));
+&set_label("ejend");
+
+ &xor("eax", "ecx");
+ &xor("ebx", "edx");
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($enc_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0));
+ &mov("ebx", &DWP($data_off+4,"esp","",0));
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP(0,$out,"",0),"eax");
+ &mov(&DWP(4,$out,"",0),"ebx");
+
+ &jmp(&label("finish"));
+
+ #############################################################
+ #############################################################
+ &set_label("decrypt",1);
+ # decrypt start
+ &and($count,0xfffffff8);
+ # The next 2 instructions are only for if the jz is taken
+ &mov("eax", &DWP($data_off+8,"esp","",0)); # get iv[0]
+ &mov("ebx", &DWP($data_off+12,"esp","",0)); # get iv[1]
+ &jz(&label("decrypt_finish"));
+
+ &set_label("decrypt_loop");
+ &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes
+ &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put back
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($dec_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0)); # get return
+ &mov("ebx", &DWP($data_off+4,"esp","",0)); #
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0]
+ &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1]
+
+ &xor("ecx", "eax");
+ &xor("edx", "ebx");
+
+ &mov("eax", &DWP(0,$in,"",0)); # get old cipher text,
+ &mov("ebx", &DWP(4,$in,"",0)); # next iv actually
+
+ &mov(&DWP(0,$out,"",0),"ecx");
+ &mov(&DWP(4,$out,"",0),"edx");
+
+ &mov(&DWP($data_off+8,"esp","",0), "eax"); # save iv
+ &mov(&DWP($data_off+12,"esp","",0), "ebx"); #
+
+ &add($in, 8);
+ &add($out, 8);
+
+ &sub($count, 8);
+ &jnz(&label("decrypt_loop"));
+############################ ENDIT #######################3
+ &set_label("decrypt_finish");
+ &mov($count, &wparam(2)); # length
+ &and($count, 7);
+ &jz(&label("finish"));
+
+ &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes
+ &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put back
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($dec_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0)); # get return
+ &mov("ebx", &DWP($data_off+4,"esp","",0)); #
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0]
+ &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1]
+
+ &xor("ecx", "eax");
+ &xor("edx", "ebx");
+
+ # this is for when we exit
+ &mov("eax", &DWP(0,$in,"",0)); # get old cipher text,
+ &mov("ebx", &DWP(4,$in,"",0)); # next iv actually
+
+&set_label("dj7");
+ &rotr("edx", 16);
+ &movb(&BP(6,$out,"",0), &LB("edx"));
+ &shr("edx",16);
+&set_label("dj6");
+ &movb(&BP(5,$out,"",0), &HB("edx"));
+&set_label("dj5");
+ &movb(&BP(4,$out,"",0), &LB("edx"));
+&set_label("dj4");
+ &mov(&DWP(0,$out,"",0), "ecx");
+ &jmp(&label("djend"));
+&set_label("dj3");
+ &rotr("ecx", 16);
+ &movb(&BP(2,$out,"",0), &LB("ecx"));
+ &shl("ecx",16);
+&set_label("dj2");
+ &movb(&BP(1,$in,"",0), &HB("ecx"));
+&set_label("dj1");
+ &movb(&BP(0,$in,"",0), &LB("ecx"));
+&set_label("djend");
+
+ # final iv is still in eax:ebx
+ &jmp(&label("finish"));
+
+
+############################ FINISH #######################3
+ &set_label("finish",1);
+ &mov("ecx", &wparam($iv_off)); # Get iv ptr
+
+ #################################################
+ $total=16+4;
+ $total+=4 if ($p1 > 0);
+ $total+=4 if ($p2 > 0);
+ $total+=4 if ($p3 > 0);
+ &add("esp",$total);
+
+ &mov(&DWP(0,"ecx","",0), "eax"); # save iv
+ &mov(&DWP(4,"ecx","",0), "ebx"); # save iv
+
+ &function_end_A($name);
+
+ &align(64);
+ &set_label("cbc_enc_jmp_table");
+ &data_word("0");
+ &data_word(&label("ej1")."-".&label("PIC_point"));
+ &data_word(&label("ej2")."-".&label("PIC_point"));
+ &data_word(&label("ej3")."-".&label("PIC_point"));
+ &data_word(&label("ej4")."-".&label("PIC_point"));
+ &data_word(&label("ej5")."-".&label("PIC_point"));
+ &data_word(&label("ej6")."-".&label("PIC_point"));
+ &data_word(&label("ej7")."-".&label("PIC_point"));
+ # not used
+ #&set_label("cbc_dec_jmp_table",1);
+ #&data_word("0");
+ #&data_word(&label("dj1")."-".&label("PIC_point"));
+ #&data_word(&label("dj2")."-".&label("PIC_point"));
+ #&data_word(&label("dj3")."-".&label("PIC_point"));
+ #&data_word(&label("dj4")."-".&label("PIC_point"));
+ #&data_word(&label("dj5")."-".&label("PIC_point"));
+ #&data_word(&label("dj6")."-".&label("PIC_point"));
+ #&data_word(&label("dj7")."-".&label("PIC_point"));
+ &align(64);
+
+ &function_end_B($name);
+
+ }
+
+1;
diff --git a/openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl b/openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl
new file mode 100755
index 0000000..2d46e24
--- /dev/null
+++ b/openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl
@@ -0,0 +1,265 @@
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+my $flavour = shift;
+my $output = shift;
+open STDOUT,">$output" || die "can't open $output: $!";
+
+my %GLOBALS;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $globl = sub {
+ my $junk = shift;
+ my $name = shift;
+ my $global = \$GLOBALS{$name};
+ my $ret;
+
+ $name =~ s|^[\.\_]||;
+
+ SWITCH: for ($flavour) {
+ /aix/ && do { $name = ".$name";
+ last;
+ };
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+ /linux.*(32|64le)/
+ && do { $ret .= ".globl $name\n";
+ $ret .= ".type $name,\@function";
+ last;
+ };
+ /linux.*64/ && do { $ret .= ".globl $name\n";
+ $ret .= ".type $name,\@function\n";
+ $ret .= ".section \".opd\",\"aw\"\n";
+ $ret .= ".align 3\n";
+ $ret .= "$name:\n";
+ $ret .= ".quad .$name,.TOC.\@tocbase,0\n";
+ $ret .= ".previous\n";
+
+ $name = ".$name";
+ last;
+ };
+ }
+
+ $ret = ".globl $name" if (!$ret);
+ $$global = $name;
+ $ret;
+};
+my $text = sub {
+ my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
+ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
+ $ret;
+};
+my $machine = sub {
+ my $junk = shift;
+ my $arch = shift;
+ if ($flavour =~ /osx/)
+ { $arch =~ s/\"//g;
+ $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
+ }
+ ".machine $arch";
+};
+my $size = sub {
+ if ($flavour =~ /linux/)
+ { shift;
+ my $name = shift; $name =~ s|^[\.\_]||;
+ my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name;
+ $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/);
+ $ret;
+ }
+ else
+ { ""; }
+};
+my $asciz = sub {
+ shift;
+ my $line = join(",",@_);
+ if ($line =~ /^"(.*)"$/)
+ { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
+ else
+ { ""; }
+};
+my $quad = sub {
+ shift;
+ my @ret;
+ my ($hi,$lo);
+ for (@_) {
+ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
+ { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
+ elsif (/^([0-9]+)$/o)
+ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
+ else
+ { $hi=undef; $lo=$_; }
+
+ if (defined($hi))
+ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
+ else
+ { push(@ret,".quad $lo"); }
+ }
+ join("\n",@ret);
+};
+
+################################################################
+# simplified mnemonics not handled by at least one assembler
+################################################################
+my $cmplw = sub {
+ my $f = shift;
+ my $cr = 0; $cr = shift if ($#_>1);
+ # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
+ ($flavour =~ /linux.*32/) ?
+ " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
+ " cmplw ".join(',',$cr,@_);
+};
+my $bdnz = sub {
+ my $f = shift;
+ my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
+ " bc $bo,0,".shift;
+} if ($flavour!~/linux/);
+my $bltlr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
+ " bclr $bo,0";
+};
+my $bnelr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+my $beqlr = sub {
+ my $f = shift;
+ my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
+# arguments is 64, with "operand out of range" error.
+my $extrdi = sub {
+ my ($f,$ra,$rs,$n,$b) = @_;
+ $b = ($b+$n)&63; $n = 64-$n;
+ " rldicl $ra,$rs,$b,$n";
+};
+my $vmr = sub {
+ my ($f,$vx,$vy) = @_;
+ " vor $vx,$vy,$vy";
+};
+
+# Some ABIs specify vrsave, special-purpose register #256, as reserved
+# for system use.
+my $no_vrsave = ($flavour =~ /aix|linux64le/);
+my $mtspr = sub {
+ my ($f,$idx,$ra) = @_;
+ if ($idx == 256 && $no_vrsave) {
+ " or $ra,$ra,$ra";
+ } else {
+ " mtspr $idx,$ra";
+ }
+};
+my $mfspr = sub {
+ my ($f,$rd,$idx) = @_;
+ if ($idx == 256 && $no_vrsave) {
+ " li $rd,-1";
+ } else {
+ " mfspr $rd,$idx";
+ }
+};
+
+# PowerISA 2.06 stuff
+sub vsxmem_op {
+ my ($f, $vrt, $ra, $rb, $op) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
+}
+# made-up unaligned memory reference AltiVec/VMX instructions
+my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
+my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
+my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
+my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
+my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
+my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
+
+# PowerISA 2.07 stuff
+sub vcrypto_op {
+ my ($f, $vrt, $vra, $vrb, $op) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
+}
+my $vcipher = sub { vcrypto_op(@_, 1288); };
+my $vcipherlast = sub { vcrypto_op(@_, 1289); };
+my $vncipher = sub { vcrypto_op(@_, 1352); };
+my $vncipherlast= sub { vcrypto_op(@_, 1353); };
+my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
+my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
+my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
+my $vpmsumb = sub { vcrypto_op(@_, 1032); };
+my $vpmsumd = sub { vcrypto_op(@_, 1224); };
+my $vpmsubh = sub { vcrypto_op(@_, 1096); };
+my $vpmsumw = sub { vcrypto_op(@_, 1160); };
+my $vaddudm = sub { vcrypto_op(@_, 192); };
+
+my $mtsle = sub {
+ my ($f, $arg) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
+};
+
+# PowerISA 3.0 stuff
+my $maddhdu = sub {
+ my ($f, $rt, $ra, $rb, $rc) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|49;
+};
+my $maddld = sub {
+ my ($f, $rt, $ra, $rb, $rc) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|51;
+};
+
+my $darn = sub {
+ my ($f, $rt, $l) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1);
+};
+
+while($line=<>) {
+
+ $line =~ s|[#!;].*$||; # get rid of asm-style comments...
+ $line =~ s|/\*.*\*/||; # ... and C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning...
+ $line =~ s|\s+$||; # ... and at the end
+
+ {
+ $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
+ $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
+ }
+
+ {
+ $line =~ s|(^[\.\w]+)\:\s*||;
+ my $label = $1;
+ if ($label) {
+ printf "%s:",($GLOBALS{$label} or $label);
+ printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/);
+ }
+ }
+
+ {
+ $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
+ my $c = $1; $c = "\t" if ($c eq "");
+ my $mnemonic = $2;
+ my $f = $3;
+ my $opcode = eval("\$$mnemonic");
+ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
+ if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+ elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
+ }
+
+ print $line if ($line);
+ print "\n";
+}
+
+close STDOUT;
diff --git a/openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl b/openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl
new file mode 100644
index 0000000..bfdada8
--- /dev/null
+++ b/openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl
@@ -0,0 +1,1702 @@
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# Specific modes implementations for SPARC Architecture 2011. There
+# is T4 dependency though, an ASI value that is not specified in the
+# Architecture Manual. But as SPARC universe is rather monocultural,
+# we imply that processor capable of executing crypto instructions
+# can handle the ASI in question as well. This means that we ought to
+# keep eyes open when new processors emerge...
+#
+# As for above mentioned ASI. It's so called "block initializing
+# store" which cancels "read" in "read-update-write" on cache lines.
+# This is "cooperative" optimization, as it reduces overall pressure
+# on memory interface. Benefits can't be observed/quantified with
+# usual benchmarks, on the contrary you can notice that single-thread
+# performance for parallelizable modes is ~1.5% worse for largest
+# block sizes [though few percent better for not so long ones]. All
+# this based on suggestions from David Miller.
+
+$::bias="STACK_BIAS";
+$::frame="STACK_FRAME";
+$::size_t_cc="SIZE_T_CC";
+
+sub asm_init { # to be called with @ARGV as argument
+ for (@_) { $::abibits=64 if (/\-m64/ || /\-xarch\=v9/); }
+ if ($::abibits==64) { $::bias=2047; $::frame=192; $::size_t_cc="%xcc"; }
+ else { $::bias=0; $::frame=112; $::size_t_cc="%icc"; }
+}
+
+# unified interface
+my ($inp,$out,$len,$key,$ivec)=map("%i$_",(0..5));
+# local variables
+my ($ileft,$iright,$ooff,$omask,$ivoff,$blk_init)=map("%l$_",(0..7));
+
+sub alg_cbc_encrypt_implement {
+my ($alg,$bits) = @_;
+
+$::code.=<<___;
+.globl ${alg}${bits}_t4_cbc_encrypt
+.align 32
+${alg}${bits}_t4_cbc_encrypt:
+ save %sp, -$::frame, %sp
+ cmp $len, 0
+ be,pn $::size_t_cc, .L${bits}_cbc_enc_abort
+ srln $len, 0, $len ! needed on v8+, "nop" on v9
+ sub $inp, $out, $blk_init ! $inp!=$out
+___
+$::code.=<<___ if (!$::evp);
+ andcc $ivec, 7, $ivoff
+ alignaddr $ivec, %g0, $ivec
+
+ ldd [$ivec + 0], %f0 ! load ivec
+ bz,pt %icc, 1f
+ ldd [$ivec + 8], %f2
+ ldd [$ivec + 16], %f4
+ faligndata %f0, %f2, %f0
+ faligndata %f2, %f4, %f2
+1:
+___
+$::code.=<<___ if ($::evp);
+ ld [$ivec + 0], %f0
+ ld [$ivec + 4], %f1
+ ld [$ivec + 8], %f2
+ ld [$ivec + 12], %f3
+___
+$::code.=<<___;
+ prefetch [$inp], 20
+ prefetch [$inp + 63], 20
+ call _${alg}${bits}_load_enckey
+ and $inp, 7, $ileft
+ andn $inp, 7, $inp
+ sll $ileft, 3, $ileft
+ mov 64, $iright
+ mov 0xff, $omask
+ sub $iright, $ileft, $iright
+ and $out, 7, $ooff
+ cmp $len, 127
+ movrnz $ooff, 0, $blk_init ! if ( $out&7 ||
+ movleu $::size_t_cc, 0, $blk_init ! $len<128 ||
+ brnz,pn $blk_init, .L${bits}cbc_enc_blk ! $inp==$out)
+ srl $omask, $ooff, $omask
+
+ alignaddrl $out, %g0, $out
+ srlx $len, 4, $len
+ prefetch [$out], 22
+
+.L${bits}_cbc_enc_loop:
+ ldx [$inp + 0], %o0
+ brz,pt $ileft, 4f
+ ldx [$inp + 8], %o1
+
+ ldx [$inp + 16], %o2
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ sllx %o1, $ileft, %o1
+ or %g1, %o0, %o0
+ srlx %o2, $iright, %o2
+ or %o2, %o1, %o1
+4:
+ xor %g4, %o0, %o0 ! ^= rk[0]
+ xor %g5, %o1, %o1
+ movxtod %o0, %f12
+ movxtod %o1, %f14
+
+ fxor %f12, %f0, %f0 ! ^= ivec
+ fxor %f14, %f2, %f2
+ prefetch [$out + 63], 22
+ prefetch [$inp + 16+63], 20
+ call _${alg}${bits}_encrypt_1x
+ add $inp, 16, $inp
+
+ brnz,pn $ooff, 2f
+ sub $len, 1, $len
+
+ std %f0, [$out + 0]
+ std %f2, [$out + 8]
+ brnz,pt $len, .L${bits}_cbc_enc_loop
+ add $out, 16, $out
+___
+$::code.=<<___ if ($::evp);
+ st %f0, [$ivec + 0]
+ st %f1, [$ivec + 4]
+ st %f2, [$ivec + 8]
+ st %f3, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+ brnz,pn $ivoff, 3f
+ nop
+
+ std %f0, [$ivec + 0] ! write out ivec
+ std %f2, [$ivec + 8]
+___
+$::code.=<<___;
+.L${bits}_cbc_enc_abort:
+ ret
+ restore
+
+.align 16
+2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard
+ ! and ~3x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f4 ! handle unaligned output
+ faligndata %f0, %f2, %f6
+ faligndata %f2, %f2, %f8
+
+ stda %f4, [$out + $omask]0xc0 ! partial store
+ std %f6, [$out + 8]
+ add $out, 16, $out
+ orn %g0, $omask, $omask
+ stda %f8, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .L${bits}_cbc_enc_loop+4
+ orn %g0, $omask, $omask
+___
+$::code.=<<___ if ($::evp);
+ st %f0, [$ivec + 0]
+ st %f1, [$ivec + 4]
+ st %f2, [$ivec + 8]
+ st %f3, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+ brnz,pn $ivoff, 3f
+ nop
+
+ std %f0, [$ivec + 0] ! write out ivec
+ std %f2, [$ivec + 8]
+ ret
+ restore
+
+.align 16
+3: alignaddrl $ivec, $ivoff, %g0 ! handle unaligned ivec
+ mov 0xff, $omask
+ srl $omask, $ivoff, $omask
+ faligndata %f0, %f0, %f4
+ faligndata %f0, %f2, %f6
+ faligndata %f2, %f2, %f8
+ stda %f4, [$ivec + $omask]0xc0
+ std %f6, [$ivec + 8]
+ add $ivec, 16, $ivec
+ orn %g0, $omask, $omask
+ stda %f8, [$ivec + $omask]0xc0
+___
+$::code.=<<___;
+ ret
+ restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}cbc_enc_blk:
+ add $out, $len, $blk_init
+ and $blk_init, 63, $blk_init ! tail
+ sub $len, $blk_init, $len
+ add $blk_init, 15, $blk_init ! round up to 16n
+ srlx $len, 4, $len
+ srl $blk_init, 4, $blk_init
+
+.L${bits}_cbc_enc_blk_loop:
+ ldx [$inp + 0], %o0
+ brz,pt $ileft, 5f
+ ldx [$inp + 8], %o1
+
+ ldx [$inp + 16], %o2
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ sllx %o1, $ileft, %o1
+ or %g1, %o0, %o0
+ srlx %o2, $iright, %o2
+ or %o2, %o1, %o1
+5:
+ xor %g4, %o0, %o0 ! ^= rk[0]
+ xor %g5, %o1, %o1
+ movxtod %o0, %f12
+ movxtod %o1, %f14
+
+ fxor %f12, %f0, %f0 ! ^= ivec
+ fxor %f14, %f2, %f2
+ prefetch [$inp + 16+63], 20
+ call _${alg}${bits}_encrypt_1x
+ add $inp, 16, $inp
+ sub $len, 1, $len
+
+ stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ add $out, 8, $out
+ stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ brnz,pt $len, .L${bits}_cbc_enc_blk_loop
+ add $out, 8, $out
+
+ membar #StoreLoad|#StoreStore
+ brnz,pt $blk_init, .L${bits}_cbc_enc_loop
+ mov $blk_init, $len
+___
+$::code.=<<___ if ($::evp);
+ st %f0, [$ivec + 0]
+ st %f1, [$ivec + 4]
+ st %f2, [$ivec + 8]
+ st %f3, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+ brnz,pn $ivoff, 3b
+ nop
+
+ std %f0, [$ivec + 0] ! write out ivec
+ std %f2, [$ivec + 8]
+___
+$::code.=<<___;
+ ret
+ restore
+.type ${alg}${bits}_t4_cbc_encrypt,#function
+.size ${alg}${bits}_t4_cbc_encrypt,.-${alg}${bits}_t4_cbc_encrypt
+___
+}
+
+sub alg_cbc_decrypt_implement {
+my ($alg,$bits) = @_;
+
+$::code.=<<___;
+.globl ${alg}${bits}_t4_cbc_decrypt
+.align 32
+${alg}${bits}_t4_cbc_decrypt:
+ save %sp, -$::frame, %sp
+ cmp $len, 0
+ be,pn $::size_t_cc, .L${bits}_cbc_dec_abort
+ srln $len, 0, $len ! needed on v8+, "nop" on v9
+ sub $inp, $out, $blk_init ! $inp!=$out
+___
+$::code.=<<___ if (!$::evp);
+ andcc $ivec, 7, $ivoff
+ alignaddr $ivec, %g0, $ivec
+
+ ldd [$ivec + 0], %f12 ! load ivec
+ bz,pt %icc, 1f
+ ldd [$ivec + 8], %f14
+ ldd [$ivec + 16], %f0
+ faligndata %f12, %f14, %f12
+ faligndata %f14, %f0, %f14
+1:
+___
+$::code.=<<___ if ($::evp);
+ ld [$ivec + 0], %f12 ! load ivec
+ ld [$ivec + 4], %f13
+ ld [$ivec + 8], %f14
+ ld [$ivec + 12], %f15
+___
+$::code.=<<___;
+ prefetch [$inp], 20
+ prefetch [$inp + 63], 20
+ call _${alg}${bits}_load_deckey
+ and $inp, 7, $ileft
+ andn $inp, 7, $inp
+ sll $ileft, 3, $ileft
+ mov 64, $iright
+ mov 0xff, $omask
+ sub $iright, $ileft, $iright
+ and $out, 7, $ooff
+ cmp $len, 255
+ movrnz $ooff, 0, $blk_init ! if ( $out&7 ||
+ movleu $::size_t_cc, 0, $blk_init ! $len<256 ||
+ brnz,pn $blk_init, .L${bits}cbc_dec_blk ! $inp==$out)
+ srl $omask, $ooff, $omask
+
+ andcc $len, 16, %g0 ! is number of blocks even?
+ srlx $len, 4, $len
+ alignaddrl $out, %g0, $out
+ bz %icc, .L${bits}_cbc_dec_loop2x
+ prefetch [$out], 22
+.L${bits}_cbc_dec_loop:
+ ldx [$inp + 0], %o0
+ brz,pt $ileft, 4f
+ ldx [$inp + 8], %o1
+
+ ldx [$inp + 16], %o2
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ sllx %o1, $ileft, %o1
+ or %g1, %o0, %o0
+ srlx %o2, $iright, %o2
+ or %o2, %o1, %o1
+4:
+ xor %g4, %o0, %o2 ! ^= rk[0]
+ xor %g5, %o1, %o3
+ movxtod %o2, %f0
+ movxtod %o3, %f2
+
+ prefetch [$out + 63], 22
+ prefetch [$inp + 16+63], 20
+ call _${alg}${bits}_decrypt_1x
+ add $inp, 16, $inp
+
+ fxor %f12, %f0, %f0 ! ^= ivec
+ fxor %f14, %f2, %f2
+ movxtod %o0, %f12
+ movxtod %o1, %f14
+
+ brnz,pn $ooff, 2f
+ sub $len, 1, $len
+
+ std %f0, [$out + 0]
+ std %f2, [$out + 8]
+ brnz,pt $len, .L${bits}_cbc_dec_loop2x
+ add $out, 16, $out
+___
+$::code.=<<___ if ($::evp);
+ st %f12, [$ivec + 0]
+ st %f13, [$ivec + 4]
+ st %f14, [$ivec + 8]
+ st %f15, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+ brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec
+ nop
+
+ std %f12, [$ivec + 0] ! write out ivec
+ std %f14, [$ivec + 8]
+___
+$::code.=<<___;
+.L${bits}_cbc_dec_abort:
+ ret
+ restore
+
+.align 16
+2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard
+ ! and ~3x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f4 ! handle unaligned output
+ faligndata %f0, %f2, %f6
+ faligndata %f2, %f2, %f8
+
+ stda %f4, [$out + $omask]0xc0 ! partial store
+ std %f6, [$out + 8]
+ add $out, 16, $out
+ orn %g0, $omask, $omask
+ stda %f8, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .L${bits}_cbc_dec_loop2x+4
+ orn %g0, $omask, $omask
+___
+$::code.=<<___ if ($::evp);
+ st %f12, [$ivec + 0]
+ st %f13, [$ivec + 4]
+ st %f14, [$ivec + 8]
+ st %f15, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+ brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec
+ nop
+
+ std %f12, [$ivec + 0] ! write out ivec
+ std %f14, [$ivec + 8]
+___
+$::code.=<<___;
+ ret
+ restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}_cbc_dec_loop2x:
+ ldx [$inp + 0], %o0
+ ldx [$inp + 8], %o1
+ ldx [$inp + 16], %o2
+ brz,pt $ileft, 4f
+ ldx [$inp + 24], %o3
+
+ ldx [$inp + 32], %o4
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ or %g1, %o0, %o0
+ sllx %o1, $ileft, %o1
+ srlx %o2, $iright, %g1
+ or %g1, %o1, %o1
+ sllx %o2, $ileft, %o2
+ srlx %o3, $iright, %g1
+ or %g1, %o2, %o2
+ sllx %o3, $ileft, %o3
+ srlx %o4, $iright, %o4
+ or %o4, %o3, %o3
+4:
+ xor %g4, %o0, %o4 ! ^= rk[0]
+ xor %g5, %o1, %o5
+ movxtod %o4, %f0
+ movxtod %o5, %f2
+ xor %g4, %o2, %o4
+ xor %g5, %o3, %o5
+ movxtod %o4, %f4
+ movxtod %o5, %f6
+
+ prefetch [$out + 63], 22
+ prefetch [$inp + 32+63], 20
+ call _${alg}${bits}_decrypt_2x
+ add $inp, 32, $inp
+
+ movxtod %o0, %f8
+ movxtod %o1, %f10
+ fxor %f12, %f0, %f0 ! ^= ivec
+ fxor %f14, %f2, %f2
+ movxtod %o2, %f12
+ movxtod %o3, %f14
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+
+ brnz,pn $ooff, 2f
+ sub $len, 2, $len
+
+ std %f0, [$out + 0]
+ std %f2, [$out + 8]
+ std %f4, [$out + 16]
+ std %f6, [$out + 24]
+ brnz,pt $len, .L${bits}_cbc_dec_loop2x
+ add $out, 32, $out
+___
+$::code.=<<___ if ($::evp);
+ st %f12, [$ivec + 0]
+ st %f13, [$ivec + 4]
+ st %f14, [$ivec + 8]
+ st %f15, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+ brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec
+ nop
+
+ std %f12, [$ivec + 0] ! write out ivec
+ std %f14, [$ivec + 8]
+___
+$::code.=<<___;
+ ret
+ restore
+
+.align 16
+2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard
+ ! and ~3x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f8 ! handle unaligned output
+ faligndata %f0, %f2, %f0
+ faligndata %f2, %f4, %f2
+ faligndata %f4, %f6, %f4
+ faligndata %f6, %f6, %f6
+ stda %f8, [$out + $omask]0xc0 ! partial store
+ std %f0, [$out + 8]
+ std %f2, [$out + 16]
+ std %f4, [$out + 24]
+ add $out, 32, $out
+ orn %g0, $omask, $omask
+ stda %f6, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .L${bits}_cbc_dec_loop2x+4
+ orn %g0, $omask, $omask
+___
+$::code.=<<___ if ($::evp);
+ st %f12, [$ivec + 0]
+ st %f13, [$ivec + 4]
+ st %f14, [$ivec + 8]
+ st %f15, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+ brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec
+ nop
+
+ std %f12, [$ivec + 0] ! write out ivec
+ std %f14, [$ivec + 8]
+ ret
+ restore
+
+.align 16
+.L${bits}_cbc_dec_unaligned_ivec:
+ alignaddrl $ivec, $ivoff, %g0 ! handle unaligned ivec
+ mov 0xff, $omask
+ srl $omask, $ivoff, $omask
+ faligndata %f12, %f12, %f0
+ faligndata %f12, %f14, %f2
+ faligndata %f14, %f14, %f4
+ stda %f0, [$ivec + $omask]0xc0
+ std %f2, [$ivec + 8]
+ add $ivec, 16, $ivec
+ orn %g0, $omask, $omask
+ stda %f4, [$ivec + $omask]0xc0
+___
+$::code.=<<___;
+ ret
+ restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}cbc_dec_blk:
+ add $out, $len, $blk_init
+ and $blk_init, 63, $blk_init ! tail
+ sub $len, $blk_init, $len
+ add $blk_init, 15, $blk_init ! round up to 16n
+ srlx $len, 4, $len
+ srl $blk_init, 4, $blk_init
+ sub $len, 1, $len
+ add $blk_init, 1, $blk_init
+
+.L${bits}_cbc_dec_blk_loop2x:
+ ldx [$inp + 0], %o0
+ ldx [$inp + 8], %o1
+ ldx [$inp + 16], %o2
+ brz,pt $ileft, 5f
+ ldx [$inp + 24], %o3
+
+ ldx [$inp + 32], %o4
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ or %g1, %o0, %o0
+ sllx %o1, $ileft, %o1
+ srlx %o2, $iright, %g1
+ or %g1, %o1, %o1
+ sllx %o2, $ileft, %o2
+ srlx %o3, $iright, %g1
+ or %g1, %o2, %o2
+ sllx %o3, $ileft, %o3
+ srlx %o4, $iright, %o4
+ or %o4, %o3, %o3
+5:
+ xor %g4, %o0, %o4 ! ^= rk[0]
+ xor %g5, %o1, %o5
+ movxtod %o4, %f0
+ movxtod %o5, %f2
+ xor %g4, %o2, %o4
+ xor %g5, %o3, %o5
+ movxtod %o4, %f4
+ movxtod %o5, %f6
+
+ prefetch [$inp + 32+63], 20
+ call _${alg}${bits}_decrypt_2x
+ add $inp, 32, $inp
+ subcc $len, 2, $len
+
+ movxtod %o0, %f8
+ movxtod %o1, %f10
+ fxor %f12, %f0, %f0 ! ^= ivec
+ fxor %f14, %f2, %f2
+ movxtod %o2, %f12
+ movxtod %o3, %f14
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+
+ stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ add $out, 8, $out
+ stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ add $out, 8, $out
+ stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ add $out, 8, $out
+ stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ bgu,pt $::size_t_cc, .L${bits}_cbc_dec_blk_loop2x
+ add $out, 8, $out
+
+ add $blk_init, $len, $len
+ andcc $len, 1, %g0 ! is number of blocks even?
+ membar #StoreLoad|#StoreStore
+ bnz,pt %icc, .L${bits}_cbc_dec_loop
+ srl $len, 0, $len
+ brnz,pn $len, .L${bits}_cbc_dec_loop2x
+ nop
+___
+$::code.=<<___ if ($::evp);
+ st %f12, [$ivec + 0] ! write out ivec
+ st %f13, [$ivec + 4]
+ st %f14, [$ivec + 8]
+ st %f15, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+ brnz,pn $ivoff, 3b
+ nop
+
+ std %f12, [$ivec + 0] ! write out ivec
+ std %f14, [$ivec + 8]
+___
+$::code.=<<___;
+ ret
+ restore
+.type ${alg}${bits}_t4_cbc_decrypt,#function
+.size ${alg}${bits}_t4_cbc_decrypt,.-${alg}${bits}_t4_cbc_decrypt
+___
+}
+
+sub alg_ctr32_implement {
+my ($alg,$bits) = @_;
+
+$::code.=<<___;
+.globl ${alg}${bits}_t4_ctr32_encrypt
+.align 32
+${alg}${bits}_t4_ctr32_encrypt:
+ save %sp, -$::frame, %sp
+ srln $len, 0, $len ! needed on v8+, "nop" on v9
+
+ prefetch [$inp], 20
+ prefetch [$inp + 63], 20
+ call _${alg}${bits}_load_enckey
+ sllx $len, 4, $len
+
+ ld [$ivec + 0], %l4 ! counter
+ ld [$ivec + 4], %l5
+ ld [$ivec + 8], %l6
+ ld [$ivec + 12], %l7
+
+ sllx %l4, 32, %o5
+ or %l5, %o5, %o5
+ sllx %l6, 32, %g1
+ xor %o5, %g4, %g4 ! ^= rk[0]
+ xor %g1, %g5, %g5
+ movxtod %g4, %f14 ! most significant 64 bits
+
+ sub $inp, $out, $blk_init ! $inp!=$out
+ and $inp, 7, $ileft
+ andn $inp, 7, $inp
+ sll $ileft, 3, $ileft
+ mov 64, $iright
+ mov 0xff, $omask
+ sub $iright, $ileft, $iright
+ and $out, 7, $ooff
+ cmp $len, 255
+ movrnz $ooff, 0, $blk_init ! if ( $out&7 ||
+ movleu $::size_t_cc, 0, $blk_init ! $len<256 ||
+ brnz,pn $blk_init, .L${bits}_ctr32_blk ! $inp==$out)
+ srl $omask, $ooff, $omask
+
+ andcc $len, 16, %g0 ! is number of blocks even?
+ alignaddrl $out, %g0, $out
+ bz %icc, .L${bits}_ctr32_loop2x
+ srlx $len, 4, $len
+.L${bits}_ctr32_loop:
+ ldx [$inp + 0], %o0
+ brz,pt $ileft, 4f
+ ldx [$inp + 8], %o1
+
+ ldx [$inp + 16], %o2
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ sllx %o1, $ileft, %o1
+ or %g1, %o0, %o0
+ srlx %o2, $iright, %o2
+ or %o2, %o1, %o1
+4:
+ xor %g5, %l7, %g1 ! ^= rk[0]
+ add %l7, 1, %l7
+ movxtod %g1, %f2
+ srl %l7, 0, %l7 ! clruw
+ prefetch [$out + 63], 22
+ prefetch [$inp + 16+63], 20
+___
+$::code.=<<___ if ($alg eq "aes");
+ aes_eround01 %f16, %f14, %f2, %f4
+ aes_eround23 %f18, %f14, %f2, %f2
+___
+$::code.=<<___ if ($alg eq "cmll");
+ camellia_f %f16, %f2, %f14, %f2
+ camellia_f %f18, %f14, %f2, %f0
+___
+$::code.=<<___;
+ call _${alg}${bits}_encrypt_1x+8
+ add $inp, 16, $inp
+
+ movxtod %o0, %f10
+ movxtod %o1, %f12
+ fxor %f10, %f0, %f0 ! ^= inp
+ fxor %f12, %f2, %f2
+
+ brnz,pn $ooff, 2f
+ sub $len, 1, $len
+
+ std %f0, [$out + 0]
+ std %f2, [$out + 8]
+ brnz,pt $len, .L${bits}_ctr32_loop2x
+ add $out, 16, $out
+
+ ret
+ restore
+
+.align 16
+2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard
+ ! and ~3x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f4 ! handle unaligned output
+ faligndata %f0, %f2, %f6
+ faligndata %f2, %f2, %f8
+ stda %f4, [$out + $omask]0xc0 ! partial store
+ std %f6, [$out + 8]
+ add $out, 16, $out
+ orn %g0, $omask, $omask
+ stda %f8, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .L${bits}_ctr32_loop2x+4
+ orn %g0, $omask, $omask
+
+ ret
+ restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}_ctr32_loop2x:
+ ldx [$inp + 0], %o0
+ ldx [$inp + 8], %o1
+ ldx [$inp + 16], %o2
+ brz,pt $ileft, 4f
+ ldx [$inp + 24], %o3
+
+ ldx [$inp + 32], %o4
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ or %g1, %o0, %o0
+ sllx %o1, $ileft, %o1
+ srlx %o2, $iright, %g1
+ or %g1, %o1, %o1
+ sllx %o2, $ileft, %o2
+ srlx %o3, $iright, %g1
+ or %g1, %o2, %o2
+ sllx %o3, $ileft, %o3
+ srlx %o4, $iright, %o4
+ or %o4, %o3, %o3
+4:
+ xor %g5, %l7, %g1 ! ^= rk[0]
+ add %l7, 1, %l7
+ movxtod %g1, %f2
+ srl %l7, 0, %l7 ! clruw
+ xor %g5, %l7, %g1
+ add %l7, 1, %l7
+ movxtod %g1, %f6
+ srl %l7, 0, %l7 ! clruw
+ prefetch [$out + 63], 22
+ prefetch [$inp + 32+63], 20
+___
+$::code.=<<___ if ($alg eq "aes");
+ aes_eround01 %f16, %f14, %f2, %f8
+ aes_eround23 %f18, %f14, %f2, %f2
+ aes_eround01 %f16, %f14, %f6, %f10
+ aes_eround23 %f18, %f14, %f6, %f6
+___
+$::code.=<<___ if ($alg eq "cmll");
+ camellia_f %f16, %f2, %f14, %f2
+ camellia_f %f16, %f6, %f14, %f6
+ camellia_f %f18, %f14, %f2, %f0
+ camellia_f %f18, %f14, %f6, %f4
+___
+$::code.=<<___;
+ call _${alg}${bits}_encrypt_2x+16
+ add $inp, 32, $inp
+
+ movxtod %o0, %f8
+ movxtod %o1, %f10
+ movxtod %o2, %f12
+ fxor %f8, %f0, %f0 ! ^= inp
+ movxtod %o3, %f8
+ fxor %f10, %f2, %f2
+ fxor %f12, %f4, %f4
+ fxor %f8, %f6, %f6
+
+ brnz,pn $ooff, 2f
+ sub $len, 2, $len
+
+ std %f0, [$out + 0]
+ std %f2, [$out + 8]
+ std %f4, [$out + 16]
+ std %f6, [$out + 24]
+ brnz,pt $len, .L${bits}_ctr32_loop2x
+ add $out, 32, $out
+
+ ret
+ restore
+
+.align 16
+2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard
+ ! and ~3x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f8 ! handle unaligned output
+ faligndata %f0, %f2, %f0
+ faligndata %f2, %f4, %f2
+ faligndata %f4, %f6, %f4
+ faligndata %f6, %f6, %f6
+
+ stda %f8, [$out + $omask]0xc0 ! partial store
+ std %f0, [$out + 8]
+ std %f2, [$out + 16]
+ std %f4, [$out + 24]
+ add $out, 32, $out
+ orn %g0, $omask, $omask
+ stda %f6, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .L${bits}_ctr32_loop2x+4
+ orn %g0, $omask, $omask
+
+ ret
+ restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}_ctr32_blk:
+ add $out, $len, $blk_init
+ and $blk_init, 63, $blk_init ! tail
+ sub $len, $blk_init, $len
+ add $blk_init, 15, $blk_init ! round up to 16n
+ srlx $len, 4, $len
+ srl $blk_init, 4, $blk_init
+ sub $len, 1, $len
+ add $blk_init, 1, $blk_init
+
+.L${bits}_ctr32_blk_loop2x:
+ ldx [$inp + 0], %o0
+ ldx [$inp + 8], %o1
+ ldx [$inp + 16], %o2
+ brz,pt $ileft, 5f
+ ldx [$inp + 24], %o3
+
+ ldx [$inp + 32], %o4
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ or %g1, %o0, %o0
+ sllx %o1, $ileft, %o1
+ srlx %o2, $iright, %g1
+ or %g1, %o1, %o1
+ sllx %o2, $ileft, %o2
+ srlx %o3, $iright, %g1
+ or %g1, %o2, %o2
+ sllx %o3, $ileft, %o3
+ srlx %o4, $iright, %o4
+ or %o4, %o3, %o3
+5:
+ xor %g5, %l7, %g1 ! ^= rk[0]
+ add %l7, 1, %l7
+ movxtod %g1, %f2
+ srl %l7, 0, %l7 ! clruw
+ xor %g5, %l7, %g1
+ add %l7, 1, %l7
+ movxtod %g1, %f6
+ srl %l7, 0, %l7 ! clruw
+ prefetch [$inp + 32+63], 20
+___
+$::code.=<<___ if ($alg eq "aes");
+ aes_eround01 %f16, %f14, %f2, %f8
+ aes_eround23 %f18, %f14, %f2, %f2
+ aes_eround01 %f16, %f14, %f6, %f10
+ aes_eround23 %f18, %f14, %f6, %f6
+___
+$::code.=<<___ if ($alg eq "cmll");
+ camellia_f %f16, %f2, %f14, %f2
+ camellia_f %f16, %f6, %f14, %f6
+ camellia_f %f18, %f14, %f2, %f0
+ camellia_f %f18, %f14, %f6, %f4
+___
+$::code.=<<___;
+ call _${alg}${bits}_encrypt_2x+16
+ add $inp, 32, $inp
+ subcc $len, 2, $len
+
+ movxtod %o0, %f8
+ movxtod %o1, %f10
+ movxtod %o2, %f12
+ fxor %f8, %f0, %f0 ! ^= inp
+ movxtod %o3, %f8
+ fxor %f10, %f2, %f2
+ fxor %f12, %f4, %f4
+ fxor %f8, %f6, %f6
+
+ stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ add $out, 8, $out
+ stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ add $out, 8, $out
+ stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ add $out, 8, $out
+ stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ bgu,pt $::size_t_cc, .L${bits}_ctr32_blk_loop2x
+ add $out, 8, $out
+
+ add $blk_init, $len, $len
+ andcc $len, 1, %g0 ! is number of blocks even?
+ membar #StoreLoad|#StoreStore
+ bnz,pt %icc, .L${bits}_ctr32_loop
+ srl $len, 0, $len
+ brnz,pn $len, .L${bits}_ctr32_loop2x
+ nop
+
+ ret
+ restore
+.type ${alg}${bits}_t4_ctr32_encrypt,#function
+.size ${alg}${bits}_t4_ctr32_encrypt,.-${alg}${bits}_t4_ctr32_encrypt
+___
+}
+
+sub alg_xts_implement {
+my ($alg,$bits,$dir) = @_;
+my ($inp,$out,$len,$key1,$key2,$ivec)=map("%i$_",(0..5));
+my $rem=$ivec;
+
+$::code.=<<___;
+.globl ${alg}${bits}_t4_xts_${dir}crypt
+.align 32
+${alg}${bits}_t4_xts_${dir}crypt:
+ save %sp, -$::frame-16, %sp
+ srln $len, 0, $len ! needed on v8+, "nop" on v9
+
+ mov $ivec, %o0
+ add %fp, $::bias-16, %o1
+ call ${alg}_t4_encrypt
+ mov $key2, %o2
+
+ add %fp, $::bias-16, %l7
+ ldxa [%l7]0x88, %g2
+ add %fp, $::bias-8, %l7
+ ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
+
+ sethi %hi(0x76543210), %l7
+ or %l7, %lo(0x76543210), %l7
+ bmask %l7, %g0, %g0 ! byte swap mask
+
+ prefetch [$inp], 20
+ prefetch [$inp + 63], 20
+ call _${alg}${bits}_load_${dir}ckey
+ and $len, 15, $rem
+ and $len, -16, $len
+___
+$code.=<<___ if ($dir eq "de");
+ mov 0, %l7
+ movrnz $rem, 16, %l7
+ sub $len, %l7, $len
+___
+$code.=<<___;
+
+ sub $inp, $out, $blk_init ! $inp!=$out
+ and $inp, 7, $ileft
+ andn $inp, 7, $inp
+ sll $ileft, 3, $ileft
+ mov 64, $iright
+ mov 0xff, $omask
+ sub $iright, $ileft, $iright
+ and $out, 7, $ooff
+ cmp $len, 255
+ movrnz $ooff, 0, $blk_init ! if ( $out&7 ||
+ movleu $::size_t_cc, 0, $blk_init ! $len<256 ||
+ brnz,pn $blk_init, .L${bits}_xts_${dir}blk ! $inp==$out)
+ srl $omask, $ooff, $omask
+
+ andcc $len, 16, %g0 ! is number of blocks even?
+___
+$code.=<<___ if ($dir eq "de");
+ brz,pn $len, .L${bits}_xts_${dir}steal
+___
+$code.=<<___;
+ alignaddrl $out, %g0, $out
+ bz %icc, .L${bits}_xts_${dir}loop2x
+ srlx $len, 4, $len
+.L${bits}_xts_${dir}loop:
+ ldx [$inp + 0], %o0
+ brz,pt $ileft, 4f
+ ldx [$inp + 8], %o1
+
+ ldx [$inp + 16], %o2
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ sllx %o1, $ileft, %o1
+ or %g1, %o0, %o0
+ srlx %o2, $iright, %o2
+ or %o2, %o1, %o1
+4:
+ movxtod %g2, %f12
+ movxtod %g3, %f14
+ bshuffle %f12, %f12, %f12
+ bshuffle %f14, %f14, %f14
+
+ xor %g4, %o0, %o0 ! ^= rk[0]
+ xor %g5, %o1, %o1
+ movxtod %o0, %f0
+ movxtod %o1, %f2
+
+ fxor %f12, %f0, %f0 ! ^= tweak[0]
+ fxor %f14, %f2, %f2
+
+ prefetch [$out + 63], 22
+ prefetch [$inp + 16+63], 20
+ call _${alg}${bits}_${dir}crypt_1x
+ add $inp, 16, $inp
+
+ fxor %f12, %f0, %f0 ! ^= tweak[0]
+ fxor %f14, %f2, %f2
+
+ srax %g3, 63, %l7 ! next tweak value
+ addcc %g2, %g2, %g2
+ and %l7, 0x87, %l7
+ addxc %g3, %g3, %g3
+ xor %l7, %g2, %g2
+
+ brnz,pn $ooff, 2f
+ sub $len, 1, $len
+
+ std %f0, [$out + 0]
+ std %f2, [$out + 8]
+ brnz,pt $len, .L${bits}_xts_${dir}loop2x
+ add $out, 16, $out
+
+ brnz,pn $rem, .L${bits}_xts_${dir}steal
+ nop
+
+ ret
+ restore
+
+.align 16
+2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard
+ ! and ~3x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f4 ! handle unaligned output
+ faligndata %f0, %f2, %f6
+ faligndata %f2, %f2, %f8
+ stda %f4, [$out + $omask]0xc0 ! partial store
+ std %f6, [$out + 8]
+ add $out, 16, $out
+ orn %g0, $omask, $omask
+ stda %f8, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .L${bits}_xts_${dir}loop2x+4
+ orn %g0, $omask, $omask
+
+ brnz,pn $rem, .L${bits}_xts_${dir}steal
+ nop
+
+ ret
+ restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}_xts_${dir}loop2x:
+ ldx [$inp + 0], %o0
+ ldx [$inp + 8], %o1
+ ldx [$inp + 16], %o2
+ brz,pt $ileft, 4f
+ ldx [$inp + 24], %o3
+
+ ldx [$inp + 32], %o4
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ or %g1, %o0, %o0
+ sllx %o1, $ileft, %o1
+ srlx %o2, $iright, %g1
+ or %g1, %o1, %o1
+ sllx %o2, $ileft, %o2
+ srlx %o3, $iright, %g1
+ or %g1, %o2, %o2
+ sllx %o3, $ileft, %o3
+ srlx %o4, $iright, %o4
+ or %o4, %o3, %o3
+4:
+ movxtod %g2, %f12
+ movxtod %g3, %f14
+ bshuffle %f12, %f12, %f12
+ bshuffle %f14, %f14, %f14
+
+ srax %g3, 63, %l7 ! next tweak value
+ addcc %g2, %g2, %g2
+ and %l7, 0x87, %l7
+ addxc %g3, %g3, %g3
+ xor %l7, %g2, %g2
+
+ movxtod %g2, %f8
+ movxtod %g3, %f10
+ bshuffle %f8, %f8, %f8
+ bshuffle %f10, %f10, %f10
+
+ xor %g4, %o0, %o0 ! ^= rk[0]
+ xor %g5, %o1, %o1
+ xor %g4, %o2, %o2 ! ^= rk[0]
+ xor %g5, %o3, %o3
+ movxtod %o0, %f0
+ movxtod %o1, %f2
+ movxtod %o2, %f4
+ movxtod %o3, %f6
+
+ fxor %f12, %f0, %f0 ! ^= tweak[0]
+ fxor %f14, %f2, %f2
+ fxor %f8, %f4, %f4 ! ^= tweak[0]
+ fxor %f10, %f6, %f6
+
+ prefetch [$out + 63], 22
+ prefetch [$inp + 32+63], 20
+ call _${alg}${bits}_${dir}crypt_2x
+ add $inp, 32, $inp
+
+ movxtod %g2, %f8
+ movxtod %g3, %f10
+
+ srax %g3, 63, %l7 ! next tweak value
+ addcc %g2, %g2, %g2
+ and %l7, 0x87, %l7
+ addxc %g3, %g3, %g3
+ xor %l7, %g2, %g2
+
+ bshuffle %f8, %f8, %f8
+ bshuffle %f10, %f10, %f10
+
+ fxor %f12, %f0, %f0 ! ^= tweak[0]
+ fxor %f14, %f2, %f2
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+
+ brnz,pn $ooff, 2f
+ sub $len, 2, $len
+
+ std %f0, [$out + 0]
+ std %f2, [$out + 8]
+ std %f4, [$out + 16]
+ std %f6, [$out + 24]
+ brnz,pt $len, .L${bits}_xts_${dir}loop2x
+ add $out, 32, $out
+
+ fsrc2 %f4, %f0
+ fsrc2 %f6, %f2
+ brnz,pn $rem, .L${bits}_xts_${dir}steal
+ nop
+
+ ret
+ restore
+
+.align 16
+2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard
+ ! and ~3x deterioration
+ ! in inp==out case
+ faligndata %f0, %f0, %f8 ! handle unaligned output
+ faligndata %f0, %f2, %f10
+ faligndata %f2, %f4, %f12
+ faligndata %f4, %f6, %f14
+ faligndata %f6, %f6, %f0
+
+ stda %f8, [$out + $omask]0xc0 ! partial store
+ std %f10, [$out + 8]
+ std %f12, [$out + 16]
+ std %f14, [$out + 24]
+ add $out, 32, $out
+ orn %g0, $omask, $omask
+ stda %f0, [$out + $omask]0xc0 ! partial store
+
+ brnz,pt $len, .L${bits}_xts_${dir}loop2x+4
+ orn %g0, $omask, $omask
+
+ fsrc2 %f4, %f0
+ fsrc2 %f6, %f2
+ brnz,pn $rem, .L${bits}_xts_${dir}steal
+ nop
+
+ ret
+ restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}_xts_${dir}blk:
+ add $out, $len, $blk_init
+ and $blk_init, 63, $blk_init ! tail
+ sub $len, $blk_init, $len
+ add $blk_init, 15, $blk_init ! round up to 16n
+ srlx $len, 4, $len
+ srl $blk_init, 4, $blk_init
+ sub $len, 1, $len
+ add $blk_init, 1, $blk_init
+
+.L${bits}_xts_${dir}blk2x:
+ ldx [$inp + 0], %o0
+ ldx [$inp + 8], %o1
+ ldx [$inp + 16], %o2
+ brz,pt $ileft, 5f
+ ldx [$inp + 24], %o3
+
+ ldx [$inp + 32], %o4
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ or %g1, %o0, %o0
+ sllx %o1, $ileft, %o1
+ srlx %o2, $iright, %g1
+ or %g1, %o1, %o1
+ sllx %o2, $ileft, %o2
+ srlx %o3, $iright, %g1
+ or %g1, %o2, %o2
+ sllx %o3, $ileft, %o3
+ srlx %o4, $iright, %o4
+ or %o4, %o3, %o3
+5:
+ movxtod %g2, %f12
+ movxtod %g3, %f14
+ bshuffle %f12, %f12, %f12
+ bshuffle %f14, %f14, %f14
+
+ srax %g3, 63, %l7 ! next tweak value
+ addcc %g2, %g2, %g2
+ and %l7, 0x87, %l7
+ addxc %g3, %g3, %g3
+ xor %l7, %g2, %g2
+
+ movxtod %g2, %f8
+ movxtod %g3, %f10
+ bshuffle %f8, %f8, %f8
+ bshuffle %f10, %f10, %f10
+
+ xor %g4, %o0, %o0 ! ^= rk[0]
+ xor %g5, %o1, %o1
+ xor %g4, %o2, %o2 ! ^= rk[0]
+ xor %g5, %o3, %o3
+ movxtod %o0, %f0
+ movxtod %o1, %f2
+ movxtod %o2, %f4
+ movxtod %o3, %f6
+
+ fxor %f12, %f0, %f0 ! ^= tweak[0]
+ fxor %f14, %f2, %f2
+ fxor %f8, %f4, %f4 ! ^= tweak[0]
+ fxor %f10, %f6, %f6
+
+ prefetch [$inp + 32+63], 20
+ call _${alg}${bits}_${dir}crypt_2x
+ add $inp, 32, $inp
+
+ movxtod %g2, %f8
+ movxtod %g3, %f10
+
+ srax %g3, 63, %l7 ! next tweak value
+ addcc %g2, %g2, %g2
+ and %l7, 0x87, %l7
+ addxc %g3, %g3, %g3
+ xor %l7, %g2, %g2
+
+ bshuffle %f8, %f8, %f8
+ bshuffle %f10, %f10, %f10
+
+ fxor %f12, %f0, %f0 ! ^= tweak[0]
+ fxor %f14, %f2, %f2
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+
+ subcc $len, 2, $len
+ stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ add $out, 8, $out
+ stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ add $out, 8, $out
+ stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ add $out, 8, $out
+ stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
+ bgu,pt $::size_t_cc, .L${bits}_xts_${dir}blk2x
+ add $out, 8, $out
+
+ add $blk_init, $len, $len
+ andcc $len, 1, %g0 ! is number of blocks even?
+ membar #StoreLoad|#StoreStore
+ bnz,pt %icc, .L${bits}_xts_${dir}loop
+ srl $len, 0, $len
+ brnz,pn $len, .L${bits}_xts_${dir}loop2x
+ nop
+
+ fsrc2 %f4, %f0
+ fsrc2 %f6, %f2
+ brnz,pn $rem, .L${bits}_xts_${dir}steal
+ nop
+
+ ret
+ restore
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+___
+$code.=<<___ if ($dir eq "en");
+.align 32
+.L${bits}_xts_${dir}steal:
+ std %f0, [%fp + $::bias-16] ! copy of output
+ std %f2, [%fp + $::bias-8]
+
+ srl $ileft, 3, $ileft
+ add %fp, $::bias-16, %l7
+ add $inp, $ileft, $inp ! original $inp+$len&-15
+ add $out, $ooff, $out ! original $out+$len&-15
+ mov 0, $ileft
+ nop ! align
+
+.L${bits}_xts_${dir}stealing:
+ ldub [$inp + $ileft], %o0
+ ldub [%l7 + $ileft], %o1
+ dec $rem
+ stb %o0, [%l7 + $ileft]
+ stb %o1, [$out + $ileft]
+ brnz $rem, .L${bits}_xts_${dir}stealing
+ inc $ileft
+
+ mov %l7, $inp
+ sub $out, 16, $out
+ mov 0, $ileft
+ sub $out, $ooff, $out
+ ba .L${bits}_xts_${dir}loop ! one more time
+ mov 1, $len ! $rem is 0
+___
+$code.=<<___ if ($dir eq "de");
+.align 32
+.L${bits}_xts_${dir}steal:
+ ldx [$inp + 0], %o0
+ brz,pt $ileft, 8f
+ ldx [$inp + 8], %o1
+
+ ldx [$inp + 16], %o2
+ sllx %o0, $ileft, %o0
+ srlx %o1, $iright, %g1
+ sllx %o1, $ileft, %o1
+ or %g1, %o0, %o0
+ srlx %o2, $iright, %o2
+ or %o2, %o1, %o1
+8:
+ srax %g3, 63, %l7 ! next tweak value
+ addcc %g2, %g2, %o2
+ and %l7, 0x87, %l7
+ addxc %g3, %g3, %o3
+ xor %l7, %o2, %o2
+
+ movxtod %o2, %f12
+ movxtod %o3, %f14
+ bshuffle %f12, %f12, %f12
+ bshuffle %f14, %f14, %f14
+
+ xor %g4, %o0, %o0 ! ^= rk[0]
+ xor %g5, %o1, %o1
+ movxtod %o0, %f0
+ movxtod %o1, %f2
+
+ fxor %f12, %f0, %f0 ! ^= tweak[0]
+ fxor %f14, %f2, %f2
+
+ call _${alg}${bits}_${dir}crypt_1x
+ add $inp, 16, $inp
+
+ fxor %f12, %f0, %f0 ! ^= tweak[0]
+ fxor %f14, %f2, %f2
+
+ std %f0, [%fp + $::bias-16]
+ std %f2, [%fp + $::bias-8]
+
+ srl $ileft, 3, $ileft
+ add %fp, $::bias-16, %l7
+ add $inp, $ileft, $inp ! original $inp+$len&-15
+ add $out, $ooff, $out ! original $out+$len&-15
+ mov 0, $ileft
+ add $out, 16, $out
+ nop ! align
+
+.L${bits}_xts_${dir}stealing:
+ ldub [$inp + $ileft], %o0
+ ldub [%l7 + $ileft], %o1
+ dec $rem
+ stb %o0, [%l7 + $ileft]
+ stb %o1, [$out + $ileft]
+ brnz $rem, .L${bits}_xts_${dir}stealing
+ inc $ileft
+
+ mov %l7, $inp
+ sub $out, 16, $out
+ mov 0, $ileft
+ sub $out, $ooff, $out
+ ba .L${bits}_xts_${dir}loop ! one more time
+ mov 1, $len ! $rem is 0
+___
+$code.=<<___;
+ ret
+ restore
+.type ${alg}${bits}_t4_xts_${dir}crypt,#function
+.size ${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt
+___
+}
+
+# Purpose of these subroutines is to explicitly encode VIS instructions,
+# so that one can compile the module without having to specify VIS
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# Idea is to reserve for option to produce "universal" binary and let
+# programmer detect if current CPU is VIS capable at run-time.
+sub unvis {
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my ($ref,$opf);
+my %visopf = ( "faligndata" => 0x048,
+ "bshuffle" => 0x04c,
+ "fnot2" => 0x066,
+ "fxor" => 0x06c,
+ "fsrc2" => 0x078 );
+
+ $ref = "$mnemonic\t$rs1,$rs2,$rd";
+
+ if ($opf=$visopf{$mnemonic}) {
+ foreach ($rs1,$rs2,$rd) {
+ return $ref if (!/%f([0-9]{1,2})/);
+ $_=$1;
+ if ($1>=32) {
+ return $ref if ($1&1);
+ # re-encode for upper double register addressing
+ $_=($1|$1>>5)&31;
+ }
+ }
+
+ return sprintf ".word\t0x%08x !%s",
+ 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+ $ref;
+ } else {
+ return $ref;
+ }
+}
+
+sub unvis3 {
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
+my ($ref,$opf);
+my %visopf = ( "addxc" => 0x011,
+ "addxccc" => 0x013,
+ "umulxhi" => 0x016,
+ "alignaddr" => 0x018,
+ "bmask" => 0x019,
+ "alignaddrl" => 0x01a );
+
+ $ref = "$mnemonic\t$rs1,$rs2,$rd";
+
+ if ($opf=$visopf{$mnemonic}) {
+ foreach ($rs1,$rs2,$rd) {
+ return $ref if (!/%([goli])([0-9])/);
+ $_=$bias{$1}+$2;
+ }
+
+ return sprintf ".word\t0x%08x !%s",
+ 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+ $ref;
+ } else {
+ return $ref;
+ }
+}
+
+sub unaes_round { # 4-argument instructions
+my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_;
+my ($ref,$opf);
+my %aesopf = ( "aes_eround01" => 0,
+ "aes_eround23" => 1,
+ "aes_dround01" => 2,
+ "aes_dround23" => 3,
+ "aes_eround01_l"=> 4,
+ "aes_eround23_l"=> 5,
+ "aes_dround01_l"=> 6,
+ "aes_dround23_l"=> 7,
+ "aes_kexpand1" => 8 );
+
+ $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd";
+
+ if (defined($opf=$aesopf{$mnemonic})) {
+ $rs3 = ($rs3 =~ /%f([0-6]*[02468])/) ? (($1|$1>>5)&31) : $rs3;
+ foreach ($rs1,$rs2,$rd) {
+ return $ref if (!/%f([0-9]{1,2})/);
+ $_=$1;
+ if ($1>=32) {
+ return $ref if ($1&1);
+ # re-encode for upper double register addressing
+ $_=($1|$1>>5)&31;
+ }
+ }
+
+ return sprintf ".word\t0x%08x !%s",
+ 2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|$opf<<5|$rs2,
+ $ref;
+ } else {
+ return $ref;
+ }
+}
+
+sub unaes_kexpand { # 3-argument instructions
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my ($ref,$opf);
+my %aesopf = ( "aes_kexpand0" => 0x130,
+ "aes_kexpand2" => 0x131 );
+
+ $ref = "$mnemonic\t$rs1,$rs2,$rd";
+
+ if (defined($opf=$aesopf{$mnemonic})) {
+ foreach ($rs1,$rs2,$rd) {
+ return $ref if (!/%f([0-9]{1,2})/);
+ $_=$1;
+ if ($1>=32) {
+ return $ref if ($1&1);
+ # re-encode for upper double register addressing
+ $_=($1|$1>>5)&31;
+ }
+ }
+
+ return sprintf ".word\t0x%08x !%s",
+ 2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2,
+ $ref;
+ } else {
+ return $ref;
+ }
+}
+
+sub uncamellia_f { # 4-argument instructions
+my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_;
+my ($ref,$opf);
+
+ $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd";
+
+ if (1) {
+ $rs3 = ($rs3 =~ /%f([0-6]*[02468])/) ? (($1|$1>>5)&31) : $rs3;
+ foreach ($rs1,$rs2,$rd) {
+ return $ref if (!/%f([0-9]{1,2})/);
+ $_=$1;
+ if ($1>=32) {
+ return $ref if ($1&1);
+ # re-encode for upper double register addressing
+ $_=($1|$1>>5)&31;
+ }
+ }
+
+ return sprintf ".word\t0x%08x !%s",
+ 2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|0xc<<5|$rs2,
+ $ref;
+ } else {
+ return $ref;
+ }
+}
+
+sub uncamellia3 { # 3-argument instructions
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my ($ref,$opf);
+my %cmllopf = ( "camellia_fl" => 0x13c,
+ "camellia_fli" => 0x13d );
+
+ $ref = "$mnemonic\t$rs1,$rs2,$rd";
+
+ if (defined($opf=$cmllopf{$mnemonic})) {
+ foreach ($rs1,$rs2,$rd) {
+ return $ref if (!/%f([0-9]{1,2})/);
+ $_=$1;
+ if ($1>=32) {
+ return $ref if ($1&1);
+ # re-encode for upper double register addressing
+ $_=($1|$1>>5)&31;
+ }
+ }
+
+ return sprintf ".word\t0x%08x !%s",
+ 2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2,
+ $ref;
+ } else {
+ return $ref;
+ }
+}
+
+sub unmovxtox { # 2-argument instructions
+my ($mnemonic,$rs,$rd)=@_;
+my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24, "f" => 0 );
+my ($ref,$opf);
+my %movxopf = ( "movdtox" => 0x110,
+ "movstouw" => 0x111,
+ "movstosw" => 0x113,
+ "movxtod" => 0x118,
+ "movwtos" => 0x119 );
+
+ $ref = "$mnemonic\t$rs,$rd";
+
+ if (defined($opf=$movxopf{$mnemonic})) {
+ foreach ($rs,$rd) {
+ return $ref if (!/%([fgoli])([0-9]{1,2})/);
+ $_=$bias{$1}+$2;
+ if ($2>=32) {
+ return $ref if ($2&1);
+ # re-encode for upper double register addressing
+ $_=($2|$2>>5)&31;
+ }
+ }
+
+ return sprintf ".word\t0x%08x !%s",
+ 2<<30|$rd<<25|0x36<<19|$opf<<5|$rs,
+ $ref;
+ } else {
+ return $ref;
+ }
+}
+
+sub undes {
+my ($mnemonic)=shift;
+my @args=@_;
+my ($ref,$opf);
+my %desopf = ( "des_round" => 0b1001,
+ "des_ip" => 0b100110100,
+ "des_iip" => 0b100110101,
+ "des_kexpand" => 0b100110110 );
+
+ $ref = "$mnemonic\t".join(",",@_);
+
+ if (defined($opf=$desopf{$mnemonic})) { # 4-arg
+ if ($mnemonic eq "des_round") {
+ foreach (@args[0..3]) {
+ return $ref if (!/%f([0-9]{1,2})/);
+ $_=$1;
+ if ($1>=32) {
+ return $ref if ($1&1);
+ # re-encode for upper double register addressing
+ $_=($1|$1>>5)&31;
+ }
+ }
+ return sprintf ".word\t0x%08x !%s",
+ 2<<30|0b011001<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<9|$args[3]<<25,
+ $ref;
+ } elsif ($mnemonic eq "des_kexpand") { # 3-arg
+ foreach (@args[0..2]) {
+ return $ref if (!/(%f)?([0-9]{1,2})/);
+ $_=$2;
+ if ($2>=32) {
+ return $ref if ($2&1);
+ # re-encode for upper double register addressing
+ $_=($2|$2>>5)&31;
+ }
+ }
+ return sprintf ".word\t0x%08x !%s",
+ 2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<25,
+ $ref;
+ } else { # 2-arg
+ foreach (@args[0..1]) {
+ return $ref if (!/%f([0-9]{1,2})/);
+ $_=$1;
+ if ($1>=32) {
+ return $ref if ($2&1);
+ # re-encode for upper double register addressing
+ $_=($1|$1>>5)&31;
+ }
+ }
+ return sprintf ".word\t0x%08x !%s",
+ 2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]<<25,
+ $ref;
+ }
+ } else {
+ return $ref;
+ }
+}
+
+sub emit_assembler {
+ foreach (split("\n",$::code)) {
+ s/\`([^\`]*)\`/eval $1/ge;
+
+ s/\b(f[a-z]+2[sd]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})\s*$/$1\t%f0,$2,$3/go;
+
+ s/\b(aes_[edk][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
+ &unaes_round($1,$2,$3,$4,$5)
+ /geo or
+ s/\b(aes_kexpand[02])\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
+ &unaes_kexpand($1,$2,$3,$4)
+ /geo or
+ s/\b(camellia_f)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
+ &uncamellia_f($1,$2,$3,$4,$5)
+ /geo or
+ s/\b(camellia_[^s]+)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
+ &uncamellia3($1,$2,$3,$4)
+ /geo or
+ s/\b(des_\w+)\s+(%f[0-9]{1,2}),\s*([%fx0-9]+)(?:,\s*(%f[0-9]{1,2})(?:,\s*(%f[0-9]{1,2}))?)?/
+ &undes($1,$2,$3,$4,$5)
+ /geo or
+ s/\b(mov[ds]to\w+)\s+(%f[0-9]{1,2}),\s*(%[goli][0-7])/
+ &unmovxtox($1,$2,$3)
+ /geo or
+ s/\b(mov[xw]to[ds])\s+(%[goli][0-7]),\s*(%f[0-9]{1,2})/
+ &unmovxtox($1,$2,$3)
+ /geo or
+ s/\b([fb][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
+ &unvis($1,$2,$3,$4)
+ /geo or
+ s/\b(umulxhi|bmask|addxc[c]{0,2}|alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
+ &unvis3($1,$2,$3,$4)
+ /geo;
+
+ print $_,"\n";
+ }
+}
+
+1;
diff --git a/openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl b/openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl
new file mode 100755
index 0000000..6eaefcf
--- /dev/null
+++ b/openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl
@@ -0,0 +1,1186 @@
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# Ascetic x86_64 AT&T to MASM/NASM assembler translator by <appro>.
+#
+# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T
+# format is way easier to parse. Because it's simpler to "gear" from
+# Unix ABI to Windows one [see cross-reference "card" at the end of
+# file]. Because Linux targets were available first...
+#
+# In addition the script also "distills" code suitable for GNU
+# assembler, so that it can be compiled with more rigid assemblers,
+# such as Solaris /usr/ccs/bin/as.
+#
+# This translator is not designed to convert *arbitrary* assembler
+# code from AT&T format to MASM one. It's designed to convert just
+# enough to provide for dual-ABI OpenSSL modules development...
+# There *are* limitations and you might have to modify your assembler
+# code or this script to achieve the desired result...
+#
+# Currently recognized limitations:
+#
+# - can't use multiple ops per line;
+#
+# Dual-ABI styling rules.
+#
+# 1. Adhere to Unix register and stack layout [see cross-reference
+# ABI "card" at the end for explanation].
+# 2. Forget about "red zone," stick to more traditional blended
+# stack frame allocation. If volatile storage is actually required
+# that is. If not, just leave the stack as is.
+# 3. Functions tagged with ".type name,@function" get crafted with
+# unified Win64 prologue and epilogue automatically. If you want
+# to take care of ABI differences yourself, tag functions as
+# ".type name,@abi-omnipotent" instead.
+# 4. To optimize the Win64 prologue you can specify number of input
+# arguments as ".type name,@function,N." Keep in mind that if N is
+# larger than 6, then you *have to* write "abi-omnipotent" code,
+# because >6 cases can't be addressed with unified prologue.
+# 5. Name local labels as .L*, do *not* use dynamic labels such as 1:
+# (sorry about latter).
+# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is
+# required to identify the spots, where to inject Win64 epilogue!
+# But on the pros, it's then prefixed with rep automatically:-)
+# 7. Stick to explicit ip-relative addressing. If you have to use
+# GOTPCREL addressing, stick to mov symbol@GOTPCREL(%rip),%r??.
+# Both are recognized and translated to proper Win64 addressing
+# modes. To support legacy code a synthetic directive, .picmeup,
+# is implemented. It puts address of the *next* instruction into
+# target register, e.g.:
+#
+# .picmeup %rax
+# lea .Label-.(%rax),%rax
+#
+# 8. In order to provide for structured exception handling unified
+# Win64 prologue copies %rsp value to %rax. For further details
+# see SEH paragraph at the end.
+# 9. .init segment is allowed to contain calls to functions only.
+# a. If function accepts more than 4 arguments *and* >4th argument
+# is declared as non 64-bit value, do clear its upper part.
+
+
+use strict;
+
+my $flavour = shift;
+my $output = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+open STDOUT,">$output" || die "can't open $output: $!"
+ if (defined($output));
+
+my $gas=1; $gas=0 if ($output =~ /\.asm$/);
+my $elf=1; $elf=0 if (!$gas);
+my $win64=0;
+my $prefix="";
+my $decor=".L";
+
+my $masmref=8 + 50727*2**-32; # 8.00.50727 shipped with VS2005
+my $masm=0;
+my $PTR=" PTR";
+
+my $nasmref=2.03;
+my $nasm=0;
+
+if ($flavour eq "mingw64") { $gas=1; $elf=0; $win64=1;
+ $prefix=`echo __USER_LABEL_PREFIX__ | $ENV{CC} -E -P -`;
+ $prefix =~ s|\R$||; # Better chomp
+ }
+elsif ($flavour eq "macosx") { $gas=1; $elf=0; $prefix="_"; $decor="L\$"; }
+elsif ($flavour eq "masm") { $gas=0; $elf=0; $masm=$masmref; $win64=1; $decor="\$L\$"; }
+elsif ($flavour eq "nasm") { $gas=0; $elf=0; $nasm=$nasmref; $win64=1; $decor="\$L\$"; $PTR=""; }
+elsif (!$gas)
+{ if ($ENV{ASM} =~ m/nasm/ && `nasm -v` =~ m/version ([0-9]+)\.([0-9]+)/i)
+ { $nasm = $1 + $2*0.01; $PTR=""; }
+ elsif (`ml64 2>&1` =~ m/Version ([0-9]+)\.([0-9]+)(\.([0-9]+))?/)
+ { $masm = $1 + $2*2**-16 + $4*2**-32; }
+ die "no assembler found on %PATH" if (!($nasm || $masm));
+ $win64=1;
+ $elf=0;
+ $decor="\$L\$";
+}
+
+my $current_segment;
+my $current_function;
+my %globals;
+
+{ package opcode; # pick up opcodes
+ sub re {
+ my ($class, $line) = @_;
+ my $self = {};
+ my $ret;
+
+ if ($$line =~ /^([a-z][a-z0-9]*)/i) {
+ bless $self,$class;
+ $self->{op} = $1;
+ $ret = $self;
+ $$line = substr($$line,@+[0]); $$line =~ s/^\s+//;
+
+ undef $self->{sz};
+ if ($self->{op} =~ /^(movz)x?([bw]).*/) { # movz is pain...
+ $self->{op} = $1;
+ $self->{sz} = $2;
+ } elsif ($self->{op} =~ /call|jmp/) {
+ $self->{sz} = "";
+ } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op|insrw)/) { # SSEn
+ $self->{sz} = "";
+ } elsif ($self->{op} =~ /^v/) { # VEX
+ $self->{sz} = "";
+ } elsif ($self->{op} =~ /mov[dq]/ && $$line =~ /%xmm/) {
+ $self->{sz} = "";
+ } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) {
+ $self->{op} = $1;
+ $self->{sz} = $2;
+ }
+ }
+ $ret;
+ }
+ sub size {
+ my ($self, $sz) = @_;
+ $self->{sz} = $sz if (defined($sz) && !defined($self->{sz}));
+ $self->{sz};
+ }
+ sub out {
+ my $self = shift;
+ if ($gas) {
+ if ($self->{op} eq "movz") { # movz is pain...
+ sprintf "%s%s%s",$self->{op},$self->{sz},shift;
+ } elsif ($self->{op} =~ /^set/) {
+ "$self->{op}";
+ } elsif ($self->{op} eq "ret") {
+ my $epilogue = "";
+ if ($win64 && $current_function->{abi} eq "svr4") {
+ $epilogue = "movq 8(%rsp),%rdi\n\t" .
+ "movq 16(%rsp),%rsi\n\t";
+ }
+ $epilogue . ".byte 0xf3,0xc3";
+ } elsif ($self->{op} eq "call" && !$elf && $current_segment eq ".init") {
+ ".p2align\t3\n\t.quad";
+ } else {
+ "$self->{op}$self->{sz}";
+ }
+ } else {
+ $self->{op} =~ s/^movz/movzx/;
+ if ($self->{op} eq "ret") {
+ $self->{op} = "";
+ if ($win64 && $current_function->{abi} eq "svr4") {
+ $self->{op} = "mov rdi,QWORD$PTR\[8+rsp\]\t;WIN64 epilogue\n\t".
+ "mov rsi,QWORD$PTR\[16+rsp\]\n\t";
+ }
+ $self->{op} .= "DB\t0F3h,0C3h\t\t;repret";
+ } elsif ($self->{op} =~ /^(pop|push)f/) {
+ $self->{op} .= $self->{sz};
+ } elsif ($self->{op} eq "call" && $current_segment eq ".CRT\$XCU") {
+ $self->{op} = "\tDQ";
+ }
+ $self->{op};
+ }
+ }
+ sub mnemonic {
+ my ($self, $op) = @_;
+ $self->{op}=$op if (defined($op));
+ $self->{op};
+ }
+}
+{ package const; # pick up constants, which start with $
+ sub re {
+ my ($class, $line) = @_;
+ my $self = {};
+ my $ret;
+
+ if ($$line =~ /^\$([^,]+)/) {
+ bless $self, $class;
+ $self->{value} = $1;
+ $ret = $self;
+ $$line = substr($$line,@+[0]); $$line =~ s/^\s+//;
+ }
+ $ret;
+ }
+ sub out {
+ my $self = shift;
+
+ $self->{value} =~ s/\b(0b[0-1]+)/oct($1)/eig;
+ if ($gas) {
+ # Solaris /usr/ccs/bin/as can't handle multiplications
+ # in $self->{value}
+ my $value = $self->{value};
+ no warnings; # oct might complain about overflow, ignore here...
+ $value =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi;
+ if ($value =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg) {
+ $self->{value} = $value;
+ }
+ sprintf "\$%s",$self->{value};
+ } else {
+ my $value = $self->{value};
+ $value =~ s/0x([0-9a-f]+)/0$1h/ig if ($masm);
+ sprintf "%s",$value;
+ }
+ }
+}
+{ package ea; # pick up effective addresses: expr(%reg,%reg,scale)
+ sub re {
+ my ($class, $line, $opcode) = @_;
+ my $self = {};
+ my $ret;
+
+ # optional * ----vvv--- appears in indirect jmp/call
+ if ($$line =~ /^(\*?)([^\(,]*)\(([%\w,]+)\)/) {
+ bless $self, $class;
+ $self->{asterisk} = $1;
+ $self->{label} = $2;
+ ($self->{base},$self->{index},$self->{scale})=split(/,/,$3);
+ $self->{scale} = 1 if (!defined($self->{scale}));
+ $ret = $self;
+ $$line = substr($$line,@+[0]); $$line =~ s/^\s+//;
+
+ if ($win64 && $self->{label} =~ s/\@GOTPCREL//) {
+ die if ($opcode->mnemonic() ne "mov");
+ $opcode->mnemonic("lea");
+ }
+ $self->{base} =~ s/^%//;
+ $self->{index} =~ s/^%// if (defined($self->{index}));
+ $self->{opcode} = $opcode;
+ }
+ $ret;
+ }
+ sub size {}
+ sub out {
+ my ($self, $sz) = @_;
+
+ $self->{label} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei;
+ $self->{label} =~ s/\.L/$decor/g;
+
+ # Silently convert all EAs to 64-bit. This is required for
+ # elder GNU assembler and results in more compact code,
+ # *but* most importantly AES module depends on this feature!
+ $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/;
+ $self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/;
+
+ # Solaris /usr/ccs/bin/as can't handle multiplications
+ # in $self->{label}...
+ use integer;
+ $self->{label} =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi;
+ $self->{label} =~ s/\b([0-9]+\s*[\*\/\%]\s*[0-9]+)\b/eval($1)/eg;
+
+ # Some assemblers insist on signed presentation of 32-bit
+ # offsets, but sign extension is a tricky business in perl...
+ if ((1<<31)<<1) {
+ $self->{label} =~ s/\b([0-9]+)\b/$1<<32>>32/eg;
+ } else {
+ $self->{label} =~ s/\b([0-9]+)\b/$1>>0/eg;
+ }
+
+ if (!$self->{label} && $self->{index} && $self->{scale}==1 &&
+ $self->{base} =~ /(rbp|r13)/) {
+ $self->{base} = $self->{index}; $self->{index} = $1;
+ }
+
+ if ($gas) {
+ $self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64");
+
+ if (defined($self->{index})) {
+ sprintf "%s%s(%s,%%%s,%d)",$self->{asterisk},
+ $self->{label},
+ $self->{base}?"%$self->{base}":"",
+ $self->{index},$self->{scale};
+ } else {
+ sprintf "%s%s(%%%s)", $self->{asterisk},$self->{label},$self->{base};
+ }
+ } else {
+ my %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR",
+ l=>"DWORD$PTR", d=>"DWORD$PTR",
+ q=>"QWORD$PTR", o=>"OWORD$PTR",
+ x=>"XMMWORD$PTR", y=>"YMMWORD$PTR", z=>"ZMMWORD$PTR" );
+
+ $self->{label} =~ s/\./\$/g;
+ $self->{label} =~ s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/ig;
+ $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/);
+
+ my $mnemonic = $self->{opcode}->mnemonic();
+ ($self->{asterisk}) && ($sz="q") ||
+ ($mnemonic =~ /^v?mov([qd])$/) && ($sz=$1) ||
+ ($mnemonic =~ /^v?pinsr([qdwb])$/) && ($sz=$1) ||
+ ($mnemonic =~ /^vpbroadcast([qdwb])$/) && ($sz=$1) ||
+ ($mnemonic =~ /^v(?!perm)[a-z]+[fi]128$/) && ($sz="x");
+
+ if (defined($self->{index})) {
+ sprintf "%s[%s%s*%d%s]",$szmap{$sz},
+ $self->{label}?"$self->{label}+":"",
+ $self->{index},$self->{scale},
+ $self->{base}?"+$self->{base}":"";
+ } elsif ($self->{base} eq "rip") {
+ sprintf "%s[%s]",$szmap{$sz},$self->{label};
+ } else {
+ sprintf "%s[%s%s]",$szmap{$sz},
+ $self->{label}?"$self->{label}+":"",
+ $self->{base};
+ }
+ }
+ }
+}
+{ package register; # pick up registers, which start with %.
+ sub re {
+ my ($class, $line, $opcode) = @_;
+ my $self = {};
+ my $ret;
+
+ # optional * ----vvv--- appears in indirect jmp/call
+ if ($$line =~ /^(\*?)%(\w+)/) {
+ bless $self,$class;
+ $self->{asterisk} = $1;
+ $self->{value} = $2;
+ $opcode->size($self->size());
+ $ret = $self;
+ $$line = substr($$line,@+[0]); $$line =~ s/^\s+//;
+ }
+ $ret;
+ }
+ sub size {
+ my $self = shift;
+ my $ret;
+
+ if ($self->{value} =~ /^r[\d]+b$/i) { $ret="b"; }
+ elsif ($self->{value} =~ /^r[\d]+w$/i) { $ret="w"; }
+ elsif ($self->{value} =~ /^r[\d]+d$/i) { $ret="l"; }
+ elsif ($self->{value} =~ /^r[\w]+$/i) { $ret="q"; }
+ elsif ($self->{value} =~ /^[a-d][hl]$/i){ $ret="b"; }
+ elsif ($self->{value} =~ /^[\w]{2}l$/i) { $ret="b"; }
+ elsif ($self->{value} =~ /^[\w]{2}$/i) { $ret="w"; }
+ elsif ($self->{value} =~ /^e[a-z]{2}$/i){ $ret="l"; }
+
+ $ret;
+ }
+ sub out {
+ my $self = shift;
+ if ($gas) { sprintf "%s%%%s",$self->{asterisk},$self->{value}; }
+ else { $self->{value}; }
+ }
+}
+{ package label; # pick up labels, which end with :
+ sub re {
+ my ($class, $line) = @_;
+ my $self = {};
+ my $ret;
+
+ if ($$line =~ /(^[\.\w]+)\:/) {
+ bless $self,$class;
+ $self->{value} = $1;
+ $ret = $self;
+ $$line = substr($$line,@+[0]); $$line =~ s/^\s+//;
+
+ $self->{value} =~ s/^\.L/$decor/;
+ }
+ $ret;
+ }
+ sub out {
+ my $self = shift;
+
+ if ($gas) {
+ my $func = ($globals{$self->{value}} or $self->{value}) . ":";
+ if ($win64 &&
+ $current_function->{name} eq $self->{value} &&
+ $current_function->{abi} eq "svr4") {
+ $func .= "\n";
+ $func .= " movq %rdi,8(%rsp)\n";
+ $func .= " movq %rsi,16(%rsp)\n";
+ $func .= " movq %rsp,%rax\n";
+ $func .= "${decor}SEH_begin_$current_function->{name}:\n";
+ my $narg = $current_function->{narg};
+ $narg=6 if (!defined($narg));
+ $func .= " movq %rcx,%rdi\n" if ($narg>0);
+ $func .= " movq %rdx,%rsi\n" if ($narg>1);
+ $func .= " movq %r8,%rdx\n" if ($narg>2);
+ $func .= " movq %r9,%rcx\n" if ($narg>3);
+ $func .= " movq 40(%rsp),%r8\n" if ($narg>4);
+ $func .= " movq 48(%rsp),%r9\n" if ($narg>5);
+ }
+ $func;
+ } elsif ($self->{value} ne "$current_function->{name}") {
+ # Make all labels in masm global.
+ $self->{value} .= ":" if ($masm);
+ $self->{value} . ":";
+ } elsif ($win64 && $current_function->{abi} eq "svr4") {
+ my $func = "$current_function->{name}" .
+ ($nasm ? ":" : "\tPROC $current_function->{scope}") .
+ "\n";
+ $func .= " mov QWORD$PTR\[8+rsp\],rdi\t;WIN64 prologue\n";
+ $func .= " mov QWORD$PTR\[16+rsp\],rsi\n";
+ $func .= " mov rax,rsp\n";
+ $func .= "${decor}SEH_begin_$current_function->{name}:";
+ $func .= ":" if ($masm);
+ $func .= "\n";
+ my $narg = $current_function->{narg};
+ $narg=6 if (!defined($narg));
+ $func .= " mov rdi,rcx\n" if ($narg>0);
+ $func .= " mov rsi,rdx\n" if ($narg>1);
+ $func .= " mov rdx,r8\n" if ($narg>2);
+ $func .= " mov rcx,r9\n" if ($narg>3);
+ $func .= " mov r8,QWORD$PTR\[40+rsp\]\n" if ($narg>4);
+ $func .= " mov r9,QWORD$PTR\[48+rsp\]\n" if ($narg>5);
+ $func .= "\n";
+ } else {
+ "$current_function->{name}".
+ ($nasm ? ":" : "\tPROC $current_function->{scope}");
+ }
+ }
+}
+{ package expr; # pick up expressions
+ sub re {
+ my ($class, $line, $opcode) = @_;
+ my $self = {};
+ my $ret;
+
+ if ($$line =~ /(^[^,]+)/) {
+ bless $self,$class;
+ $self->{value} = $1;
+ $ret = $self;
+ $$line = substr($$line,@+[0]); $$line =~ s/^\s+//;
+
+ $self->{value} =~ s/\@PLT// if (!$elf);
+ $self->{value} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei;
+ $self->{value} =~ s/\.L/$decor/g;
+ $self->{opcode} = $opcode;
+ }
+ $ret;
+ }
+ sub out {
+ my $self = shift;
+ if ($nasm && $self->{opcode}->mnemonic()=~m/^j(?![re]cxz)/) {
+ "NEAR ".$self->{value};
+ } else {
+ $self->{value};
+ }
+ }
+}
+{ package directive; # pick up directives, which start with .
+ sub re {
+ my ($class, $line) = @_;
+ my $self = {};
+ my $ret;
+ my $dir;
+ my %opcode = # lea 2f-1f(%rip),%dst; 1: nop; 2:
+ ( "%rax"=>0x01058d48, "%rcx"=>0x010d8d48,
+ "%rdx"=>0x01158d48, "%rbx"=>0x011d8d48,
+ "%rsp"=>0x01258d48, "%rbp"=>0x012d8d48,
+ "%rsi"=>0x01358d48, "%rdi"=>0x013d8d48,
+ "%r8" =>0x01058d4c, "%r9" =>0x010d8d4c,
+ "%r10"=>0x01158d4c, "%r11"=>0x011d8d4c,
+ "%r12"=>0x01258d4c, "%r13"=>0x012d8d4c,
+ "%r14"=>0x01358d4c, "%r15"=>0x013d8d4c );
+
+ if ($$line =~ /^\s*(\.\w+)/) {
+ bless $self,$class;
+ $dir = $1;
+ $ret = $self;
+ undef $self->{value};
+ $$line = substr($$line,@+[0]); $$line =~ s/^\s+//;
+
+ SWITCH: for ($dir) {
+ /\.picmeup/ && do { if ($$line =~ /(%r[\w]+)/i) {
+ $dir="\t.long";
+ $$line=sprintf "0x%x,0x90000000",$opcode{$1};
+ }
+ last;
+ };
+ /\.global|\.globl|\.extern/
+ && do { $globals{$$line} = $prefix . $$line;
+ $$line = $globals{$$line} if ($prefix);
+ last;
+ };
+ /\.type/ && do { my ($sym,$type,$narg) = split(',',$$line);
+ if ($type eq "\@function") {
+ undef $current_function;
+ $current_function->{name} = $sym;
+ $current_function->{abi} = "svr4";
+ $current_function->{narg} = $narg;
+ $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE";
+ } elsif ($type eq "\@abi-omnipotent") {
+ undef $current_function;
+ $current_function->{name} = $sym;
+ $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE";
+ }
+ $$line =~ s/\@abi\-omnipotent/\@function/;
+ $$line =~ s/\@function.*/\@function/;
+ last;
+ };
+ /\.asciz/ && do { if ($$line =~ /^"(.*)"$/) {
+ $dir = ".byte";
+ $$line = join(",",unpack("C*",$1),0);
+ }
+ last;
+ };
+ /\.rva|\.long|\.quad/
+ && do { $$line =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei;
+ $$line =~ s/\.L/$decor/g;
+ last;
+ };
+ }
+
+ if ($gas) {
+ $self->{value} = $dir . "\t" . $$line;
+
+ if ($dir =~ /\.extern/) {
+ $self->{value} = ""; # swallow extern
+ } elsif (!$elf && $dir =~ /\.type/) {
+ $self->{value} = "";
+ $self->{value} = ".def\t" . ($globals{$1} or $1) . ";\t" .
+ (defined($globals{$1})?".scl 2;":".scl 3;") .
+ "\t.type 32;\t.endef"
+ if ($win64 && $$line =~ /([^,]+),\@function/);
+ } elsif (!$elf && $dir =~ /\.size/) {
+ $self->{value} = "";
+ if (defined($current_function)) {
+ $self->{value} .= "${decor}SEH_end_$current_function->{name}:"
+ if ($win64 && $current_function->{abi} eq "svr4");
+ undef $current_function;
+ }
+ } elsif (!$elf && $dir =~ /\.align/) {
+ $self->{value} = ".p2align\t" . (log($$line)/log(2));
+ } elsif ($dir eq ".section") {
+ $current_segment=$$line;
+ if (!$elf && $current_segment eq ".init") {
+ if ($flavour eq "macosx") { $self->{value} = ".mod_init_func"; }
+ elsif ($flavour eq "mingw64") { $self->{value} = ".section\t.ctors"; }
+ }
+ } elsif ($dir =~ /\.(text|data)/) {
+ $current_segment=".$1";
+ } elsif ($dir =~ /\.hidden/) {
+ if ($flavour eq "macosx") { $self->{value} = ".private_extern\t$prefix$$line"; }
+ elsif ($flavour eq "mingw64") { $self->{value} = ""; }
+ } elsif ($dir =~ /\.comm/) {
+ $self->{value} = "$dir\t$prefix$$line";
+ $self->{value} =~ s|,([0-9]+),([0-9]+)$|",$1,".log($2)/log(2)|e if ($flavour eq "macosx");
+ }
+ $$line = "";
+ return $self;
+ }
+
+ # non-gas case or nasm/masm
+ SWITCH: for ($dir) {
+ /\.text/ && do { my $v=undef;
+ if ($nasm) {
+ $v="section .text code align=64\n";
+ } else {
+ $v="$current_segment\tENDS\n" if ($current_segment);
+ $current_segment = ".text\$";
+ $v.="$current_segment\tSEGMENT ";
+ $v.=$masm>=$masmref ? "ALIGN(256)" : "PAGE";
+ $v.=" 'CODE'";
+ }
+ $self->{value} = $v;
+ last;
+ };
+ /\.data/ && do { my $v=undef;
+ if ($nasm) {
+ $v="section .data data align=8\n";
+ } else {
+ $v="$current_segment\tENDS\n" if ($current_segment);
+ $current_segment = "_DATA";
+ $v.="$current_segment\tSEGMENT";
+ }
+ $self->{value} = $v;
+ last;
+ };
+ /\.section/ && do { my $v=undef;
+ $$line =~ s/([^,]*).*/$1/;
+ $$line = ".CRT\$XCU" if ($$line eq ".init");
+ if ($nasm) {
+ $v="section $$line";
+ if ($$line=~/\.([px])data/) {
+ $v.=" rdata align=";
+ $v.=$1 eq "p"? 4 : 8;
+ } elsif ($$line=~/\.CRT\$/i) {
+ $v.=" rdata align=8";
+ }
+ } else {
+ $v="$current_segment\tENDS\n" if ($current_segment);
+ $v.="$$line\tSEGMENT";
+ if ($$line=~/\.([px])data/) {
+ $v.=" READONLY";
+ $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref);
+ } elsif ($$line=~/\.CRT\$/i) {
+ $v.=" READONLY ";
+ $v.=$masm>=$masmref ? "ALIGN(8)" : "DWORD";
+ }
+ }
+ $current_segment = $$line;
+ $self->{value} = $v;
+ last;
+ };
+ /\.extern/ && do { $self->{value} = "EXTERN\t".$$line;
+ $self->{value} .= ":NEAR" if ($masm);
+ last;
+ };
+ /\.globl|.global/
+ && do { $self->{value} = $masm?"PUBLIC":"global";
+ $self->{value} .= "\t".$$line;
+ last;
+ };
+ /\.size/ && do { if (defined($current_function)) {
+ undef $self->{value};
+ if ($current_function->{abi} eq "svr4") {
+ $self->{value}="${decor}SEH_end_$current_function->{name}:";
+ $self->{value}.=":\n" if($masm);
+ }
+ $self->{value}.="$current_function->{name}\tENDP" if($masm && $current_function->{name});
+ undef $current_function;
+ }
+ last;
+ };
+ /\.align/ && do { my $max = ($masm && $masm>=$masmref) ? 256 : 4096;
+ $self->{value} = "ALIGN\t".($$line>$max?$max:$$line);
+ last;
+ };
+ /\.(value|long|rva|quad)/
+ && do { my $sz = substr($1,0,1);
+ my @arr = split(/,\s*/,$$line);
+ my $last = pop(@arr);
+ my $conv = sub { my $var=shift;
+ $var=~s/^(0b[0-1]+)/oct($1)/eig;
+ $var=~s/^0x([0-9a-f]+)/0$1h/ig if ($masm);
+ if ($sz eq "D" && ($current_segment=~/.[px]data/ || $dir eq ".rva"))
+ { $var=~s/([_a-z\$\@][_a-z0-9\$\@]*)/$nasm?"$1 wrt ..imagebase":"imagerel $1"/egi; }
+ $var;
+ };
+
+ $sz =~ tr/bvlrq/BWDDQ/;
+ $self->{value} = "\tD$sz\t";
+ for (@arr) { $self->{value} .= &$conv($_).","; }
+ $self->{value} .= &$conv($last);
+ last;
+ };
+ /\.byte/ && do { my @str=split(/,\s*/,$$line);
+ map(s/(0b[0-1]+)/oct($1)/eig,@str);
+ map(s/0x([0-9a-f]+)/0$1h/ig,@str) if ($masm);
+ while ($#str>15) {
+ $self->{value}.="DB\t"
+ .join(",",@str[0..15])."\n";
+ foreach (0..15) { shift @str; }
+ }
+ $self->{value}.="DB\t"
+ .join(",",@str) if (@str);
+ last;
+ };
+ /\.comm/ && do { my @str=split(/,\s*/,$$line);
+ my $v=undef;
+ if ($nasm) {
+ $v.="common $prefix@str[0] @str[1]";
+ } else {
+ $v="$current_segment\tENDS\n" if ($current_segment);
+ $current_segment = "_DATA";
+ $v.="$current_segment\tSEGMENT\n";
+ $v.="COMM @str[0]:DWORD:".@str[1]/4;
+ }
+ $self->{value} = $v;
+ last;
+ };
+ }
+ $$line = "";
+ }
+
+ $ret;
+ }
+ sub out {
+ my $self = shift;
+ $self->{value};
+ }
+}
+
+sub rex {
+ my $opcode=shift;
+ my ($dst,$src,$rex)=@_;
+
+ $rex|=0x04 if($dst>=8);
+ $rex|=0x01 if($src>=8);
+ push @$opcode,($rex|0x40) if ($rex);
+}
+
+# Upon initial x86_64 introduction SSE>2 extensions were not introduced
+# yet. In order not to be bothered by tracing exact assembler versions,
+# but at the same time to provide a bare security minimum of AES-NI, we
+# hard-code some instructions. Extensions past AES-NI on the other hand
+# are traced by examining assembler version in individual perlasm
+# modules...
+
+my %regrm = ( "%eax"=>0, "%ecx"=>1, "%edx"=>2, "%ebx"=>3,
+ "%esp"=>4, "%ebp"=>5, "%esi"=>6, "%edi"=>7 );
+
+my $movq = sub { # elderly gas can't handle inter-register movq
+ my $arg = shift;
+ my @opcode=(0x66);
+ if ($arg =~ /%xmm([0-9]+),\s*%r(\w+)/) {
+ my ($src,$dst)=($1,$2);
+ if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }
+ rex(\@opcode,$src,$dst,0x8);
+ push @opcode,0x0f,0x7e;
+ push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M
+ @opcode;
+ } elsif ($arg =~ /%r(\w+),\s*%xmm([0-9]+)/) {
+ my ($src,$dst)=($2,$1);
+ if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }
+ rex(\@opcode,$src,$dst,0x8);
+ push @opcode,0x0f,0x6e;
+ push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M
+ @opcode;
+ } else {
+ ();
+ }
+};
+
+my $pextrd = sub {
+ if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*(%\w+)/) {
+ my @opcode=(0x66);
+ my $imm=$1;
+ my $src=$2;
+ my $dst=$3;
+ if ($dst =~ /%r([0-9]+)d/) { $dst = $1; }
+ elsif ($dst =~ /%e/) { $dst = $regrm{$dst}; }
+ rex(\@opcode,$src,$dst);
+ push @opcode,0x0f,0x3a,0x16;
+ push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M
+ push @opcode,$imm;
+ @opcode;
+ } else {
+ ();
+ }
+};
+
+my $pinsrd = sub {
+ if (shift =~ /\$([0-9]+),\s*(%\w+),\s*%xmm([0-9]+)/) {
+ my @opcode=(0x66);
+ my $imm=$1;
+ my $src=$2;
+ my $dst=$3;
+ if ($src =~ /%r([0-9]+)/) { $src = $1; }
+ elsif ($src =~ /%e/) { $src = $regrm{$src}; }
+ rex(\@opcode,$dst,$src);
+ push @opcode,0x0f,0x3a,0x22;
+ push @opcode,0xc0|(($dst&7)<<3)|($src&7); # ModR/M
+ push @opcode,$imm;
+ @opcode;
+ } else {
+ ();
+ }
+};
+
+my $pshufb = sub {
+ if (shift =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) {
+ my @opcode=(0x66);
+ rex(\@opcode,$2,$1);
+ push @opcode,0x0f,0x38,0x00;
+ push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M
+ @opcode;
+ } else {
+ ();
+ }
+};
+
+my $palignr = sub {
+ if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
+ my @opcode=(0x66);
+ rex(\@opcode,$3,$2);
+ push @opcode,0x0f,0x3a,0x0f;
+ push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
+ push @opcode,$1;
+ @opcode;
+ } else {
+ ();
+ }
+};
+
+my $pclmulqdq = sub {
+ if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
+ my @opcode=(0x66);
+ rex(\@opcode,$3,$2);
+ push @opcode,0x0f,0x3a,0x44;
+ push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
+ my $c=$1;
+ push @opcode,$c=~/^0/?oct($c):$c;
+ @opcode;
+ } else {
+ ();
+ }
+};
+
+my $rdrand = sub {
+ if (shift =~ /%[er](\w+)/) {
+ my @opcode=();
+ my $dst=$1;
+ if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }
+ rex(\@opcode,0,$dst,8);
+ push @opcode,0x0f,0xc7,0xf0|($dst&7);
+ @opcode;
+ } else {
+ ();
+ }
+};
+
+my $rdseed = sub {
+ if (shift =~ /%[er](\w+)/) {
+ my @opcode=();
+ my $dst=$1;
+ if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }
+ rex(\@opcode,0,$dst,8);
+ push @opcode,0x0f,0xc7,0xf8|($dst&7);
+ @opcode;
+ } else {
+ ();
+ }
+};
+
+sub rxb {
+ my $opcode=shift;
+ my ($dst,$src1,$src2,$rxb)=@_;
+
+ $rxb|=0x7<<5;
+ $rxb&=~(0x04<<5) if($dst>=8);
+ $rxb&=~(0x01<<5) if($src1>=8);
+ $rxb&=~(0x02<<5) if($src2>=8);
+ push @$opcode,$rxb;
+}
+
+my $vprotd = sub {
+ if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
+ my @opcode=(0x8f);
+ rxb(\@opcode,$3,$2,-1,0x08);
+ push @opcode,0x78,0xc2;
+ push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
+ my $c=$1;
+ push @opcode,$c=~/^0/?oct($c):$c;
+ @opcode;
+ } else {
+ ();
+ }
+};
+
+my $vprotq = sub {
+ if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
+ my @opcode=(0x8f);
+ rxb(\@opcode,$3,$2,-1,0x08);
+ push @opcode,0x78,0xc3;
+ push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
+ my $c=$1;
+ push @opcode,$c=~/^0/?oct($c):$c;
+ @opcode;
+ } else {
+ ();
+ }
+};
+
+my $endbranch = sub {
+ (0xf3,0x0f,0x1e,0xfa);
+};
+
+if ($nasm) {
+ print <<___;
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+___
+} elsif ($masm) {
+ print <<___;
+OPTION DOTNAME
+___
+}
+while(defined(my $line=<>)) {
+
+ $line =~ s|\R$||; # Better chomp
+
+ $line =~ s|[#!].*$||; # get rid of asm-style comments...
+ $line =~ s|/\*.*\*/||; # ... and C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning
+ $line =~ s|\s+$||; # ... and at the end
+
+ if (my $label=label->re(\$line)) { print $label->out(); }
+
+ if (my $directive=directive->re(\$line)) {
+ printf "%s",$directive->out();
+ } elsif (my $opcode=opcode->re(\$line)) {
+ my $asm = eval("\$".$opcode->mnemonic());
+
+ if ((ref($asm) eq 'CODE') && scalar(my @bytes=&$asm($line))) {
+ print $gas?".byte\t":"DB\t",join(',',@bytes),"\n";
+ next;
+ }
+
+ my @args;
+ ARGUMENT: while (1) {
+ my $arg;
+
+ ($arg=register->re(\$line, $opcode))||
+ ($arg=const->re(\$line)) ||
+ ($arg=ea->re(\$line, $opcode)) ||
+ ($arg=expr->re(\$line, $opcode)) ||
+ last ARGUMENT;
+
+ push @args,$arg;
+
+ last ARGUMENT if ($line !~ /^,/);
+
+ $line =~ s/^,\s*//;
+ } # ARGUMENT:
+
+ if ($#args>=0) {
+ my $insn;
+ my $sz=$opcode->size();
+
+ if ($gas) {
+ $insn = $opcode->out($#args>=1?$args[$#args]->size():$sz);
+ @args = map($_->out($sz),@args);
+ printf "\t%s\t%s",$insn,join(",",@args);
+ } else {
+ $insn = $opcode->out();
+ foreach (@args) {
+ my $arg = $_->out();
+ # $insn.=$sz compensates for movq, pinsrw, ...
+ if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; }
+ if ($arg =~ /^ymm[0-9]+$/) { $insn.=$sz; $sz="y" if(!$sz); last; }
+ if ($arg =~ /^zmm[0-9]+$/) { $insn.=$sz; $sz="z" if(!$sz); last; }
+ if ($arg =~ /^mm[0-9]+$/) { $insn.=$sz; $sz="q" if(!$sz); last; }
+ }
+ @args = reverse(@args);
+ undef $sz if ($nasm && $opcode->mnemonic() eq "lea");
+ printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args));
+ }
+ } else {
+ printf "\t%s",$opcode->out();
+ }
+ }
+
+ print $line,"\n";
+}
+
+print "\n$current_segment\tENDS\n" if ($current_segment && $masm);
+print "END\n" if ($masm);
+
+close STDOUT;
+
+ #################################################
+# Cross-reference x86_64 ABI "card"
+#
+# Unix Win64
+# %rax * *
+# %rbx - -
+# %rcx #4 #1
+# %rdx #3 #2
+# %rsi #2 -
+# %rdi #1 -
+# %rbp - -
+# %rsp - -
+# %r8 #5 #3
+# %r9 #6 #4
+# %r10 * *
+# %r11 * *
+# %r12 - -
+# %r13 - -
+# %r14 - -
+# %r15 - -
+#
+# (*) volatile register
+# (-) preserved by callee
+# (#) Nth argument, volatile
+#
+# In Unix terms top of stack is argument transfer area for arguments
+# which could not be accommodated in registers. Or in other words 7th
+# [integer] argument resides at 8(%rsp) upon function entry point.
+# 128 bytes above %rsp constitute a "red zone" which is not touched
+# by signal handlers and can be used as temporal storage without
+# allocating a frame.
+#
+# In Win64 terms N*8 bytes on top of stack is argument transfer area,
+# which belongs to/can be overwritten by callee. N is the number of
+# arguments passed to callee, *but* not less than 4! This means that
+# upon function entry point 5th argument resides at 40(%rsp), as well
+# as that 32 bytes from 8(%rsp) can always be used as temporal
+# storage [without allocating a frame]. One can actually argue that
+# one can assume a "red zone" above stack pointer under Win64 as well.
+# Point is that at apparently no occasion Windows kernel would alter
+# the area above user stack pointer in true asynchronous manner...
+#
+# All the above means that if assembler programmer adheres to Unix
+# register and stack layout, but disregards the "red zone" existence,
+# it's possible to use following prologue and epilogue to "gear" from
+# Unix to Win64 ABI in leaf functions with not more than 6 arguments.
+#
+# omnipotent_function:
+# ifdef WIN64
+# movq %rdi,8(%rsp)
+# movq %rsi,16(%rsp)
+# movq %rcx,%rdi ; if 1st argument is actually present
+# movq %rdx,%rsi ; if 2nd argument is actually ...
+# movq %r8,%rdx ; if 3rd argument is ...
+# movq %r9,%rcx ; if 4th argument ...
+# movq 40(%rsp),%r8 ; if 5th ...
+# movq 48(%rsp),%r9 ; if 6th ...
+# endif
+# ...
+# ifdef WIN64
+# movq 8(%rsp),%rdi
+# movq 16(%rsp),%rsi
+# endif
+# ret
+#
+ #################################################
+# Win64 SEH, Structured Exception Handling.
+#
+# Unlike on Unix systems(*) lack of Win64 stack unwinding information
+# has undesired side-effect at run-time: if an exception is raised in
+# assembler subroutine such as those in question (basically we're
+# referring to segmentation violations caused by malformed input
+# parameters), the application is briskly terminated without invoking
+# any exception handlers, most notably without generating memory dump
+# or any user notification whatsoever. This poses a problem. It's
+# possible to address it by registering custom language-specific
+# handler that would restore processor context to the state at
+# subroutine entry point and return "exception is not handled, keep
+# unwinding" code. Writing such handler can be a challenge... But it's
+# doable, though requires certain coding convention. Consider following
+# snippet:
+#
+# .type function,@function
+# function:
+# movq %rsp,%rax # copy rsp to volatile register
+# pushq %r15 # save non-volatile registers
+# pushq %rbx
+# pushq %rbp
+# movq %rsp,%r11
+# subq %rdi,%r11 # prepare [variable] stack frame
+# andq $-64,%r11
+# movq %rax,0(%r11) # check for exceptions
+# movq %r11,%rsp # allocate [variable] stack frame
+# movq %rax,0(%rsp) # save original rsp value
+# magic_point:
+# ...
+# movq 0(%rsp),%rcx # pull original rsp value
+# movq -24(%rcx),%rbp # restore non-volatile registers
+# movq -16(%rcx),%rbx
+# movq -8(%rcx),%r15
+# movq %rcx,%rsp # restore original rsp
+# ret
+# .size function,.-function
+#
+# The key is that up to magic_point copy of original rsp value remains
+# in chosen volatile register and no non-volatile register, except for
+# rsp, is modified. While past magic_point rsp remains constant till
+# the very end of the function. In this case custom language-specific
+# exception handler would look like this:
+#
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+# CONTEXT *context,DISPATCHER_CONTEXT *disp)
+# { ULONG64 *rsp = (ULONG64 *)context->Rax;
+# if (context->Rip >= magic_point)
+# { rsp = ((ULONG64 **)context->Rsp)[0];
+# context->Rbp = rsp[-3];
+# context->Rbx = rsp[-2];
+# context->R15 = rsp[-1];
+# }
+# context->Rsp = (ULONG64)rsp;
+# context->Rdi = rsp[1];
+# context->Rsi = rsp[2];
+#
+# memcpy (disp->ContextRecord,context,sizeof(CONTEXT));
+# RtlVirtualUnwind(UNW_FLAG_NHANDLER,disp->ImageBase,
+# dips->ControlPc,disp->FunctionEntry,disp->ContextRecord,
+# &disp->HandlerData,&disp->EstablisherFrame,NULL);
+# return ExceptionContinueSearch;
+# }
+#
+# It's appropriate to implement this handler in assembler, directly in
+# function's module. In order to do that one has to know members'
+# offsets in CONTEXT and DISPATCHER_CONTEXT structures and some constant
+# values. Here they are:
+#
+# CONTEXT.Rax 120
+# CONTEXT.Rcx 128
+# CONTEXT.Rdx 136
+# CONTEXT.Rbx 144
+# CONTEXT.Rsp 152
+# CONTEXT.Rbp 160
+# CONTEXT.Rsi 168
+# CONTEXT.Rdi 176
+# CONTEXT.R8 184
+# CONTEXT.R9 192
+# CONTEXT.R10 200
+# CONTEXT.R11 208
+# CONTEXT.R12 216
+# CONTEXT.R13 224
+# CONTEXT.R14 232
+# CONTEXT.R15 240
+# CONTEXT.Rip 248
+# CONTEXT.Xmm6 512
+# sizeof(CONTEXT) 1232
+# DISPATCHER_CONTEXT.ControlPc 0
+# DISPATCHER_CONTEXT.ImageBase 8
+# DISPATCHER_CONTEXT.FunctionEntry 16
+# DISPATCHER_CONTEXT.EstablisherFrame 24
+# DISPATCHER_CONTEXT.TargetIp 32
+# DISPATCHER_CONTEXT.ContextRecord 40
+# DISPATCHER_CONTEXT.LanguageHandler 48
+# DISPATCHER_CONTEXT.HandlerData 56
+# UNW_FLAG_NHANDLER 0
+# ExceptionContinueSearch 1
+#
+# In order to tie the handler to the function one has to compose
+# couple of structures: one for .xdata segment and one for .pdata.
+#
+# UNWIND_INFO structure for .xdata segment would be
+#
+# function_unwind_info:
+# .byte 9,0,0,0
+# .rva handler
+#
+# This structure designates exception handler for a function with
+# zero-length prologue, no stack frame or frame register.
+#
+# To facilitate composing of .pdata structures, auto-generated "gear"
+# prologue copies rsp value to rax and denotes next instruction with
+# .LSEH_begin_{function_name} label. This essentially defines the SEH
+# styling rule mentioned in the beginning. Position of this label is
+# chosen in such manner that possible exceptions raised in the "gear"
+# prologue would be accounted to caller and unwound from latter's frame.
+# End of function is marked with respective .LSEH_end_{function_name}
+# label. To summarize, .pdata segment would contain
+#
+# .rva .LSEH_begin_function
+# .rva .LSEH_end_function
+# .rva function_unwind_info
+#
+# Reference to function_unwind_info from .xdata segment is the anchor.
+# In case you wonder why references are 32-bit .rvas and not 64-bit
+# .quads. References put into these two segments are required to be
+# *relative* to the base address of the current binary module, a.k.a.
+# image base. No Win64 module, be it .exe or .dll, can be larger than
+# 2GB and thus such relative references can be and are accommodated in
+# 32 bits.
+#
+# Having reviewed the example function code, one can argue that "movq
+# %rsp,%rax" above is redundant. It is not! Keep in mind that on Unix
+# rax would contain an undefined value. If this "offends" you, use
+# another register and refrain from modifying rax till magic_point is
+# reached, i.e. as if it was a non-volatile register. If more registers
+# are required prior [variable] frame setup is completed, note that
+# nobody says that you can have only one "magic point." You can
+# "liberate" non-volatile registers by denoting last stack off-load
+# instruction and reflecting it in finer grade unwind logic in handler.
+# After all, isn't it why it's called *language-specific* handler...
+#
+# Attentive reader can notice that exceptions would be mishandled in
+# auto-generated "gear" epilogue. Well, exception effectively can't
+# occur there, because if memory area used by it was subject to
+# segmentation violation, then it would be raised upon call to the
+# function (and as already mentioned be accounted to caller, which is
+# not a problem). If you're still not comfortable, then define tail
+# "magic point" just prior ret instruction and have handler treat it...
+#
+# (*) Note that we're talking about run-time, not debug-time. Lack of
+# unwind information makes debugging hard on both Windows and
+# Unix. "Unlike" referes to the fact that on Unix signal handler
+# will always be invoked, core dumped and appropriate exit code
+# returned to parent (for user notification).
diff --git a/openssl-1.1.0h/crypto/perlasm/x86asm.pl b/openssl-1.1.0h/crypto/perlasm/x86asm.pl
new file mode 100644
index 0000000..1ff46c9
--- /dev/null
+++ b/openssl-1.1.0h/crypto/perlasm/x86asm.pl
@@ -0,0 +1,310 @@
+#! /usr/bin/env perl
+# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# require 'x86asm.pl';
+# &asm_init(<flavor>,"des-586.pl"[,$i386only]);
+# &function_begin("foo");
+# ...
+# &function_end("foo");
+# &asm_finish
+
+$out=();
+$i386=0;
+
+# AUTOLOAD is this context has quite unpleasant side effect, namely
+# that typos in function calls effectively go to assembler output,
+# but on the pros side we don't have to implement one subroutine per
+# each opcode...
+sub ::AUTOLOAD
+{ my $opcode = $AUTOLOAD;
+
+ die "more than 4 arguments passed to $opcode" if ($#_>3);
+
+ $opcode =~ s/.*:://;
+ if ($opcode =~ /^push/) { $stack+=4; }
+ elsif ($opcode =~ /^pop/) { $stack-=4; }
+
+ &generic($opcode,@_) or die "undefined subroutine \&$AUTOLOAD";
+}
+
+sub ::emit
+{ my $opcode=shift;
+
+ if ($#_==-1) { push(@out,"\t$opcode\n"); }
+ else { push(@out,"\t$opcode\t".join(',',@_)."\n"); }
+}
+
+sub ::LB
+{ $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'low byte'";
+ $1."l";
+}
+sub ::HB
+{ $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'high byte'";
+ $1."h";
+}
+sub ::stack_push{ my $num=$_[0]*4; $stack+=$num; &sub("esp",$num); }
+sub ::stack_pop { my $num=$_[0]*4; $stack-=$num; &add("esp",$num); }
+sub ::blindpop { &pop($_[0]); $stack+=4; }
+sub ::wparam { &DWP($stack+4*$_[0],"esp"); }
+sub ::swtmp { &DWP(4*$_[0],"esp"); }
+
+sub ::bswap
+{ if ($i386) # emulate bswap for i386
+ { &comment("bswap @_");
+ &xchg(&HB(@_),&LB(@_));
+ &ror (@_,16);
+ &xchg(&HB(@_),&LB(@_));
+ }
+ else
+ { &generic("bswap",@_); }
+}
+# These are made-up opcodes introduced over the years essentially
+# by ignorance, just alias them to real ones...
+sub ::movb { &mov(@_); }
+sub ::xorb { &xor(@_); }
+sub ::rotl { &rol(@_); }
+sub ::rotr { &ror(@_); }
+sub ::exch { &xchg(@_); }
+sub ::halt { &hlt; }
+sub ::movz { &movzx(@_); }
+sub ::pushf { &pushfd; }
+sub ::popf { &popfd; }
+
+# 3 argument instructions
+sub ::movq
+{ my($p1,$p2,$optimize)=@_;
+
+ if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/)
+ # movq between mmx registers can sink Intel CPUs
+ { &::pshufw($p1,$p2,0xe4); }
+ else
+ { &::generic("movq",@_); }
+}
+
+# SSE>2 instructions
+my %regrm = ( "eax"=>0, "ecx"=>1, "edx"=>2, "ebx"=>3,
+ "esp"=>4, "ebp"=>5, "esi"=>6, "edi"=>7 );
+sub ::pextrd
+{ my($dst,$src,$imm)=@_;
+ if ("$dst:$src" =~ /(e[a-dsd][ixp]):xmm([0-7])/)
+ { &::data_byte(0x66,0x0f,0x3a,0x16,0xc0|($2<<3)|$regrm{$1},$imm); }
+ else
+ { &::generic("pextrd",@_); }
+}
+
+sub ::pinsrd
+{ my($dst,$src,$imm)=@_;
+ if ("$dst:$src" =~ /xmm([0-7]):(e[a-dsd][ixp])/)
+ { &::data_byte(0x66,0x0f,0x3a,0x22,0xc0|($1<<3)|$regrm{$2},$imm); }
+ else
+ { &::generic("pinsrd",@_); }
+}
+
+sub ::pshufb
+{ my($dst,$src)=@_;
+ if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
+ { &data_byte(0x66,0x0f,0x38,0x00,0xc0|($1<<3)|$2); }
+ else
+ { &::generic("pshufb",@_); }
+}
+
+sub ::palignr
+{ my($dst,$src,$imm)=@_;
+ if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
+ { &::data_byte(0x66,0x0f,0x3a,0x0f,0xc0|($1<<3)|$2,$imm); }
+ else
+ { &::generic("palignr",@_); }
+}
+
+sub ::pclmulqdq
+{ my($dst,$src,$imm)=@_;
+ if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
+ { &::data_byte(0x66,0x0f,0x3a,0x44,0xc0|($1<<3)|$2,$imm); }
+ else
+ { &::generic("pclmulqdq",@_); }
+}
+
+sub ::rdrand
+{ my ($dst)=@_;
+ if ($dst =~ /(e[a-dsd][ixp])/)
+ { &::data_byte(0x0f,0xc7,0xf0|$regrm{$dst}); }
+ else
+ { &::generic("rdrand",@_); }
+}
+
+sub ::rdseed
+{ my ($dst)=@_;
+ if ($dst =~ /(e[a-dsd][ixp])/)
+ { &::data_byte(0x0f,0xc7,0xf8|$regrm{$dst}); }
+ else
+ { &::generic("rdrand",@_); }
+}
+
+sub rxb {
+ local *opcode=shift;
+ my ($dst,$src1,$src2,$rxb)=@_;
+
+ $rxb|=0x7<<5;
+ $rxb&=~(0x04<<5) if($dst>=8);
+ $rxb&=~(0x01<<5) if($src1>=8);
+ $rxb&=~(0x02<<5) if($src2>=8);
+ push @opcode,$rxb;
+}
+
+sub ::vprotd
+{ my $args=join(',',@_);
+ if ($args =~ /xmm([0-7]),xmm([0-7]),([x0-9a-f]+)/)
+ { my @opcode=(0x8f);
+ rxb(\@opcode,$1,$2,-1,0x08);
+ push @opcode,0x78,0xc2;
+ push @opcode,0xc0|($2&7)|(($1&7)<<3); # ModR/M
+ my $c=$3;
+ push @opcode,$c=~/^0/?oct($c):$c;
+ &::data_byte(@opcode);
+ }
+ else
+ { &::generic("vprotd",@_); }
+}
+
+sub ::endbranch
+{
+ &::data_byte(0xf3,0x0f,0x1e,0xfb);
+}
+
+# label management
+$lbdecor="L"; # local label decoration, set by package
+$label="000";
+
+sub ::islabel # see is argument is a known label
+{ my $i;
+ foreach $i (values %label) { return $i if ($i eq $_[0]); }
+ $label{$_[0]}; # can be undef
+}
+
+sub ::label # instantiate a function-scope label
+{ if (!defined($label{$_[0]}))
+ { $label{$_[0]}="${lbdecor}${label}${_[0]}"; $label++; }
+ $label{$_[0]};
+}
+
+sub ::LABEL # instantiate a file-scope label
+{ $label{$_[0]}=$_[1] if (!defined($label{$_[0]}));
+ $label{$_[0]};
+}
+
+sub ::static_label { &::LABEL($_[0],$lbdecor.$_[0]); }
+
+sub ::set_label_B { push(@out,"@_:\n"); }
+sub ::set_label
+{ my $label=&::label($_[0]);
+ &::align($_[1]) if ($_[1]>1);
+ &::set_label_B($label);
+ $label;
+}
+
+sub ::wipe_labels # wipes function-scope labels
+{ foreach $i (keys %label)
+ { delete $label{$i} if ($label{$i} =~ /^\Q${lbdecor}\E[0-9]{3}/); }
+}
+
+# subroutine management
+sub ::function_begin
+{ &function_begin_B(@_);
+ $stack=4;
+ &push("ebp");
+ &push("ebx");
+ &push("esi");
+ &push("edi");
+}
+
+sub ::function_end
+{ &pop("edi");
+ &pop("esi");
+ &pop("ebx");
+ &pop("ebp");
+ &ret();
+ &function_end_B(@_);
+ $stack=0;
+ &wipe_labels();
+}
+
+sub ::function_end_A
+{ &pop("edi");
+ &pop("esi");
+ &pop("ebx");
+ &pop("ebp");
+ &ret();
+ $stack+=16; # readjust esp as if we didn't pop anything
+}
+
+sub ::asciz
+{ my @str=unpack("C*",shift);
+ push @str,0;
+ while ($#str>15) {
+ &data_byte(@str[0..15]);
+ foreach (0..15) { shift @str; }
+ }
+ &data_byte(@str) if (@str);
+}
+
+sub ::asm_finish
+{ &file_end();
+ print @out;
+}
+
+sub ::asm_init
+{ my ($type,$fn,$cpu)=@_;
+
+ $filename=$fn;
+ $i386=$cpu;
+
+ $elf=$cpp=$coff=$aout=$macosx=$win32=$netware=$mwerks=$android=0;
+ if (($type eq "elf"))
+ { $elf=1; require "x86gas.pl"; }
+ elsif (($type eq "elf-1"))
+ { $elf=-1; require "x86gas.pl"; }
+ elsif (($type eq "a\.out"))
+ { $aout=1; require "x86gas.pl"; }
+ elsif (($type eq "coff" or $type eq "gaswin"))
+ { $coff=1; require "x86gas.pl"; }
+ elsif (($type eq "win32n"))
+ { $win32=1; require "x86nasm.pl"; }
+ elsif (($type eq "nw-nasm"))
+ { $netware=1; require "x86nasm.pl"; }
+ #elsif (($type eq "nw-mwasm"))
+ #{ $netware=1; $mwerks=1; require "x86nasm.pl"; }
+ elsif (($type eq "win32"))
+ { $win32=1; require "x86masm.pl"; }
+ elsif (($type eq "macosx"))
+ { $aout=1; $macosx=1; require "x86gas.pl"; }
+ elsif (($type eq "android"))
+ { $elf=1; $android=1; require "x86gas.pl"; }
+ else
+ { print STDERR <<"EOF";
+Pick one target type from
+ elf - Linux, FreeBSD, Solaris x86, etc.
+ a.out - DJGPP, elder OpenBSD, etc.
+ coff - GAS/COFF such as Win32 targets
+ win32n - Windows 95/Windows NT NASM format
+ nw-nasm - NetWare NASM format
+ macosx - Mac OS X
+EOF
+ exit(1);
+ }
+
+ $pic=0;
+ for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); }
+
+ $filename =~ s/\.pl$//;
+ &file($filename);
+}
+
+sub ::hidden {}
+
+1;
diff --git a/openssl-1.1.0h/crypto/perlasm/x86gas.pl b/openssl-1.1.0h/crypto/perlasm/x86gas.pl
new file mode 100644
index 0000000..2c8fce0
--- /dev/null
+++ b/openssl-1.1.0h/crypto/perlasm/x86gas.pl
@@ -0,0 +1,265 @@
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+package x86gas;
+
+*out=\@::out;
+
+$::lbdecor=$::aout?"L":".L"; # local label decoration
+$nmdecor=($::aout or $::coff)?"_":""; # external name decoration
+
+$initseg="";
+
+$align=16;
+$align=log($align)/log(2) if ($::aout);
+$com_start="#" if ($::aout or $::coff);
+
+sub opsize()
+{ my $reg=shift;
+ if ($reg =~ m/^%e/o) { "l"; }
+ elsif ($reg =~ m/^%[a-d][hl]$/o) { "b"; }
+ elsif ($reg =~ m/^%[yxm]/o) { undef; }
+ else { "w"; }
+}
+
+# swap arguments;
+# expand opcode with size suffix;
+# prefix numeric constants with $;
+sub ::generic
+{ my($opcode,@arg)=@_;
+ my($suffix,$dst,$src);
+
+ @arg=reverse(@arg);
+
+ for (@arg)
+ { s/^(\*?)(e?[a-dsixphl]{2})$/$1%$2/o; # gp registers
+ s/^([xy]?mm[0-7])$/%$1/o; # xmm/mmx registers
+ s/^(\-?[0-9]+)$/\$$1/o; # constants
+ s/^(\-?0x[0-9a-f]+)$/\$$1/o; # constants
+ }
+
+ $dst = $arg[$#arg] if ($#arg>=0);
+ $src = $arg[$#arg-1] if ($#arg>=1);
+ if ($dst =~ m/^%/o) { $suffix=&opsize($dst); }
+ elsif ($src =~ m/^%/o) { $suffix=&opsize($src); }
+ else { $suffix="l"; }
+ undef $suffix if ($dst =~ m/^%[xm]/o || $src =~ m/^%[xm]/o);
+
+ if ($#_==0) { &::emit($opcode); }
+ elsif ($#_==1 && $opcode =~ m/^(call|clflush|j|loop|set)/o)
+ { &::emit($opcode,@arg); }
+ else { &::emit($opcode.$suffix,@arg);}
+
+ 1;
+}
+#
+# opcodes not covered by ::generic above, mostly inconsistent namings...
+#
+sub ::movzx { &::movzb(@_); }
+sub ::pushfd { &::pushfl; }
+sub ::popfd { &::popfl; }
+sub ::cpuid { &::emit(".byte\t0x0f,0xa2"); }
+sub ::rdtsc { &::emit(".byte\t0x0f,0x31"); }
+
+sub ::call { &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); }
+sub ::call_ptr { &::generic("call","*$_[0]"); }
+sub ::jmp_ptr { &::generic("jmp","*$_[0]"); }
+
+*::bswap = sub { &::emit("bswap","%$_[0]"); } if (!$::i386);
+
+sub ::DWP
+{ my($addr,$reg1,$reg2,$idx)=@_;
+ my $ret="";
+
+ if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
+
+ $addr =~ s/^\s+//;
+ # prepend global references with optional underscore
+ $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige;
+
+ $reg1 = "%$reg1" if ($reg1);
+ $reg2 = "%$reg2" if ($reg2);
+
+ $ret .= $addr if (($addr ne "") && ($addr ne 0));
+
+ if ($reg2)
+ { $idx!= 0 or $idx=1;
+ $ret .= "($reg1,$reg2,$idx)";
+ }
+ elsif ($reg1)
+ { $ret .= "($reg1)"; }
+
+ $ret;
+}
+sub ::QWP { &::DWP(@_); }
+sub ::BP { &::DWP(@_); }
+sub ::WP { &::DWP(@_); }
+sub ::BC { @_; }
+sub ::DWC { @_; }
+
+sub ::file
+{ push(@out,".file\t\"$_[0].s\"\n.text\n"); }
+
+sub ::function_begin_B
+{ my $func=shift;
+ my $global=($func !~ /^_/);
+ my $begin="${::lbdecor}_${func}_begin";
+
+ &::LABEL($func,$global?"$begin":"$nmdecor$func");
+ $func=$nmdecor.$func;
+
+ push(@out,".globl\t$func\n") if ($global);
+ if ($::coff)
+ { push(@out,".def\t$func;\t.scl\t".(3-$global).";\t.type\t32;\t.endef\n"); }
+ elsif (($::aout and !$::pic) or $::macosx)
+ { }
+ else
+ { push(@out,".type $func,\@function\n"); }
+ push(@out,".align\t$align\n");
+ push(@out,"$func:\n");
+ push(@out,"$begin:\n") if ($global);
+ $::stack=4;
+}
+
+sub ::function_end_B
+{ my $func=shift;
+ push(@out,".size\t$nmdecor$func,.-".&::LABEL($func)."\n") if ($::elf);
+ $::stack=0;
+ &::wipe_labels();
+}
+
+sub ::comment
+ {
+ if (!defined($com_start) or $::elf)
+ { # Regarding $::elf above...
+ # GNU and SVR4 as'es use different comment delimiters,
+ push(@out,"\n"); # so we just skip ELF comments...
+ return;
+ }
+ foreach (@_)
+ {
+ if (/^\s*$/)
+ { push(@out,"\n"); }
+ else
+ { push(@out,"\t$com_start $_ $com_end\n"); }
+ }
+ }
+
+sub ::external_label
+{ foreach(@_) { &::LABEL($_,$nmdecor.$_); } }
+
+sub ::public_label
+{ push(@out,".globl\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); }
+
+sub ::file_end
+{ if ($::macosx)
+ { if (%non_lazy_ptr)
+ { push(@out,".section __IMPORT,__pointers,non_lazy_symbol_pointers\n");
+ foreach $i (keys %non_lazy_ptr)
+ { push(@out,"$non_lazy_ptr{$i}:\n.indirect_symbol\t$i\n.long\t0\n"); }
+ }
+ }
+ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
+ my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
+ if ($::macosx) { push (@out,"$tmp,2\n"); }
+ elsif ($::elf) { push (@out,"$tmp,4\n"); }
+ else { push (@out,"$tmp\n"); }
+ }
+ push(@out,$initseg) if ($initseg);
+}
+
+sub ::data_byte { push(@out,".byte\t".join(',',@_)."\n"); }
+sub ::data_short{ push(@out,".value\t".join(',',@_)."\n"); }
+sub ::data_word { push(@out,".long\t".join(',',@_)."\n"); }
+
+sub ::align
+{ my $val=$_[0];
+ if ($::aout)
+ { $val=int(log($val)/log(2));
+ $val.=",0x90";
+ }
+ push(@out,".align\t$val\n");
+}
+
+sub ::picmeup
+{ my($dst,$sym,$base,$reflabel)=@_;
+
+ if (($::pic && ($::elf || $::aout)) || $::macosx)
+ { if (!defined($base))
+ { &::call(&::label("PIC_me_up"));
+ &::set_label("PIC_me_up");
+ &::blindpop($dst);
+ $base=$dst;
+ $reflabel=&::label("PIC_me_up");
+ }
+ if ($::macosx)
+ { my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr");
+ &::mov($dst,&::DWP("$indirect-$reflabel",$base));
+ $non_lazy_ptr{"$nmdecor$sym"}=$indirect;
+ }
+ elsif ($sym eq "OPENSSL_ia32cap_P" && $::elf>0)
+ { &::lea($dst,&::DWP("$sym-$reflabel",$base)); }
+ else
+ { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]",
+ $base));
+ &::mov($dst,&::DWP("$sym\@GOT",$dst));
+ }
+ }
+ else
+ { &::lea($dst,&::DWP($sym)); }
+}
+
+sub ::initseg
+{ my $f=$nmdecor.shift;
+
+ if ($::android)
+ { $initseg.=<<___;
+.section .init_array
+.align 4
+.long $f
+___
+ }
+ elsif ($::elf)
+ { $initseg.=<<___;
+.section .init
+ call $f
+___
+ }
+ elsif ($::coff)
+ { $initseg.=<<___; # applies to both Cygwin and Mingw
+.section .ctors
+.long $f
+___
+ }
+ elsif ($::macosx)
+ { $initseg.=<<___;
+.mod_init_func
+.align 2
+.long $f
+___
+ }
+ elsif ($::aout)
+ { my $ctor="${nmdecor}_GLOBAL_\$I\$$f";
+ $initseg.=".text\n";
+ $initseg.=".type $ctor,\@function\n" if ($::pic);
+ $initseg.=<<___; # OpenBSD way...
+.globl $ctor
+.align 2
+$ctor:
+ jmp $f
+___
+ }
+}
+
+sub ::dataseg
+{ push(@out,".data\n"); }
+
+*::hidden = sub { push(@out,".hidden\t$nmdecor$_[0]\n"); } if ($::elf);
+
+1;
diff --git a/openssl-1.1.0h/crypto/perlasm/x86masm.pl b/openssl-1.1.0h/crypto/perlasm/x86masm.pl
new file mode 100644
index 0000000..d352f47
--- /dev/null
+++ b/openssl-1.1.0h/crypto/perlasm/x86masm.pl
@@ -0,0 +1,207 @@
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+package x86masm;
+
+*out=\@::out;
+
+$::lbdecor="\$L"; # local label decoration
+$nmdecor="_"; # external name decoration
+
+$initseg="";
+$segment="";
+
+sub ::generic
+{ my ($opcode,@arg)=@_;
+
+ # fix hexadecimal constants
+ for (@arg) { s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/oi; }
+
+ if ($opcode =~ /lea/ && @arg[1] =~ s/.*PTR\s+(\(.*\))$/OFFSET $1/) # no []
+ { $opcode="mov"; }
+ elsif ($opcode !~ /mov[dq]$/)
+ { # fix xmm references
+ $arg[0] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[-1]=~/\bxmm[0-7]\b/i);
+ $arg[-1] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[0]=~/\bxmm[0-7]\b/i);
+ }
+
+ &::emit($opcode,@arg);
+ 1;
+}
+#
+# opcodes not covered by ::generic above, mostly inconsistent namings...
+#
+sub ::call { &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); }
+sub ::call_ptr { &::emit("call",@_); }
+sub ::jmp_ptr { &::emit("jmp",@_); }
+sub ::lock { &::data_byte(0xf0); }
+
+sub get_mem
+{ my($size,$addr,$reg1,$reg2,$idx)=@_;
+ my($post,$ret);
+
+ if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
+
+ $ret .= "$size PTR " if ($size ne "");
+
+ $addr =~ s/^\s+//;
+ # prepend global references with optional underscore
+ $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige;
+ # put address arithmetic expression in parenthesis
+ $addr="($addr)" if ($addr =~ /^.+[\-\+].+$/);
+
+ if (($addr ne "") && ($addr ne 0))
+ { if ($addr !~ /^-/) { $ret .= "$addr"; }
+ else { $post=$addr; }
+ }
+ $ret .= "[";
+
+ if ($reg2 ne "")
+ { $idx!=0 or $idx=1;
+ $ret .= "$reg2*$idx";
+ $ret .= "+$reg1" if ($reg1 ne "");
+ }
+ else
+ { $ret .= "$reg1"; }
+
+ $ret .= "$post]";
+ $ret =~ s/\+\]/]/; # in case $addr was the only argument
+ $ret =~ s/\[\s*\]//;
+
+ $ret;
+}
+sub ::BP { &get_mem("BYTE",@_); }
+sub ::WP { &get_mem("WORD",@_); }
+sub ::DWP { &get_mem("DWORD",@_); }
+sub ::QWP { &get_mem("QWORD",@_); }
+sub ::BC { "@_"; }
+sub ::DWC { "@_"; }
+
+sub ::file
+{ my $tmp=<<___;
+TITLE $_[0].asm
+IF \@Version LT 800
+ECHO MASM version 8.00 or later is strongly recommended.
+ENDIF
+.686
+.MODEL FLAT
+OPTION DOTNAME
+IF \@Version LT 800
+.text\$ SEGMENT PAGE 'CODE'
+ELSE
+.text\$ SEGMENT ALIGN(64) 'CODE'
+ENDIF
+___
+ push(@out,$tmp);
+ $segment = ".text\$";
+}
+
+sub ::function_begin_B
+{ my $func=shift;
+ my $global=($func !~ /^_/);
+ my $begin="${::lbdecor}_${func}_begin";
+
+ &::LABEL($func,$global?"$begin":"$nmdecor$func");
+ $func="ALIGN\t16\n".$nmdecor.$func."\tPROC";
+
+ if ($global) { $func.=" PUBLIC\n${begin}::\n"; }
+ else { $func.=" PRIVATE\n"; }
+ push(@out,$func);
+ $::stack=4;
+}
+sub ::function_end_B
+{ my $func=shift;
+
+ push(@out,"$nmdecor$func ENDP\n");
+ $::stack=0;
+ &::wipe_labels();
+}
+
+sub ::file_end
+{ my $xmmheader=<<___;
+.686
+.XMM
+IF \@Version LT 800
+XMMWORD STRUCT 16
+DQ 2 dup (?)
+XMMWORD ENDS
+ENDIF
+___
+ if (grep {/\b[x]?mm[0-7]\b/i} @out) {
+ grep {s/\.[3-7]86/$xmmheader/} @out;
+ }
+
+ push(@out,"$segment ENDS\n");
+
+ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
+ { my $comm=<<___;
+.bss SEGMENT 'BSS'
+COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:4
+.bss ENDS
+___
+ # comment out OPENSSL_ia32cap_P declarations
+ grep {s/(^EXTERN\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
+ push (@out,$comm);
+ }
+ push (@out,$initseg) if ($initseg);
+ push (@out,"END\n");
+}
+
+sub ::comment { foreach (@_) { push(@out,"\t; $_\n"); } }
+
+*::set_label_B = sub
+{ my $l=shift; push(@out,$l.($l=~/^\Q${::lbdecor}\E[0-9]{3}/?":\n":"::\n")); };
+
+sub ::external_label
+{ foreach(@_)
+ { push(@out, "EXTERN\t".&::LABEL($_,$nmdecor.$_).":NEAR\n"); }
+}
+
+sub ::public_label
+{ push(@out,"PUBLIC\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); }
+
+sub ::data_byte
+{ push(@out,("DB\t").join(',',splice(@_,0,16))."\n") while(@_); }
+
+sub ::data_short
+{ push(@out,("DW\t").join(',',splice(@_,0,8))."\n") while(@_); }
+
+sub ::data_word
+{ push(@out,("DD\t").join(',',splice(@_,0,4))."\n") while(@_); }
+
+sub ::align
+{ push(@out,"ALIGN\t$_[0]\n"); }
+
+sub ::picmeup
+{ my($dst,$sym)=@_;
+ &::lea($dst,&::DWP($sym));
+}
+
+sub ::initseg
+{ my $f=$nmdecor.shift;
+
+ $initseg.=<<___;
+.CRT\$XCU SEGMENT DWORD PUBLIC 'DATA'
+EXTERN $f:NEAR
+DD $f
+.CRT\$XCU ENDS
+___
+}
+
+sub ::dataseg
+{ push(@out,"$segment\tENDS\n_DATA\tSEGMENT\n"); $segment="_DATA"; }
+
+sub ::safeseh
+{ my $nm=shift;
+ push(@out,"IF \@Version GE 710\n");
+ push(@out,".SAFESEH ".&::LABEL($nm,$nmdecor.$nm)."\n");
+ push(@out,"ENDIF\n");
+}
+
+1;
diff --git a/openssl-1.1.0h/crypto/perlasm/x86nasm.pl b/openssl-1.1.0h/crypto/perlasm/x86nasm.pl
new file mode 100644
index 0000000..4b664a8
--- /dev/null
+++ b/openssl-1.1.0h/crypto/perlasm/x86nasm.pl
@@ -0,0 +1,186 @@
+#! /usr/bin/env perl
+# Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+package x86nasm;
+
+*out=\@::out;
+
+$::lbdecor="L\$"; # local label decoration
+$nmdecor=$::netware?"":"_"; # external name decoration
+$drdecor=$::mwerks?".":""; # directive decoration
+
+$initseg="";
+
+sub ::generic
+{ my $opcode=shift;
+ my $tmp;
+
+ if (!$::mwerks)
+ { if ($opcode =~ m/^j/o && $#_==0) # optimize jumps
+ { $_[0] = "NEAR $_[0]"; }
+ elsif ($opcode eq "lea" && $#_==1) # wipe storage qualifier from lea
+ { $_[1] =~ s/^[^\[]*\[/\[/o; }
+ elsif ($opcode eq "clflush" && $#_==0)
+ { $_[0] =~ s/^[^\[]*\[/\[/o; }
+ }
+ &::emit($opcode,@_);
+ 1;
+}
+#
+# opcodes not covered by ::generic above, mostly inconsistent namings...
+#
+sub ::call { &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); }
+sub ::call_ptr { &::emit("call",@_); }
+sub ::jmp_ptr { &::emit("jmp",@_); }
+
+sub get_mem
+{ my($size,$addr,$reg1,$reg2,$idx)=@_;
+ my($post,$ret);
+
+ if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
+
+ if ($size ne "")
+ { $ret .= "$size";
+ $ret .= " PTR" if ($::mwerks);
+ $ret .= " ";
+ }
+ $ret .= "[";
+
+ $addr =~ s/^\s+//;
+ # prepend global references with optional underscore
+ $addr =~ s/^([^\+\-0-9][^\+\-]*)/::islabel($1) or "$nmdecor$1"/ige;
+ # put address arithmetic expression in parenthesis
+ $addr="($addr)" if ($addr =~ /^.+[\-\+].+$/);
+
+ if (($addr ne "") && ($addr ne 0))
+ { if ($addr !~ /^-/) { $ret .= "$addr+"; }
+ else { $post=$addr; }
+ }
+
+ if ($reg2 ne "")
+ { $idx!=0 or $idx=1;
+ $ret .= "$reg2*$idx";
+ $ret .= "+$reg1" if ($reg1 ne "");
+ }
+ else
+ { $ret .= "$reg1"; }
+
+ $ret .= "$post]";
+ $ret =~ s/\+\]/]/; # in case $addr was the only argument
+
+ $ret;
+}
+sub ::BP { &get_mem("BYTE",@_); }
+sub ::DWP { &get_mem("DWORD",@_); }
+sub ::WP { &get_mem("WORD",@_); }
+sub ::QWP { &get_mem("",@_); }
+sub ::BC { (($::mwerks)?"":"BYTE ")."@_"; }
+sub ::DWC { (($::mwerks)?"":"DWORD ")."@_"; }
+
+sub ::file
+{ if ($::mwerks) { push(@out,".section\t.text,64\n"); }
+ else
+ { my $tmp=<<___;
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+\$\@feat.00 equ 1
+section .text code align=64
+%else
+section .text code
+%endif
+___
+ push(@out,$tmp);
+ }
+}
+
+sub ::function_begin_B
+{ my $func=shift;
+ my $global=($func !~ /^_/);
+ my $begin="${::lbdecor}_${func}_begin";
+
+ $begin =~ s/^\@/./ if ($::mwerks); # the torture never stops
+
+ &::LABEL($func,$global?"$begin":"$nmdecor$func");
+ $func=$nmdecor.$func;
+
+ push(@out,"${drdecor}global $func\n") if ($global);
+ push(@out,"${drdecor}align 16\n");
+ push(@out,"$func:\n");
+ push(@out,"$begin:\n") if ($global);
+ $::stack=4;
+}
+
+sub ::function_end_B
+{ $::stack=0;
+ &::wipe_labels();
+}
+
+sub ::file_end
+{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
+ { my $comm=<<___;
+${drdecor}segment .bss
+${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 16
+___
+ # comment out OPENSSL_ia32cap_P declarations
+ grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
+ push (@out,$comm)
+ }
+ push (@out,$initseg) if ($initseg);
+}
+
+sub ::comment { foreach (@_) { push(@out,"\t; $_\n"); } }
+
+sub ::external_label
+{ foreach(@_)
+ { push(@out,"${drdecor}extern\t".&::LABEL($_,$nmdecor.$_)."\n"); }
+}
+
+sub ::public_label
+{ push(@out,"${drdecor}global\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); }
+
+sub ::data_byte
+{ push(@out,(($::mwerks)?".byte\t":"db\t").join(',',@_)."\n"); }
+sub ::data_short
+{ push(@out,(($::mwerks)?".word\t":"dw\t").join(',',@_)."\n"); }
+sub ::data_word
+{ push(@out,(($::mwerks)?".long\t":"dd\t").join(',',@_)."\n"); }
+
+sub ::align
+{ push(@out,"${drdecor}align\t$_[0]\n"); }
+
+sub ::picmeup
+{ my($dst,$sym)=@_;
+ &::lea($dst,&::DWP($sym));
+}
+
+sub ::initseg
+{ my $f=$nmdecor.shift;
+ if ($::win32)
+ { $initseg=<<___;
+segment .CRT\$XCU data align=4
+extern $f
+dd $f
+___
+ }
+}
+
+sub ::dataseg
+{ if ($mwerks) { push(@out,".section\t.data,4\n"); }
+ else { push(@out,"section\t.data align=4\n"); }
+}
+
+sub ::safeseh
+{ my $nm=shift;
+ push(@out,"%if __NASM_VERSION_ID__ >= 0x02030000\n");
+ push(@out,"safeseh ".&::LABEL($nm,$nmdecor.$nm)."\n");
+ push(@out,"%endif\n");
+}
+
+1;