diff options
Diffstat (limited to 'openssl-1.1.0h/crypto/perlasm')
| -rw-r--r-- | openssl-1.1.0h/crypto/perlasm/README | 124 | ||||
| -rwxr-xr-x | openssl-1.1.0h/crypto/perlasm/arm-xlate.pl | 177 | ||||
| -rw-r--r-- | openssl-1.1.0h/crypto/perlasm/cbc.pl | 356 | ||||
| -rwxr-xr-x | openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl | 265 | ||||
| -rw-r--r-- | openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl | 1702 | ||||
| -rwxr-xr-x | openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl | 1186 | ||||
| -rw-r--r-- | openssl-1.1.0h/crypto/perlasm/x86asm.pl | 310 | ||||
| -rw-r--r-- | openssl-1.1.0h/crypto/perlasm/x86gas.pl | 265 | ||||
| -rw-r--r-- | openssl-1.1.0h/crypto/perlasm/x86masm.pl | 207 | ||||
| -rw-r--r-- | openssl-1.1.0h/crypto/perlasm/x86nasm.pl | 186 | 
10 files changed, 4778 insertions, 0 deletions
| diff --git a/openssl-1.1.0h/crypto/perlasm/README b/openssl-1.1.0h/crypto/perlasm/README new file mode 100644 index 0000000..e90bd8e --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/README @@ -0,0 +1,124 @@ +The perl scripts in this directory are my 'hack' to generate +multiple different assembler formats via the one original script. + +The way to use this library is to start with adding the path to this directory +and then include it. + +push(@INC,"perlasm","../../perlasm"); +require "x86asm.pl"; + +The first thing we do is set up the file and type of assembler + +&asm_init($ARGV[0],$0); + +The first argument is the 'type'.  Currently +'cpp', 'sol', 'a.out', 'elf' or 'win32'. +Argument 2 is the file name. + +The reciprocal function is +&asm_finish() which should be called at the end. + +There are 2 main 'packages'. x86masm.pl, which is the Microsoft assembler, +and x86gas.pl which is the unix (gas) version. + +Functions of interest are: +&external_label("des_SPtrans");	declare an external variable +&LB(reg);			Low byte for a register +&HB(reg);			High byte for a register +&BP(off,base,index,scale)	Byte pointer addressing +&DWP(off,base,index,scale)	Word pointer addressing +&stack_push(num)		Basically a 'sub esp, num*4' with extra +&stack_pop(num)			inverse of stack_push +&function_begin(name,extra)	Start a function with pushing of +				edi, esi, ebx and ebp.  extra is extra win32 +				external info that may be required. +&function_begin_B(name,extra)	Same as normal function_begin but no pushing. +&function_end(name)		Call at end of function. +&function_end_A(name)		Standard pop and ret, for use inside functions +&function_end_B(name)		Call at end but without popping or 'ret'. +&swtmp(num)			Address on stack temp word. +&wparam(num)			Parameter number num, that was pushed +				in C convention.  This all works over pushes +				and pops. +&comment("hello there")		Put in a comment. +&label("loop")			Refer to a label, normally a jmp target. 
+&set_label("loop")		Set a label at this point. +&data_word(word)		Put in a word of data. + +So how does this all hold together?  Given + +int calc(int len, int *data) +	{ +	int i,j=0; + +	for (i=0; i<len; i++) +		{ +		j+=other(data[i]); +		} +	return j; +	} + +So a very simple version of this function could be coded as + +	push(@INC,"perlasm","../../perlasm"); +	require "x86asm.pl"; +	 +	&asm_init($ARGV[0],"calc.pl"); + +	&external_label("other"); + +	$tmp1=	"eax"; +	$j=	"edi"; +	$data=	"esi"; +	$i=	"ebp"; + +	&comment("a simple function"); +	&function_begin("calc"); +	&mov(	$data,		&wparam(1)); # data +	&xor(	$j,		$j); +	&xor(	$i,		$i); + +	&set_label("loop"); +	&cmp(	$i,		&wparam(0)); +	&jge(	&label("end")); + +	&mov(	$tmp1,		&DWP(0,$data,$i,4)); +	&push(	$tmp1); +	&call(	"other"); +	&add(	$j,		"eax"); +	&pop(	$tmp1); +	&inc(	$i); +	&jmp(	&label("loop")); + +	&set_label("end"); +	&mov(	"eax",		$j); + +	&function_end("calc"); + +	&asm_finish(); + +The above example is very very unoptimised but gives an idea of how +things work. 
+ +There is also a cbc mode function generator in cbc.pl + +&cbc(	$name, +	$encrypt_function_name, +	$decrypt_function_name, +	$true_if_byte_swap_needed, +	$parameter_number_for_iv, +	$parameter_number_for_encrypt_flag, +	$first_parameter_to_pass, +	$second_parameter_to_pass, +	$third_parameter_to_pass); + +So for example, given +void BF_encrypt(BF_LONG *data,BF_KEY *key); +void BF_decrypt(BF_LONG *data,BF_KEY *key); +void BF_cbc_encrypt(unsigned char *in, unsigned char *out, long length, +        BF_KEY *ks, unsigned char *iv, int enc); + +&cbc("BF_cbc_encrypt","BF_encrypt","BF_decrypt",1,4,5,3,-1,-1); + +&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1); +&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5); + diff --git a/openssl-1.1.0h/crypto/perlasm/arm-xlate.pl b/openssl-1.1.0h/crypto/perlasm/arm-xlate.pl new file mode 100755 index 0000000..ca2f8b9 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/arm-xlate.pl @@ -0,0 +1,177 @@ +#! /usr/bin/env perl +# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License").  You may not use +# this file except in compliance with the License.  
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +use strict; + +my $flavour = shift; +my $output = shift; +# Redirect STDOUT only when an output file was named.  'or' applies to the +# open() call itself; the former '||' bound to the filename string, so a +# failed open was never reported.  The 2-arg form is kept so existing +# invocations behave as before. +if (defined($output) && $output ne "") { +    open STDOUT,">$output" or die "can't open $output: $!"; +} + +$flavour = "linux32" if (!$flavour or $flavour eq "void"); + +my %GLOBALS; +my $dotinlocallabels=($flavour=~/linux/)?1:0; + +################################################################ +# directives which need special treatment on different platforms +################################################################ +my $arch = sub { +    if ($flavour =~ /linux/)	{ ".arch\t".join(',',@_); } +    else			{ ""; } +}; +my $fpu = sub { +    if ($flavour =~ /linux/)	{ ".fpu\t".join(',',@_); } +    else			{ ""; } +}; +my $hidden = sub { +    if ($flavour =~ /ios/)	{ ".private_extern\t".join(',',@_); } +    else			{ ".hidden\t".join(',',@_); } +}; +my $comm = sub { +    my @args = split(/,\s*/,shift); +    my $name = $args[0]; +    my $global = \$GLOBALS{$name}; +    my $ret; + +    if ($flavour =~ /ios32/)	{ +	$ret = ".comm\t_$name,$args[1]\n"; +	$ret .= ".non_lazy_symbol_pointer\n"; +	$ret .= "$name:\n"; +	$ret .= ".indirect_symbol\t_$name\n"; +	$ret .= ".long\t0"; +	$name = "_$name"; +    } else			{ $ret = ".comm\t".join(',',@args); } + +    $$global = $name; +    $ret; +}; +my $globl = sub { +    my $name = shift; +    my $global = \$GLOBALS{$name}; +    my $ret; + +    SWITCH: for ($flavour) { +	/ios/		&& do { $name = "_$name"; +				last; +			      }; +    } + +    $ret = ".globl	$name" if (!$ret); +    $$global = $name; +    $ret; +}; +my $global = $globl; +my $extern = sub { +    &$globl(@_); +    return;	# return nothing +}; +my $type = sub { +    if ($flavour =~ /linux/)	{ ".type\t".join(',',@_); } +    elsif ($flavour =~ /ios32/)	{ if (join(',',@_) =~ /(\w+),%function/) { +					"#ifdef __thumb2__\n". +					".thumb_func	$1\n". 
+					"#endif"; +				  } +			        } +    else			{ ""; } +}; +my $size = sub { +    if ($flavour =~ /linux/)	{ ".size\t".join(',',@_); } +    else			{ ""; } +}; +my $inst = sub { +    if ($flavour =~ /linux/)    { ".inst\t".join(',',@_); } +    else                        { ".long\t".join(',',@_); } +}; +my $asciz = sub { +    my $line = join(",",@_); +    if ($line =~ /^"(.*)"$/) +    {	".byte	" . join(",",unpack("C*",$1),0) . "\n.align	2";	} +    else +    {	"";	} +}; + +sub range { +  my ($r,$sfx,$start,$end) = @_; + +    join(",",map("$r$_$sfx",($start..$end))); +} + +sub expand_line { +  my $line = shift; +  my @ret = (); + +    pos($line)=0; + +    while ($line =~ m/\G[^@\/\{\"]*/g) { +	if ($line =~ m/\G(@|\/\/|$)/gc) { +	    last; +	} +	elsif ($line =~ m/\G\{/gc) { +	    my $saved_pos = pos($line); +	    $line =~ s/\G([rdqv])([0-9]+)([^\-]*)\-\1([0-9]+)\3/range($1,$3,$2,$4)/e; +	    pos($line) = $saved_pos; +	    $line =~ m/\G[^\}]*\}/g; +	} +	elsif ($line =~ m/\G\"/gc) { +	    $line =~ m/\G[^\"]*\"/g; +	} +    } + +    $line =~ s/\b(\w+)/$GLOBALS{$1} or $1/ge; + +    return $line; +} + +while(my $line=<>) { + +    if ($line =~ m/^\s*(#|@|\/\/)/)	{ print $line; next; } + +    $line =~ s|/\*.*\*/||;	# get rid of C-style comments... +    $line =~ s|^\s+||;		# ... and skip white spaces in beginning... +    $line =~ s|\s+$||;		# ... 
and at the end + +    { +	$line =~ s|[\b\.]L(\w{2,})|L$1|g;	# common denominator for Locallabel +	$line =~ s|\bL(\w{2,})|\.L$1|g	if ($dotinlocallabels); +    } + +    { +	$line =~ s|(^[\.\w]+)\:\s*||; +	my $label = $1; +	if ($label) { +	    printf "%s:",($GLOBALS{$label} or $label); +	} +    } + +    if ($line !~ m/^[#@]/) { +	$line =~ s|^\s*(\.?)(\S+)\s*||; +	my $c = $1; $c = "\t" if ($c eq ""); +	my $mnemonic = $2; +	my $opcode; +	if ($mnemonic =~ m/([^\.]+)\.([^\.]+)/) { +	    $opcode = eval("\$$1_$2"); +	} else { +	    $opcode = eval("\$$mnemonic"); +	} + +	my $arg=expand_line($line); + +	if (ref($opcode) eq 'CODE') { +		$line = &$opcode($arg); +	} elsif ($mnemonic)         { +		$line = $c.$mnemonic; +		$line.= "\t$arg" if ($arg ne ""); +	} +    } + +    print $line if ($line); +    print "\n"; +} + +close STDOUT; diff --git a/openssl-1.1.0h/crypto/perlasm/cbc.pl b/openssl-1.1.0h/crypto/perlasm/cbc.pl new file mode 100644 index 0000000..ad79b24 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/cbc.pl @@ -0,0 +1,356 @@ +#! /usr/bin/env perl +# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License").  You may not use +# this file except in compliance with the License.  
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + + +# void des_ncbc_encrypt(input, output, length, schedule, ivec, enc) +# des_cblock (*input); +# des_cblock (*output); +# long length; +# des_key_schedule schedule; +# des_cblock (*ivec); +# int enc; +# +# calls  +# des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT); +# + +#&cbc("des_ncbc_encrypt","des_encrypt",0); +#&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt", +#	1,4,5,3,5,-1); +#&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt", +#	0,4,5,3,5,-1); +#&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3", +#	0,6,7,3,4,5); +# +# When doing a cipher that needs bigendian order, +# for encrypt, the iv is kept in bigendian form, +# while for decrypt, it is kept in little endian. +sub cbc +	{ +	local($name,$enc_func,$dec_func,$swap,$iv_off,$enc_off,$p1,$p2,$p3)=@_; +	# name is the function name +	# enc_func and dec_func and the functions to call for encrypt/decrypt +	# swap is true if byte order needs to be reversed +	# iv_off is parameter number for the iv  +	# enc_off is parameter number for the encrypt/decrypt flag +	# p1,p2,p3 are the offsets for parameters to be passed to the +	# underlying calls. 
+ +	&function_begin_B($name,""); +	&comment(""); + +	$in="esi"; +	$out="edi"; +	$count="ebp"; + +	&push("ebp"); +	&push("ebx"); +	&push("esi"); +	&push("edi"); + +	$data_off=4; +	$data_off+=4 if ($p1 > 0); +	$data_off+=4 if ($p2 > 0); +	$data_off+=4 if ($p3 > 0); + +	&mov($count,	&wparam(2));	# length + +	&comment("getting iv ptr from parameter $iv_off"); +	&mov("ebx",	&wparam($iv_off));	# Get iv ptr + +	&mov($in,	&DWP(0,"ebx","",0));#	iv[0] +	&mov($out,	&DWP(4,"ebx","",0));#	iv[1] + +	&push($out); +	&push($in); +	&push($out);	# used in decrypt for iv[1] +	&push($in);	# used in decrypt for iv[0] + +	&mov("ebx",	"esp");		# This is the address of tin[2] + +	&mov($in,	&wparam(0));	# in +	&mov($out,	&wparam(1));	# out + +	# We have loaded them all, how lets push things +	&comment("getting encrypt flag from parameter $enc_off"); +	&mov("ecx",	&wparam($enc_off));	# Get enc flag +	if ($p3 > 0) +		{ +		&comment("get and push parameter $p3"); +		if ($enc_off != $p3) +			{ &mov("eax",	&wparam($p3)); &push("eax"); } +		else	{ &push("ecx"); } +		} +	if ($p2 > 0) +		{ +		&comment("get and push parameter $p2"); +		if ($enc_off != $p2) +			{ &mov("eax",	&wparam($p2)); &push("eax"); } +		else	{ &push("ecx"); } +		} +	if ($p1 > 0) +		{ +		&comment("get and push parameter $p1"); +		if ($enc_off != $p1) +			{ &mov("eax",	&wparam($p1)); &push("eax"); } +		else	{ &push("ecx"); } +		} +	&push("ebx");		# push data/iv + +	&cmp("ecx",0); +	&jz(&label("decrypt")); + +	&and($count,0xfffffff8); +	&mov("eax",	&DWP($data_off,"esp","",0));	# load iv[0] +	&mov("ebx",	&DWP($data_off+4,"esp","",0));	# load iv[1] + +	&jz(&label("encrypt_finish")); + +	############################################################# + +	&set_label("encrypt_loop"); +	# encrypt start  +	# "eax" and "ebx" hold iv (or the last cipher text) + +	&mov("ecx",	&DWP(0,$in,"",0));	# load first 4 bytes +	&mov("edx",	&DWP(4,$in,"",0));	# second 4 bytes + +	&xor("eax",	"ecx"); +	&xor("ebx",	"edx"); + +	&bswap("eax")	if $swap; +	
&bswap("ebx")	if $swap; + +	&mov(&DWP($data_off,"esp","",0),	"eax");	# put in array for call +	&mov(&DWP($data_off+4,"esp","",0),	"ebx");	# + +	&call($enc_func); + +	&mov("eax",	&DWP($data_off,"esp","",0)); +	&mov("ebx",	&DWP($data_off+4,"esp","",0)); + +	&bswap("eax")	if $swap; +	&bswap("ebx")	if $swap; + +	&mov(&DWP(0,$out,"",0),"eax"); +	&mov(&DWP(4,$out,"",0),"ebx"); + +	# eax and ebx are the next iv. + +	&add($in,	8); +	&add($out,	8); + +	&sub($count,	8); +	&jnz(&label("encrypt_loop")); + +###################################################################3 +	&set_label("encrypt_finish"); +	&mov($count,	&wparam(2));	# length +	&and($count,	7); +	&jz(&label("finish")); +	&call(&label("PIC_point")); +&set_label("PIC_point"); +	&blindpop("edx"); +	&lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); +	&mov($count,&DWP(0,"ecx",$count,4)); +	&add($count,"edx"); +	&xor("ecx","ecx"); +	&xor("edx","edx"); +	#&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4)); +	&jmp_ptr($count); + +&set_label("ej7"); +	&movb(&HB("edx"),	&BP(6,$in,"",0)); +	&shl("edx",8); +&set_label("ej6"); +	&movb(&HB("edx"),	&BP(5,$in,"",0)); +&set_label("ej5"); +	&movb(&LB("edx"),	&BP(4,$in,"",0)); +&set_label("ej4"); +	&mov("ecx",		&DWP(0,$in,"",0)); +	&jmp(&label("ejend")); +&set_label("ej3"); +	&movb(&HB("ecx"),	&BP(2,$in,"",0)); +	&shl("ecx",8); +&set_label("ej2"); +	&movb(&HB("ecx"),	&BP(1,$in,"",0)); +&set_label("ej1"); +	&movb(&LB("ecx"),	&BP(0,$in,"",0)); +&set_label("ejend"); + +	&xor("eax",	"ecx"); +	&xor("ebx",	"edx"); + +	&bswap("eax")	if $swap; +	&bswap("ebx")	if $swap; + +	&mov(&DWP($data_off,"esp","",0),	"eax");	# put in array for call +	&mov(&DWP($data_off+4,"esp","",0),	"ebx");	# + +	&call($enc_func); + +	&mov("eax",	&DWP($data_off,"esp","",0)); +	&mov("ebx",	&DWP($data_off+4,"esp","",0)); + +	&bswap("eax")	if $swap; +	&bswap("ebx")	if $swap; + +	&mov(&DWP(0,$out,"",0),"eax"); +	&mov(&DWP(4,$out,"",0),"ebx"); + +	&jmp(&label("finish")); + +	
############################################################# +	############################################################# +	&set_label("decrypt",1); +	# decrypt start  +	&and($count,0xfffffff8); +	# The next 2 instructions are only for if the jz is taken +	&mov("eax",	&DWP($data_off+8,"esp","",0));	# get iv[0] +	&mov("ebx",	&DWP($data_off+12,"esp","",0));	# get iv[1] +	&jz(&label("decrypt_finish")); + +	&set_label("decrypt_loop"); +	&mov("eax",	&DWP(0,$in,"",0));	# load first 4 bytes +	&mov("ebx",	&DWP(4,$in,"",0));	# second 4 bytes + +	&bswap("eax")	if $swap; +	&bswap("ebx")	if $swap; + +	&mov(&DWP($data_off,"esp","",0),	"eax");	# put back +	&mov(&DWP($data_off+4,"esp","",0),	"ebx");	# + +	&call($dec_func); + +	&mov("eax",	&DWP($data_off,"esp","",0));	# get return +	&mov("ebx",	&DWP($data_off+4,"esp","",0));	# + +	&bswap("eax")	if $swap; +	&bswap("ebx")	if $swap; + +	&mov("ecx",	&DWP($data_off+8,"esp","",0));	# get iv[0] +	&mov("edx",	&DWP($data_off+12,"esp","",0));	# get iv[1] + +	&xor("ecx",	"eax"); +	&xor("edx",	"ebx"); + +	&mov("eax",	&DWP(0,$in,"",0));	# get old cipher text, +	&mov("ebx",	&DWP(4,$in,"",0));	# next iv actually + +	&mov(&DWP(0,$out,"",0),"ecx"); +	&mov(&DWP(4,$out,"",0),"edx"); + +	&mov(&DWP($data_off+8,"esp","",0),	"eax");	# save iv +	&mov(&DWP($data_off+12,"esp","",0),	"ebx");	# + +	&add($in,	8); +	&add($out,	8); + +	&sub($count,	8); +	&jnz(&label("decrypt_loop")); +############################ ENDIT #######################3 +	&set_label("decrypt_finish"); +	&mov($count,	&wparam(2));	# length +	&and($count,	7); +	&jz(&label("finish")); + +	&mov("eax",	&DWP(0,$in,"",0));	# load first 4 bytes +	&mov("ebx",	&DWP(4,$in,"",0));	# second 4 bytes + +	&bswap("eax")	if $swap; +	&bswap("ebx")	if $swap; + +	&mov(&DWP($data_off,"esp","",0),	"eax");	# put back +	&mov(&DWP($data_off+4,"esp","",0),	"ebx");	# + +	&call($dec_func); + +	&mov("eax",	&DWP($data_off,"esp","",0));	# get return +	&mov("ebx",	&DWP($data_off+4,"esp","",0));	# + +	&bswap("eax")	if 
$swap; + +	&mov("ecx",	&DWP($data_off+8,"esp","",0));	# get iv[0] +	&mov("edx",	&DWP($data_off+12,"esp","",0));	# get iv[1] + +	&xor("ecx",	"eax"); +	&xor("edx",	"ebx"); + +	# this is for when we exit +	&mov("eax",	&DWP(0,$in,"",0));	# get old cipher text, +	&mov("ebx",	&DWP(4,$in,"",0));	# next iv actually + +&set_label("dj7"); +	&rotr("edx",	16); +	&movb(&BP(6,$out,"",0),	&LB("edx")); +	&shr("edx",16); +&set_label("dj6"); +	&movb(&BP(5,$out,"",0),	&HB("edx")); +&set_label("dj5"); +	&movb(&BP(4,$out,"",0),	&LB("edx")); +&set_label("dj4"); +	&mov(&DWP(0,$out,"",0),	"ecx"); +	&jmp(&label("djend")); +	# dj3..dj1 are referenced only by the disabled cbc_dec_jmp_table below. +	# They mirror dj7..dj5: shift the low half back down (shr, not shl) and +	# store the plaintext bytes to $out, never back into $in. +&set_label("dj3"); +	&rotr("ecx",	16); +	&movb(&BP(2,$out,"",0),	&LB("ecx")); +	&shr("ecx",16); +&set_label("dj2"); +	&movb(&BP(1,$out,"",0),	&HB("ecx")); +&set_label("dj1"); +	&movb(&BP(0,$out,"",0),	&LB("ecx")); +&set_label("djend"); + +	# final iv is still in eax:ebx +	&jmp(&label("finish")); + + +############################ FINISH #######################3 +	&set_label("finish",1); +	&mov("ecx",	&wparam($iv_off));	# Get iv ptr + +	################################################# +	$total=16+4; +	$total+=4 if ($p1 > 0); +	$total+=4 if ($p2 > 0); +	$total+=4 if ($p3 > 0); +	&add("esp",$total); + +	&mov(&DWP(0,"ecx","",0),	"eax");	# save iv +	&mov(&DWP(4,"ecx","",0),	"ebx");	# save iv + +	&function_end_A($name); + +	&align(64); +	&set_label("cbc_enc_jmp_table"); +	&data_word("0"); +	&data_word(&label("ej1")."-".&label("PIC_point")); +	&data_word(&label("ej2")."-".&label("PIC_point")); +	&data_word(&label("ej3")."-".&label("PIC_point")); +	&data_word(&label("ej4")."-".&label("PIC_point")); +	&data_word(&label("ej5")."-".&label("PIC_point")); +	&data_word(&label("ej6")."-".&label("PIC_point")); +	&data_word(&label("ej7")."-".&label("PIC_point")); +	# not used +	#&set_label("cbc_dec_jmp_table",1); +	#&data_word("0"); +	#&data_word(&label("dj1")."-".&label("PIC_point")); +	#&data_word(&label("dj2")."-".&label("PIC_point")); +	
#&data_word(&label("dj3")."-".&label("PIC_point")); +	#&data_word(&label("dj4")."-".&label("PIC_point")); +	#&data_word(&label("dj5")."-".&label("PIC_point")); +	#&data_word(&label("dj6")."-".&label("PIC_point")); +	#&data_word(&label("dj7")."-".&label("PIC_point")); +	&align(64); + +	&function_end_B($name); +	 +	} + +1; diff --git a/openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl b/openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl new file mode 100755 index 0000000..2d46e24 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/ppc-xlate.pl @@ -0,0 +1,265 @@ +#! /usr/bin/env perl +# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License").  You may not use +# this file except in compliance with the License.  You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +my $flavour = shift; +my $output = shift; +# Redirect STDOUT only when an output file was named.  'or' applies to the +# open() call itself; the former '||' bound to the filename string, so a +# failed open was never reported.  The 2-arg form is kept so existing +# invocations behave as before. +if (defined($output) && $output ne "") { +    open STDOUT,">$output" or die "can't open $output: $!"; +} + +my %GLOBALS; +my $dotinlocallabels=($flavour=~/linux/)?1:0; + +################################################################ +# directives which need special treatment on different platforms +################################################################ +my $globl = sub { +    my $junk = shift; +    my $name = shift; +    my $global = \$GLOBALS{$name}; +    my $ret; + +    $name =~ s|^[\.\_]||; +  +    SWITCH: for ($flavour) { +	/aix/		&& do { $name = ".$name"; +				last; +			      }; +	/osx/		&& do { $name = "_$name"; +				last; +			      }; +	/linux.*(32|64le)/ +			&& do {	$ret .= ".globl	$name\n"; +				$ret .= ".type	$name,\@function"; +				last; +			      }; +	/linux.*64/	&& do {	$ret .= ".globl	$name\n"; +				$ret .= ".type	$name,\@function\n"; +				$ret .= ".section	\".opd\",\"aw\"\n"; +				$ret .= ".align	3\n"; +				$ret .= "$name:\n"; +				$ret .= ".quad	.$name,.TOC.\@tocbase,0\n"; +				$ret .= ".previous\n"; + +				$name = ".$name"; +				last; +			      }; +    } + +  
  $ret = ".globl	$name" if (!$ret); +    $$global = $name; +    $ret; +}; +my $text = sub { +    my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; +    $ret = ".abiversion	2\n".$ret	if ($flavour =~ /linux.*64le/); +    $ret; +}; +my $machine = sub { +    my $junk = shift; +    my $arch = shift; +    if ($flavour =~ /osx/) +    {	$arch =~ s/\"//g; +	$arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); +    } +    ".machine	$arch"; +}; +my $size = sub { +    if ($flavour =~ /linux/) +    {	shift; +	my $name = shift; $name =~ s|^[\.\_]||; +	my $ret  = ".size	$name,.-".($flavour=~/64$/?".":"").$name; +	$ret .= "\n.size	.$name,.-.$name" if ($flavour=~/64$/); +	$ret; +    } +    else +    {	"";	} +}; +my $asciz = sub { +    shift; +    my $line = join(",",@_); +    if ($line =~ /^"(.*)"$/) +    {	".byte	" . join(",",unpack("C*",$1),0) . "\n.align	2";	} +    else +    {	"";	} +}; +my $quad = sub { +    shift; +    my @ret; +    my ($hi,$lo); +    for (@_) { +	if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) +	{  $hi=$1?"0x$1":"0"; $lo="0x$2";  } +	elsif (/^([0-9]+)$/o) +	{  $hi=$1>>32; $lo=$1&0xffffffff;  } # error-prone with 32-bit perl +	else +	{  $hi=undef; $lo=$_; } + +	if (defined($hi)) +	{  push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo");  } +	else +	{  push(@ret,".quad	$lo");  } +    } +    join("\n",@ret); +}; + +################################################################ +# simplified mnemonics not handled by at least one assembler +################################################################ +my $cmplw = sub { +    my $f = shift; +    my $cr = 0; $cr = shift if ($#_>1); +    # Some out-of-date 32-bit GNU assembler just can't handle cmplw... +    ($flavour =~ /linux.*32/) ? +	"	.long	".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : +	"	cmplw	".join(',',$cr,@_); +}; +my $bdnz = sub { +    my $f = shift; +    my $bo = $f=~/[\+\-]/ ? 
16+9 : 16;	# optional "to be taken" hint +    "	bc	$bo,0,".shift; +} if ($flavour!~/linux/); +my $bltlr = sub { +    my $f = shift; +    my $bo = $f=~/\-/ ? 12+2 : 12;	# optional "not to be taken" hint +    ($flavour =~ /linux/) ?		# GNU as doesn't allow most recent hints +	"	.long	".sprintf "0x%x",19<<26|$bo<<21|16<<1 : +	"	bclr	$bo,0"; +}; +my $bnelr = sub { +    my $f = shift; +    my $bo = $f=~/\-/ ? 4+2 : 4;	# optional "not to be taken" hint +    ($flavour =~ /linux/) ?		# GNU as doesn't allow most recent hints +	"	.long	".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : +	"	bclr	$bo,2"; +}; +my $beqlr = sub { +    my $f = shift; +    my $bo = $f=~/-/ ? 12+2 : 12;	# optional "not to be taken" hint +    ($flavour =~ /linux/) ?		# GNU as doesn't allow most recent hints +	"	.long	".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : +	"	bclr	$bo,2"; +}; +# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two +# arguments is 64, with "operand out of range" error. +my $extrdi = sub { +    my ($f,$ra,$rs,$n,$b) = @_; +    $b = ($b+$n)&63; $n = 64-$n; +    "	rldicl	$ra,$rs,$b,$n"; +}; +my $vmr = sub { +    my ($f,$vx,$vy) = @_; +    "	vor	$vx,$vy,$vy"; +}; + +# Some ABIs specify vrsave, special-purpose register #256, as reserved +# for system use. 
+my $no_vrsave = ($flavour =~ /aix|linux64le/); +my $mtspr = sub { +    my ($f,$idx,$ra) = @_; +    if ($idx == 256 && $no_vrsave) { +	"	or	$ra,$ra,$ra"; +    } else { +	"	mtspr	$idx,$ra"; +    } +}; +my $mfspr = sub { +    my ($f,$rd,$idx) = @_; +    if ($idx == 256 && $no_vrsave) { +	"	li	$rd,-1"; +    } else { +	"	mfspr	$rd,$idx"; +    } +}; + +# PowerISA 2.06 stuff +sub vsxmem_op { +    my ($f, $vrt, $ra, $rb, $op) = @_; +    "	.long	".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); +} +# made-up unaligned memory reference AltiVec/VMX instructions +my $lvx_u	= sub {	vsxmem_op(@_, 844); };	# lxvd2x +my $stvx_u	= sub {	vsxmem_op(@_, 972); };	# stxvd2x +my $lvdx_u	= sub {	vsxmem_op(@_, 588); };	# lxsdx +my $stvdx_u	= sub {	vsxmem_op(@_, 716); };	# stxsdx +my $lvx_4w	= sub { vsxmem_op(@_, 780); };	# lxvw4x +my $stvx_4w	= sub { vsxmem_op(@_, 908); };	# stxvw4x + +# PowerISA 2.07 stuff +sub vcrypto_op { +    my ($f, $vrt, $vra, $vrb, $op) = @_; +    "	.long	".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; +} +my $vcipher	= sub { vcrypto_op(@_, 1288); }; +my $vcipherlast	= sub { vcrypto_op(@_, 1289); }; +my $vncipher	= sub { vcrypto_op(@_, 1352); }; +my $vncipherlast= sub { vcrypto_op(@_, 1353); }; +my $vsbox	= sub { vcrypto_op(@_, 0, 1480); }; +my $vshasigmad	= sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; +my $vshasigmaw	= sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; +my $vpmsumb	= sub { vcrypto_op(@_, 1032); }; +my $vpmsumd	= sub { vcrypto_op(@_, 1224); }; +my $vpmsubh	= sub { vcrypto_op(@_, 1096); }; +my $vpmsumw	= sub { vcrypto_op(@_, 1160); }; +my $vaddudm	= sub { vcrypto_op(@_, 192);  }; + +my $mtsle	= sub { +    my ($f, $arg) = @_; +    "	.long	".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); +}; + +# PowerISA 3.0 stuff +my $maddhdu = sub { +    my ($f, $rt, $ra, $rb, $rc) = @_; +    "	.long	".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|49; +}; +my $maddld = 
sub { +    my ($f, $rt, $ra, $rb, $rc) = @_; +    "	.long	".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|51; +}; + +my $darn = sub { +    my ($f, $rt, $l) = @_; +    "	.long	".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1); +}; + +while($line=<>) { + +    $line =~ s|[#!;].*$||;	# get rid of asm-style comments... +    $line =~ s|/\*.*\*/||;	# ... and C-style comments... +    $line =~ s|^\s+||;		# ... and skip white spaces in beginning... +    $line =~ s|\s+$||;		# ... and at the end + +    { +	$line =~ s|\b\.L(\w+)|L$1|g;	# common denominator for Locallabel +	$line =~ s|\bL(\w+)|\.L$1|g	if ($dotinlocallabels); +    } + +    { +	$line =~ s|(^[\.\w]+)\:\s*||; +	my $label = $1; +	if ($label) { +	    printf "%s:",($GLOBALS{$label} or $label); +	    printf "\n.localentry\t$GLOBALS{$label},0"	if ($GLOBALS{$label} && $flavour =~ /linux.*64le/); +	} +    } + +    { +	$line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; +	my $c = $1; $c = "\t" if ($c eq ""); +	my $mnemonic = $2; +	my $f = $3; +	my $opcode = eval("\$$mnemonic"); +	$line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); +	if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } +	elsif ($mnemonic)           { $line = $c.$mnemonic.$f."\t".$line; } +    } + +    print $line if ($line); +    print "\n"; +} + +close STDOUT; diff --git a/openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl b/openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl new file mode 100644 index 0000000..bfdada8 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/sparcv9_modes.pl @@ -0,0 +1,1702 @@ +#! /usr/bin/env perl +# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License").  You may not use +# this file except in compliance with the License.  You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + + +# Specific modes implementations for SPARC Architecture 2011. 
There +# is T4 dependency though, an ASI value that is not specified in the +# Architecture Manual. But as SPARC universe is rather monocultural, +# we imply that processor capable of executing crypto instructions +# can handle the ASI in question as well. This means that we ought to +# keep eyes open when new processors emerge... +# +# As for above mentioned ASI. It's so called "block initializing +# store" which cancels "read" in "read-update-write" on cache lines. +# This is "cooperative" optimization, as it reduces overall pressure +# on memory interface. Benefits can't be observed/quantified with +# usual benchmarks, on the contrary you can notice that single-thread +# performance for parallelizable modes is ~1.5% worse for largest +# block sizes [though few percent better for not so long ones]. All +# this based on suggestions from David Miller. + +$::bias="STACK_BIAS"; +$::frame="STACK_FRAME"; +$::size_t_cc="SIZE_T_CC"; + +sub asm_init {		# to be called with @ARGV as argument +    for (@_)		{ $::abibits=64 if (/\-m64/ || /\-xarch\=v9/); } +    if ($::abibits==64)	{ $::bias=2047; $::frame=192; $::size_t_cc="%xcc"; } +    else		{ $::bias=0;    $::frame=112; $::size_t_cc="%icc"; } +} + +# unified interface +my ($inp,$out,$len,$key,$ivec)=map("%i$_",(0..5)); +# local variables +my ($ileft,$iright,$ooff,$omask,$ivoff,$blk_init)=map("%l$_",(0..7)); + +sub alg_cbc_encrypt_implement { +my ($alg,$bits) = @_; + +$::code.=<<___; +.globl	${alg}${bits}_t4_cbc_encrypt +.align	32 +${alg}${bits}_t4_cbc_encrypt: +	save		%sp, -$::frame, %sp +	cmp		$len, 0 +	be,pn		$::size_t_cc, .L${bits}_cbc_enc_abort +	srln		$len, 0, $len		! needed on v8+, "nop" on v9 +	sub		$inp, $out, $blk_init	! $inp!=$out +___ +$::code.=<<___ if (!$::evp); +	andcc		$ivec, 7, $ivoff +	alignaddr	$ivec, %g0, $ivec + +	ldd		[$ivec + 0], %f0	! 
load ivec +	bz,pt		%icc, 1f +	ldd		[$ivec + 8], %f2 +	ldd		[$ivec + 16], %f4 +	faligndata	%f0, %f2, %f0 +	faligndata	%f2, %f4, %f2 +1: +___ +$::code.=<<___ if ($::evp); +	ld		[$ivec + 0], %f0 +	ld		[$ivec + 4], %f1 +	ld		[$ivec + 8], %f2 +	ld		[$ivec + 12], %f3 +___ +$::code.=<<___; +	prefetch	[$inp], 20 +	prefetch	[$inp + 63], 20 +	call		_${alg}${bits}_load_enckey +	and		$inp, 7, $ileft +	andn		$inp, 7, $inp +	sll		$ileft, 3, $ileft +	mov		64, $iright +	mov		0xff, $omask +	sub		$iright, $ileft, $iright +	and		$out, 7, $ooff +	cmp		$len, 127 +	movrnz		$ooff, 0, $blk_init		! if (	$out&7 || +	movleu		$::size_t_cc, 0, $blk_init	!	$len<128 || +	brnz,pn		$blk_init, .L${bits}cbc_enc_blk	!	$inp==$out) +	srl		$omask, $ooff, $omask + +	alignaddrl	$out, %g0, $out +	srlx		$len, 4, $len +	prefetch	[$out], 22 + +.L${bits}_cbc_enc_loop: +	ldx		[$inp + 0], %o0 +	brz,pt		$ileft, 4f +	ldx		[$inp + 8], %o1 + +	ldx		[$inp + 16], %o2 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	sllx		%o1, $ileft, %o1 +	or		%g1, %o0, %o0 +	srlx		%o2, $iright, %o2 +	or		%o2, %o1, %o1 +4: +	xor		%g4, %o0, %o0		! ^= rk[0] +	xor		%g5, %o1, %o1 +	movxtod		%o0, %f12 +	movxtod		%o1, %f14 + +	fxor		%f12, %f0, %f0		! ^= ivec +	fxor		%f14, %f2, %f2 +	prefetch	[$out + 63], 22 +	prefetch	[$inp + 16+63], 20 +	call		_${alg}${bits}_encrypt_1x +	add		$inp, 16, $inp + +	brnz,pn		$ooff, 2f +	sub		$len, 1, $len +		 +	std		%f0, [$out + 0] +	std		%f2, [$out + 8] +	brnz,pt		$len, .L${bits}_cbc_enc_loop +	add		$out, 16, $out +___ +$::code.=<<___ if ($::evp); +	st		%f0, [$ivec + 0] +	st		%f1, [$ivec + 4] +	st		%f2, [$ivec + 8] +	st		%f3, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); +	brnz,pn		$ivoff, 3f +	nop + +	std		%f0, [$ivec + 0]	! write out ivec +	std		%f2, [$ivec + 8] +___ +$::code.=<<___; +.L${bits}_cbc_enc_abort: +	ret +	restore + +.align	16 +2:	ldxa		[$inp]0x82, %o0		! avoid read-after-write hazard +						! and ~3x deterioration +						! in inp==out case +	faligndata	%f0, %f0, %f4		! 
handle unaligned output +	faligndata	%f0, %f2, %f6 +	faligndata	%f2, %f2, %f8 + +	stda		%f4, [$out + $omask]0xc0	! partial store +	std		%f6, [$out + 8] +	add		$out, 16, $out +	orn		%g0, $omask, $omask +	stda		%f8, [$out + $omask]0xc0	! partial store + +	brnz,pt		$len, .L${bits}_cbc_enc_loop+4 +	orn		%g0, $omask, $omask +___ +$::code.=<<___ if ($::evp); +	st		%f0, [$ivec + 0] +	st		%f1, [$ivec + 4] +	st		%f2, [$ivec + 8] +	st		%f3, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); +	brnz,pn		$ivoff, 3f +	nop + +	std		%f0, [$ivec + 0]	! write out ivec +	std		%f2, [$ivec + 8] +	ret +	restore + +.align	16 +3:	alignaddrl	$ivec, $ivoff, %g0	! handle unaligned ivec +	mov		0xff, $omask +	srl		$omask, $ivoff, $omask +	faligndata	%f0, %f0, %f4 +	faligndata	%f0, %f2, %f6 +	faligndata	%f2, %f2, %f8 +	stda		%f4, [$ivec + $omask]0xc0 +	std		%f6, [$ivec + 8] +	add		$ivec, 16, $ivec +	orn		%g0, $omask, $omask +	stda		%f8, [$ivec + $omask]0xc0 +___ +$::code.=<<___; +	ret +	restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align	32 +.L${bits}cbc_enc_blk: +	add	$out, $len, $blk_init +	and	$blk_init, 63, $blk_init	! tail +	sub	$len, $blk_init, $len +	add	$blk_init, 15, $blk_init	! round up to 16n +	srlx	$len, 4, $len +	srl	$blk_init, 4, $blk_init + +.L${bits}_cbc_enc_blk_loop: +	ldx		[$inp + 0], %o0 +	brz,pt		$ileft, 5f +	ldx		[$inp + 8], %o1 + +	ldx		[$inp + 16], %o2 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	sllx		%o1, $ileft, %o1 +	or		%g1, %o0, %o0 +	srlx		%o2, $iright, %o2 +	or		%o2, %o1, %o1 +5: +	xor		%g4, %o0, %o0		! ^= rk[0] +	xor		%g5, %o1, %o1 +	movxtod		%o0, %f12 +	movxtod		%o1, %f14 + +	fxor		%f12, %f0, %f0		! ^= ivec +	fxor		%f14, %f2, %f2 +	prefetch	[$inp + 16+63], 20 +	call		_${alg}${bits}_encrypt_1x +	add		$inp, 16, $inp +	sub		$len, 1, $len +		 +	stda		%f0, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	add		$out, 8, $out +	stda		%f2, [$out]0xe2		! 
ASI_BLK_INIT, T4-specific +	brnz,pt		$len, .L${bits}_cbc_enc_blk_loop +	add		$out, 8, $out + +	membar		#StoreLoad|#StoreStore +	brnz,pt		$blk_init, .L${bits}_cbc_enc_loop +	mov		$blk_init, $len +___ +$::code.=<<___ if ($::evp); +	st		%f0, [$ivec + 0] +	st		%f1, [$ivec + 4] +	st		%f2, [$ivec + 8] +	st		%f3, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); +	brnz,pn		$ivoff, 3b +	nop + +	std		%f0, [$ivec + 0]	! write out ivec +	std		%f2, [$ivec + 8] +___ +$::code.=<<___; +	ret +	restore +.type	${alg}${bits}_t4_cbc_encrypt,#function +.size	${alg}${bits}_t4_cbc_encrypt,.-${alg}${bits}_t4_cbc_encrypt +___ +} + +sub alg_cbc_decrypt_implement { +my ($alg,$bits) = @_; + +$::code.=<<___; +.globl	${alg}${bits}_t4_cbc_decrypt +.align	32 +${alg}${bits}_t4_cbc_decrypt: +	save		%sp, -$::frame, %sp +	cmp		$len, 0 +	be,pn		$::size_t_cc, .L${bits}_cbc_dec_abort +	srln		$len, 0, $len		! needed on v8+, "nop" on v9 +	sub		$inp, $out, $blk_init	! $inp!=$out +___ +$::code.=<<___ if (!$::evp); +	andcc		$ivec, 7, $ivoff +	alignaddr	$ivec, %g0, $ivec + +	ldd		[$ivec + 0], %f12	! load ivec +	bz,pt		%icc, 1f +	ldd		[$ivec + 8], %f14 +	ldd		[$ivec + 16], %f0 +	faligndata	%f12, %f14, %f12 +	faligndata	%f14, %f0, %f14 +1: +___ +$::code.=<<___ if ($::evp); +	ld		[$ivec + 0], %f12	! load ivec +	ld		[$ivec + 4], %f13 +	ld		[$ivec + 8], %f14 +	ld		[$ivec + 12], %f15 +___ +$::code.=<<___; +	prefetch	[$inp], 20 +	prefetch	[$inp + 63], 20 +	call		_${alg}${bits}_load_deckey +	and		$inp, 7, $ileft +	andn		$inp, 7, $inp +	sll		$ileft, 3, $ileft +	mov		64, $iright +	mov		0xff, $omask +	sub		$iright, $ileft, $iright +	and		$out, 7, $ooff +	cmp		$len, 255 +	movrnz		$ooff, 0, $blk_init		! if (	$out&7 || +	movleu		$::size_t_cc, 0, $blk_init	!	$len<256 || +	brnz,pn		$blk_init, .L${bits}cbc_dec_blk	!	$inp==$out) +	srl		$omask, $ooff, $omask + +	andcc		$len, 16, %g0		! is number of blocks even? 
+	srlx		$len, 4, $len +	alignaddrl	$out, %g0, $out +	bz		%icc, .L${bits}_cbc_dec_loop2x +	prefetch	[$out], 22 +.L${bits}_cbc_dec_loop: +	ldx		[$inp + 0], %o0 +	brz,pt		$ileft, 4f +	ldx		[$inp + 8], %o1 + +	ldx		[$inp + 16], %o2 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	sllx		%o1, $ileft, %o1 +	or		%g1, %o0, %o0 +	srlx		%o2, $iright, %o2 +	or		%o2, %o1, %o1 +4: +	xor		%g4, %o0, %o2		! ^= rk[0] +	xor		%g5, %o1, %o3 +	movxtod		%o2, %f0 +	movxtod		%o3, %f2 + +	prefetch	[$out + 63], 22 +	prefetch	[$inp + 16+63], 20 +	call		_${alg}${bits}_decrypt_1x +	add		$inp, 16, $inp + +	fxor		%f12, %f0, %f0		! ^= ivec +	fxor		%f14, %f2, %f2 +	movxtod		%o0, %f12 +	movxtod		%o1, %f14 + +	brnz,pn		$ooff, 2f +	sub		$len, 1, $len +		 +	std		%f0, [$out + 0] +	std		%f2, [$out + 8] +	brnz,pt		$len, .L${bits}_cbc_dec_loop2x +	add		$out, 16, $out +___ +$::code.=<<___ if ($::evp); +	st		%f12, [$ivec + 0] +	st		%f13, [$ivec + 4] +	st		%f14, [$ivec + 8] +	st		%f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); +	brnz,pn		$ivoff, .L${bits}_cbc_dec_unaligned_ivec +	nop + +	std		%f12, [$ivec + 0]	! write out ivec +	std		%f14, [$ivec + 8] +___ +$::code.=<<___; +.L${bits}_cbc_dec_abort: +	ret +	restore + +.align	16 +2:	ldxa		[$inp]0x82, %o0		! avoid read-after-write hazard +						! and ~3x deterioration +						! in inp==out case +	faligndata	%f0, %f0, %f4		! handle unaligned output +	faligndata	%f0, %f2, %f6 +	faligndata	%f2, %f2, %f8 + +	stda		%f4, [$out + $omask]0xc0	! partial store +	std		%f6, [$out + 8] +	add		$out, 16, $out +	orn		%g0, $omask, $omask +	stda		%f8, [$out + $omask]0xc0	! partial store + +	brnz,pt		$len, .L${bits}_cbc_dec_loop2x+4 +	orn		%g0, $omask, $omask +___ +$::code.=<<___ if ($::evp); +	st		%f12, [$ivec + 0] +	st		%f13, [$ivec + 4] +	st		%f14, [$ivec + 8] +	st		%f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); +	brnz,pn		$ivoff, .L${bits}_cbc_dec_unaligned_ivec +	nop + +	std		%f12, [$ivec + 0]	! 
write out ivec +	std		%f14, [$ivec + 8] +___ +$::code.=<<___; +	ret +	restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align	32 +.L${bits}_cbc_dec_loop2x: +	ldx		[$inp + 0], %o0 +	ldx		[$inp + 8], %o1 +	ldx		[$inp + 16], %o2 +	brz,pt		$ileft, 4f +	ldx		[$inp + 24], %o3 + +	ldx		[$inp + 32], %o4 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	or		%g1, %o0, %o0 +	sllx		%o1, $ileft, %o1 +	srlx		%o2, $iright, %g1 +	or		%g1, %o1, %o1 +	sllx		%o2, $ileft, %o2 +	srlx		%o3, $iright, %g1 +	or		%g1, %o2, %o2 +	sllx		%o3, $ileft, %o3 +	srlx		%o4, $iright, %o4 +	or		%o4, %o3, %o3 +4: +	xor		%g4, %o0, %o4		! ^= rk[0] +	xor		%g5, %o1, %o5 +	movxtod		%o4, %f0 +	movxtod		%o5, %f2 +	xor		%g4, %o2, %o4 +	xor		%g5, %o3, %o5 +	movxtod		%o4, %f4 +	movxtod		%o5, %f6 + +	prefetch	[$out + 63], 22 +	prefetch	[$inp + 32+63], 20 +	call		_${alg}${bits}_decrypt_2x +	add		$inp, 32, $inp + +	movxtod		%o0, %f8 +	movxtod		%o1, %f10 +	fxor		%f12, %f0, %f0		! ^= ivec +	fxor		%f14, %f2, %f2 +	movxtod		%o2, %f12 +	movxtod		%o3, %f14 +	fxor		%f8, %f4, %f4 +	fxor		%f10, %f6, %f6 + +	brnz,pn		$ooff, 2f +	sub		$len, 2, $len +		 +	std		%f0, [$out + 0] +	std		%f2, [$out + 8] +	std		%f4, [$out + 16] +	std		%f6, [$out + 24] +	brnz,pt		$len, .L${bits}_cbc_dec_loop2x +	add		$out, 32, $out +___ +$::code.=<<___ if ($::evp); +	st		%f12, [$ivec + 0] +	st		%f13, [$ivec + 4] +	st		%f14, [$ivec + 8] +	st		%f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); +	brnz,pn		$ivoff, .L${bits}_cbc_dec_unaligned_ivec +	nop + +	std		%f12, [$ivec + 0]	! write out ivec +	std		%f14, [$ivec + 8] +___ +$::code.=<<___; +	ret +	restore + +.align	16 +2:	ldxa		[$inp]0x82, %o0		! avoid read-after-write hazard +						! and ~3x deterioration +						! in inp==out case +	faligndata	%f0, %f0, %f8		! handle unaligned output +	faligndata	%f0, %f2, %f0 +	faligndata	%f2, %f4, %f2 +	faligndata	%f4, %f6, %f4 +	faligndata	%f6, %f6, %f6 +	stda		%f8, [$out + $omask]0xc0	! 
partial store +	std		%f0, [$out + 8] +	std		%f2, [$out + 16] +	std		%f4, [$out + 24] +	add		$out, 32, $out +	orn		%g0, $omask, $omask +	stda		%f6, [$out + $omask]0xc0	! partial store + +	brnz,pt		$len, .L${bits}_cbc_dec_loop2x+4 +	orn		%g0, $omask, $omask +___ +$::code.=<<___ if ($::evp); +	st		%f12, [$ivec + 0] +	st		%f13, [$ivec + 4] +	st		%f14, [$ivec + 8] +	st		%f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); +	brnz,pn		$ivoff, .L${bits}_cbc_dec_unaligned_ivec +	nop + +	std		%f12, [$ivec + 0]	! write out ivec +	std		%f14, [$ivec + 8] +	ret +	restore + +.align	16 +.L${bits}_cbc_dec_unaligned_ivec: +	alignaddrl	$ivec, $ivoff, %g0	! handle unaligned ivec +	mov		0xff, $omask +	srl		$omask, $ivoff, $omask +	faligndata	%f12, %f12, %f0 +	faligndata	%f12, %f14, %f2 +	faligndata	%f14, %f14, %f4 +	stda		%f0, [$ivec + $omask]0xc0 +	std		%f2, [$ivec + 8] +	add		$ivec, 16, $ivec +	orn		%g0, $omask, $omask +	stda		%f4, [$ivec + $omask]0xc0 +___ +$::code.=<<___; +	ret +	restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align	32 +.L${bits}cbc_dec_blk: +	add	$out, $len, $blk_init +	and	$blk_init, 63, $blk_init	! tail +	sub	$len, $blk_init, $len +	add	$blk_init, 15, $blk_init	! round up to 16n +	srlx	$len, 4, $len +	srl	$blk_init, 4, $blk_init +	sub	$len, 1, $len +	add	$blk_init, 1, $blk_init + +.L${bits}_cbc_dec_blk_loop2x: +	ldx		[$inp + 0], %o0 +	ldx		[$inp + 8], %o1 +	ldx		[$inp + 16], %o2 +	brz,pt		$ileft, 5f +	ldx		[$inp + 24], %o3 + +	ldx		[$inp + 32], %o4 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	or		%g1, %o0, %o0 +	sllx		%o1, $ileft, %o1 +	srlx		%o2, $iright, %g1 +	or		%g1, %o1, %o1 +	sllx		%o2, $ileft, %o2 +	srlx		%o3, $iright, %g1 +	or		%g1, %o2, %o2 +	sllx		%o3, $ileft, %o3 +	srlx		%o4, $iright, %o4 +	or		%o4, %o3, %o3 +5: +	xor		%g4, %o0, %o4		! 
^= rk[0] +	xor		%g5, %o1, %o5 +	movxtod		%o4, %f0 +	movxtod		%o5, %f2 +	xor		%g4, %o2, %o4 +	xor		%g5, %o3, %o5 +	movxtod		%o4, %f4 +	movxtod		%o5, %f6 + +	prefetch	[$inp + 32+63], 20 +	call		_${alg}${bits}_decrypt_2x +	add		$inp, 32, $inp +	subcc		$len, 2, $len + +	movxtod		%o0, %f8 +	movxtod		%o1, %f10 +	fxor		%f12, %f0, %f0		! ^= ivec +	fxor		%f14, %f2, %f2 +	movxtod		%o2, %f12 +	movxtod		%o3, %f14 +	fxor		%f8, %f4, %f4 +	fxor		%f10, %f6, %f6 + +	stda		%f0, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	add		$out, 8, $out +	stda		%f2, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	add		$out, 8, $out +	stda		%f4, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	add		$out, 8, $out +	stda		%f6, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	bgu,pt		$::size_t_cc, .L${bits}_cbc_dec_blk_loop2x +	add		$out, 8, $out + +	add		$blk_init, $len, $len +	andcc		$len, 1, %g0		! is number of blocks even? +	membar		#StoreLoad|#StoreStore +	bnz,pt		%icc, .L${bits}_cbc_dec_loop +	srl		$len, 0, $len +	brnz,pn		$len, .L${bits}_cbc_dec_loop2x +	nop +___ +$::code.=<<___ if ($::evp); +	st		%f12, [$ivec + 0]	! write out ivec +	st		%f13, [$ivec + 4] +	st		%f14, [$ivec + 8] +	st		%f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); +	brnz,pn		$ivoff, 3b +	nop + +	std		%f12, [$ivec + 0]	! write out ivec +	std		%f14, [$ivec + 8] +___ +$::code.=<<___; +	ret +	restore +.type	${alg}${bits}_t4_cbc_decrypt,#function +.size	${alg}${bits}_t4_cbc_decrypt,.-${alg}${bits}_t4_cbc_decrypt +___ +} + +sub alg_ctr32_implement { +my ($alg,$bits) = @_; + +$::code.=<<___; +.globl	${alg}${bits}_t4_ctr32_encrypt +.align	32 +${alg}${bits}_t4_ctr32_encrypt: +	save		%sp, -$::frame, %sp +	srln		$len, 0, $len		! needed on v8+, "nop" on v9 + +	prefetch	[$inp], 20 +	prefetch	[$inp + 63], 20 +	call		_${alg}${bits}_load_enckey +	sllx		$len, 4, $len + +	ld		[$ivec + 0], %l4	! counter +	ld		[$ivec + 4], %l5 +	ld		[$ivec + 8], %l6 +	ld		[$ivec + 12], %l7 + +	sllx		%l4, 32, %o5 +	or		%l5, %o5, %o5 +	sllx		%l6, 32, %g1 +	xor		%o5, %g4, %g4		! 
^= rk[0] +	xor		%g1, %g5, %g5 +	movxtod		%g4, %f14		! most significant 64 bits + +	sub		$inp, $out, $blk_init	! $inp!=$out +	and		$inp, 7, $ileft +	andn		$inp, 7, $inp +	sll		$ileft, 3, $ileft +	mov		64, $iright +	mov		0xff, $omask +	sub		$iright, $ileft, $iright +	and		$out, 7, $ooff +	cmp		$len, 255 +	movrnz		$ooff, 0, $blk_init		! if (	$out&7 || +	movleu		$::size_t_cc, 0, $blk_init	!	$len<256 || +	brnz,pn		$blk_init, .L${bits}_ctr32_blk	!	$inp==$out) +	srl		$omask, $ooff, $omask + +	andcc		$len, 16, %g0		! is number of blocks even? +	alignaddrl	$out, %g0, $out +	bz		%icc, .L${bits}_ctr32_loop2x +	srlx		$len, 4, $len +.L${bits}_ctr32_loop: +	ldx		[$inp + 0], %o0 +	brz,pt		$ileft, 4f +	ldx		[$inp + 8], %o1 + +	ldx		[$inp + 16], %o2 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	sllx		%o1, $ileft, %o1 +	or		%g1, %o0, %o0 +	srlx		%o2, $iright, %o2 +	or		%o2, %o1, %o1 +4: +	xor		%g5, %l7, %g1		! ^= rk[0] +	add		%l7, 1, %l7 +	movxtod		%g1, %f2 +	srl		%l7, 0, %l7		! clruw +	prefetch	[$out + 63], 22 +	prefetch	[$inp + 16+63], 20 +___ +$::code.=<<___ if ($alg eq "aes"); +	aes_eround01	%f16, %f14, %f2, %f4 +	aes_eround23	%f18, %f14, %f2, %f2 +___ +$::code.=<<___ if ($alg eq "cmll"); +	camellia_f	%f16, %f2, %f14, %f2 +	camellia_f	%f18, %f14, %f2, %f0 +___ +$::code.=<<___; +	call		_${alg}${bits}_encrypt_1x+8 +	add		$inp, 16, $inp + +	movxtod		%o0, %f10 +	movxtod		%o1, %f12 +	fxor		%f10, %f0, %f0		! ^= inp +	fxor		%f12, %f2, %f2 + +	brnz,pn		$ooff, 2f +	sub		$len, 1, $len +		 +	std		%f0, [$out + 0] +	std		%f2, [$out + 8] +	brnz,pt		$len, .L${bits}_ctr32_loop2x +	add		$out, 16, $out + +	ret +	restore + +.align	16 +2:	ldxa		[$inp]0x82, %o0		! avoid read-after-write hazard +						! and ~3x deterioration +						! in inp==out case +	faligndata	%f0, %f0, %f4		! handle unaligned output +	faligndata	%f0, %f2, %f6 +	faligndata	%f2, %f2, %f8 +	stda		%f4, [$out + $omask]0xc0	! 
partial store +	std		%f6, [$out + 8] +	add		$out, 16, $out +	orn		%g0, $omask, $omask +	stda		%f8, [$out + $omask]0xc0	! partial store + +	brnz,pt		$len, .L${bits}_ctr32_loop2x+4 +	orn		%g0, $omask, $omask + +	ret +	restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align	32 +.L${bits}_ctr32_loop2x: +	ldx		[$inp + 0], %o0 +	ldx		[$inp + 8], %o1 +	ldx		[$inp + 16], %o2 +	brz,pt		$ileft, 4f +	ldx		[$inp + 24], %o3 + +	ldx		[$inp + 32], %o4 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	or		%g1, %o0, %o0 +	sllx		%o1, $ileft, %o1 +	srlx		%o2, $iright, %g1 +	or		%g1, %o1, %o1 +	sllx		%o2, $ileft, %o2 +	srlx		%o3, $iright, %g1 +	or		%g1, %o2, %o2 +	sllx		%o3, $ileft, %o3 +	srlx		%o4, $iright, %o4 +	or		%o4, %o3, %o3 +4: +	xor		%g5, %l7, %g1		! ^= rk[0] +	add		%l7, 1, %l7 +	movxtod		%g1, %f2 +	srl		%l7, 0, %l7		! clruw +	xor		%g5, %l7, %g1 +	add		%l7, 1, %l7 +	movxtod		%g1, %f6 +	srl		%l7, 0, %l7		! clruw +	prefetch	[$out + 63], 22 +	prefetch	[$inp + 32+63], 20 +___ +$::code.=<<___ if ($alg eq "aes"); +	aes_eround01	%f16, %f14, %f2, %f8 +	aes_eround23	%f18, %f14, %f2, %f2 +	aes_eround01	%f16, %f14, %f6, %f10 +	aes_eround23	%f18, %f14, %f6, %f6 +___ +$::code.=<<___ if ($alg eq "cmll"); +	camellia_f	%f16, %f2, %f14, %f2 +	camellia_f	%f16, %f6, %f14, %f6 +	camellia_f	%f18, %f14, %f2, %f0 +	camellia_f	%f18, %f14, %f6, %f4 +___ +$::code.=<<___; +	call		_${alg}${bits}_encrypt_2x+16 +	add		$inp, 32, $inp + +	movxtod		%o0, %f8 +	movxtod		%o1, %f10 +	movxtod		%o2, %f12 +	fxor		%f8, %f0, %f0		! ^= inp +	movxtod		%o3, %f8 +	fxor		%f10, %f2, %f2 +	fxor		%f12, %f4, %f4 +	fxor		%f8, %f6, %f6 + +	brnz,pn		$ooff, 2f +	sub		$len, 2, $len +		 +	std		%f0, [$out + 0] +	std		%f2, [$out + 8] +	std		%f4, [$out + 16] +	std		%f6, [$out + 24] +	brnz,pt		$len, .L${bits}_ctr32_loop2x +	add		$out, 32, $out + +	ret +	restore + +.align	16 +2:	ldxa		[$inp]0x82, %o0		! avoid read-after-write hazard +						! and ~3x deterioration +						! 
in inp==out case +	faligndata	%f0, %f0, %f8		! handle unaligned output +	faligndata	%f0, %f2, %f0 +	faligndata	%f2, %f4, %f2 +	faligndata	%f4, %f6, %f4 +	faligndata	%f6, %f6, %f6 + +	stda		%f8, [$out + $omask]0xc0	! partial store +	std		%f0, [$out + 8] +	std		%f2, [$out + 16] +	std		%f4, [$out + 24] +	add		$out, 32, $out +	orn		%g0, $omask, $omask +	stda		%f6, [$out + $omask]0xc0	! partial store + +	brnz,pt		$len, .L${bits}_ctr32_loop2x+4 +	orn		%g0, $omask, $omask + +	ret +	restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align	32 +.L${bits}_ctr32_blk: +	add	$out, $len, $blk_init +	and	$blk_init, 63, $blk_init	! tail +	sub	$len, $blk_init, $len +	add	$blk_init, 15, $blk_init	! round up to 16n +	srlx	$len, 4, $len +	srl	$blk_init, 4, $blk_init +	sub	$len, 1, $len +	add	$blk_init, 1, $blk_init + +.L${bits}_ctr32_blk_loop2x: +	ldx		[$inp + 0], %o0 +	ldx		[$inp + 8], %o1 +	ldx		[$inp + 16], %o2 +	brz,pt		$ileft, 5f +	ldx		[$inp + 24], %o3 + +	ldx		[$inp + 32], %o4 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	or		%g1, %o0, %o0 +	sllx		%o1, $ileft, %o1 +	srlx		%o2, $iright, %g1 +	or		%g1, %o1, %o1 +	sllx		%o2, $ileft, %o2 +	srlx		%o3, $iright, %g1 +	or		%g1, %o2, %o2 +	sllx		%o3, $ileft, %o3 +	srlx		%o4, $iright, %o4 +	or		%o4, %o3, %o3 +5: +	xor		%g5, %l7, %g1		! ^= rk[0] +	add		%l7, 1, %l7 +	movxtod		%g1, %f2 +	srl		%l7, 0, %l7		! clruw +	xor		%g5, %l7, %g1 +	add		%l7, 1, %l7 +	movxtod		%g1, %f6 +	srl		%l7, 0, %l7		! 
clruw +	prefetch	[$inp + 32+63], 20 +___ +$::code.=<<___ if ($alg eq "aes"); +	aes_eround01	%f16, %f14, %f2, %f8 +	aes_eround23	%f18, %f14, %f2, %f2 +	aes_eround01	%f16, %f14, %f6, %f10 +	aes_eround23	%f18, %f14, %f6, %f6 +___ +$::code.=<<___ if ($alg eq "cmll"); +	camellia_f	%f16, %f2, %f14, %f2 +	camellia_f	%f16, %f6, %f14, %f6 +	camellia_f	%f18, %f14, %f2, %f0 +	camellia_f	%f18, %f14, %f6, %f4 +___ +$::code.=<<___; +	call		_${alg}${bits}_encrypt_2x+16 +	add		$inp, 32, $inp +	subcc		$len, 2, $len + +	movxtod		%o0, %f8 +	movxtod		%o1, %f10 +	movxtod		%o2, %f12 +	fxor		%f8, %f0, %f0		! ^= inp +	movxtod		%o3, %f8 +	fxor		%f10, %f2, %f2 +	fxor		%f12, %f4, %f4 +	fxor		%f8, %f6, %f6 + +	stda		%f0, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	add		$out, 8, $out +	stda		%f2, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	add		$out, 8, $out +	stda		%f4, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	add		$out, 8, $out +	stda		%f6, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	bgu,pt		$::size_t_cc, .L${bits}_ctr32_blk_loop2x +	add		$out, 8, $out + +	add		$blk_init, $len, $len +	andcc		$len, 1, %g0		! is number of blocks even? +	membar		#StoreLoad|#StoreStore +	bnz,pt		%icc, .L${bits}_ctr32_loop +	srl		$len, 0, $len +	brnz,pn		$len, .L${bits}_ctr32_loop2x +	nop + +	ret +	restore +.type	${alg}${bits}_t4_ctr32_encrypt,#function +.size	${alg}${bits}_t4_ctr32_encrypt,.-${alg}${bits}_t4_ctr32_encrypt +___ +} + +sub alg_xts_implement { +my ($alg,$bits,$dir) = @_; +my ($inp,$out,$len,$key1,$key2,$ivec)=map("%i$_",(0..5)); +my $rem=$ivec; + +$::code.=<<___; +.globl	${alg}${bits}_t4_xts_${dir}crypt +.align	32 +${alg}${bits}_t4_xts_${dir}crypt: +	save		%sp, -$::frame-16, %sp +	srln		$len, 0, $len		! needed on v8+, "nop" on v9 + +	mov		$ivec, %o0 +	add		%fp, $::bias-16, %o1 +	call		${alg}_t4_encrypt +	mov		$key2, %o2 + +	add		%fp, $::bias-16, %l7 +	ldxa		[%l7]0x88, %g2 +	add		%fp, $::bias-8, %l7 +	ldxa		[%l7]0x88, %g3		! 
%g3:%g2 is tweak + +	sethi		%hi(0x76543210), %l7 +	or		%l7, %lo(0x76543210), %l7 +	bmask		%l7, %g0, %g0		! byte swap mask + +	prefetch	[$inp], 20 +	prefetch	[$inp + 63], 20 +	call		_${alg}${bits}_load_${dir}ckey +	and		$len, 15,  $rem +	and		$len, -16, $len +___ +$code.=<<___ if ($dir eq "de"); +	mov		0, %l7 +	movrnz		$rem, 16,  %l7 +	sub		$len, %l7, $len +___ +$code.=<<___; + +	sub		$inp, $out, $blk_init	! $inp!=$out +	and		$inp, 7, $ileft +	andn		$inp, 7, $inp +	sll		$ileft, 3, $ileft +	mov		64, $iright +	mov		0xff, $omask +	sub		$iright, $ileft, $iright +	and		$out, 7, $ooff +	cmp		$len, 255 +	movrnz		$ooff, 0, $blk_init		! if (	$out&7 || +	movleu		$::size_t_cc, 0, $blk_init	!	$len<256 || +	brnz,pn		$blk_init, .L${bits}_xts_${dir}blk !	$inp==$out) +	srl		$omask, $ooff, $omask + +	andcc		$len, 16, %g0		! is number of blocks even? +___ +$code.=<<___ if ($dir eq "de"); +	brz,pn		$len, .L${bits}_xts_${dir}steal +___ +$code.=<<___; +	alignaddrl	$out, %g0, $out +	bz		%icc, .L${bits}_xts_${dir}loop2x +	srlx		$len, 4, $len +.L${bits}_xts_${dir}loop: +	ldx		[$inp + 0], %o0 +	brz,pt		$ileft, 4f +	ldx		[$inp + 8], %o1 + +	ldx		[$inp + 16], %o2 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	sllx		%o1, $ileft, %o1 +	or		%g1, %o0, %o0 +	srlx		%o2, $iright, %o2 +	or		%o2, %o1, %o1 +4: +	movxtod		%g2, %f12 +	movxtod		%g3, %f14 +	bshuffle	%f12, %f12, %f12 +	bshuffle	%f14, %f14, %f14 + +	xor		%g4, %o0, %o0		! ^= rk[0] +	xor		%g5, %o1, %o1 +	movxtod		%o0, %f0 +	movxtod		%o1, %f2 + +	fxor		%f12, %f0, %f0		! ^= tweak[0] +	fxor		%f14, %f2, %f2 + +	prefetch	[$out + 63], 22 +	prefetch	[$inp + 16+63], 20 +	call		_${alg}${bits}_${dir}crypt_1x +	add		$inp, 16, $inp + +	fxor		%f12, %f0, %f0		! ^= tweak[0] +	fxor		%f14, %f2, %f2 + +	srax		%g3, 63, %l7		! 
next tweak value +	addcc		%g2, %g2, %g2 +	and		%l7, 0x87, %l7 +	addxc		%g3, %g3, %g3 +	xor		%l7, %g2, %g2 + +	brnz,pn		$ooff, 2f +	sub		$len, 1, $len +		 +	std		%f0, [$out + 0] +	std		%f2, [$out + 8] +	brnz,pt		$len, .L${bits}_xts_${dir}loop2x +	add		$out, 16, $out + +	brnz,pn		$rem, .L${bits}_xts_${dir}steal +	nop + +	ret +	restore + +.align	16 +2:	ldxa		[$inp]0x82, %o0		! avoid read-after-write hazard +						! and ~3x deterioration +						! in inp==out case +	faligndata	%f0, %f0, %f4		! handle unaligned output +	faligndata	%f0, %f2, %f6 +	faligndata	%f2, %f2, %f8 +	stda		%f4, [$out + $omask]0xc0	! partial store +	std		%f6, [$out + 8] +	add		$out, 16, $out +	orn		%g0, $omask, $omask +	stda		%f8, [$out + $omask]0xc0	! partial store + +	brnz,pt		$len, .L${bits}_xts_${dir}loop2x+4 +	orn		%g0, $omask, $omask + +	brnz,pn		$rem, .L${bits}_xts_${dir}steal +	nop + +	ret +	restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align	32 +.L${bits}_xts_${dir}loop2x: +	ldx		[$inp + 0], %o0 +	ldx		[$inp + 8], %o1 +	ldx		[$inp + 16], %o2 +	brz,pt		$ileft, 4f +	ldx		[$inp + 24], %o3 + +	ldx		[$inp + 32], %o4 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	or		%g1, %o0, %o0 +	sllx		%o1, $ileft, %o1 +	srlx		%o2, $iright, %g1 +	or		%g1, %o1, %o1 +	sllx		%o2, $ileft, %o2 +	srlx		%o3, $iright, %g1 +	or		%g1, %o2, %o2 +	sllx		%o3, $ileft, %o3 +	srlx		%o4, $iright, %o4 +	or		%o4, %o3, %o3 +4: +	movxtod		%g2, %f12 +	movxtod		%g3, %f14 +	bshuffle	%f12, %f12, %f12 +	bshuffle	%f14, %f14, %f14 + +	srax		%g3, 63, %l7		! next tweak value +	addcc		%g2, %g2, %g2 +	and		%l7, 0x87, %l7 +	addxc		%g3, %g3, %g3 +	xor		%l7, %g2, %g2 + +	movxtod		%g2, %f8 +	movxtod		%g3, %f10 +	bshuffle	%f8,  %f8,  %f8 +	bshuffle	%f10, %f10, %f10 + +	xor		%g4, %o0, %o0		! ^= rk[0] +	xor		%g5, %o1, %o1 +	xor		%g4, %o2, %o2		! ^= rk[0] +	xor		%g5, %o3, %o3 +	movxtod		%o0, %f0 +	movxtod		%o1, %f2 +	movxtod		%o2, %f4 +	movxtod		%o3, %f6 + +	fxor		%f12, %f0, %f0		! 
^= tweak[0] +	fxor		%f14, %f2, %f2 +	fxor		%f8,  %f4, %f4		! ^= tweak[0] +	fxor		%f10, %f6, %f6 + +	prefetch	[$out + 63], 22 +	prefetch	[$inp + 32+63], 20 +	call		_${alg}${bits}_${dir}crypt_2x +	add		$inp, 32, $inp + +	movxtod		%g2, %f8 +	movxtod		%g3, %f10 + +	srax		%g3, 63, %l7		! next tweak value +	addcc		%g2, %g2, %g2 +	and		%l7, 0x87, %l7 +	addxc		%g3, %g3, %g3 +	xor		%l7, %g2, %g2 + +	bshuffle	%f8,  %f8,  %f8 +	bshuffle	%f10, %f10, %f10 + +	fxor		%f12, %f0, %f0		! ^= tweak[0] +	fxor		%f14, %f2, %f2 +	fxor		%f8,  %f4, %f4 +	fxor		%f10, %f6, %f6 + +	brnz,pn		$ooff, 2f +	sub		$len, 2, $len +		 +	std		%f0, [$out + 0] +	std		%f2, [$out + 8] +	std		%f4, [$out + 16] +	std		%f6, [$out + 24] +	brnz,pt		$len, .L${bits}_xts_${dir}loop2x +	add		$out, 32, $out + +	fsrc2		%f4, %f0 +	fsrc2		%f6, %f2 +	brnz,pn		$rem, .L${bits}_xts_${dir}steal +	nop + +	ret +	restore + +.align	16 +2:	ldxa		[$inp]0x82, %o0		! avoid read-after-write hazard +						! and ~3x deterioration +						! in inp==out case +	faligndata	%f0, %f0, %f8		! handle unaligned output +	faligndata	%f0, %f2, %f10 +	faligndata	%f2, %f4, %f12 +	faligndata	%f4, %f6, %f14 +	faligndata	%f6, %f6, %f0 + +	stda		%f8, [$out + $omask]0xc0	! partial store +	std		%f10, [$out + 8] +	std		%f12, [$out + 16] +	std		%f14, [$out + 24] +	add		$out, 32, $out +	orn		%g0, $omask, $omask +	stda		%f0, [$out + $omask]0xc0	! partial store + +	brnz,pt		$len, .L${bits}_xts_${dir}loop2x+4 +	orn		%g0, $omask, $omask + +	fsrc2		%f4, %f0 +	fsrc2		%f6, %f2 +	brnz,pn		$rem, .L${bits}_xts_${dir}steal +	nop + +	ret +	restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align	32 +.L${bits}_xts_${dir}blk: +	add	$out, $len, $blk_init +	and	$blk_init, 63, $blk_init	! tail +	sub	$len, $blk_init, $len +	add	$blk_init, 15, $blk_init	! 
round up to 16n +	srlx	$len, 4, $len +	srl	$blk_init, 4, $blk_init +	sub	$len, 1, $len +	add	$blk_init, 1, $blk_init + +.L${bits}_xts_${dir}blk2x: +	ldx		[$inp + 0], %o0 +	ldx		[$inp + 8], %o1 +	ldx		[$inp + 16], %o2 +	brz,pt		$ileft, 5f +	ldx		[$inp + 24], %o3 + +	ldx		[$inp + 32], %o4 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	or		%g1, %o0, %o0 +	sllx		%o1, $ileft, %o1 +	srlx		%o2, $iright, %g1 +	or		%g1, %o1, %o1 +	sllx		%o2, $ileft, %o2 +	srlx		%o3, $iright, %g1 +	or		%g1, %o2, %o2 +	sllx		%o3, $ileft, %o3 +	srlx		%o4, $iright, %o4 +	or		%o4, %o3, %o3 +5: +	movxtod		%g2, %f12 +	movxtod		%g3, %f14 +	bshuffle	%f12, %f12, %f12 +	bshuffle	%f14, %f14, %f14 + +	srax		%g3, 63, %l7		! next tweak value +	addcc		%g2, %g2, %g2 +	and		%l7, 0x87, %l7 +	addxc		%g3, %g3, %g3 +	xor		%l7, %g2, %g2 + +	movxtod		%g2, %f8 +	movxtod		%g3, %f10 +	bshuffle	%f8,  %f8,  %f8 +	bshuffle	%f10, %f10, %f10 + +	xor		%g4, %o0, %o0		! ^= rk[0] +	xor		%g5, %o1, %o1 +	xor		%g4, %o2, %o2		! ^= rk[0] +	xor		%g5, %o3, %o3 +	movxtod		%o0, %f0 +	movxtod		%o1, %f2 +	movxtod		%o2, %f4 +	movxtod		%o3, %f6 + +	fxor		%f12, %f0, %f0		! ^= tweak[0] +	fxor		%f14, %f2, %f2 +	fxor		%f8,  %f4, %f4		! ^= tweak[0] +	fxor		%f10, %f6, %f6 + +	prefetch	[$inp + 32+63], 20 +	call		_${alg}${bits}_${dir}crypt_2x +	add		$inp, 32, $inp + +	movxtod		%g2, %f8 +	movxtod		%g3, %f10 + +	srax		%g3, 63, %l7		! next tweak value +	addcc		%g2, %g2, %g2 +	and		%l7, 0x87, %l7 +	addxc		%g3, %g3, %g3 +	xor		%l7, %g2, %g2 + +	bshuffle	%f8,  %f8,  %f8 +	bshuffle	%f10, %f10, %f10 + +	fxor		%f12, %f0, %f0		! ^= tweak[0] +	fxor		%f14, %f2, %f2 +	fxor		%f8,  %f4, %f4 +	fxor		%f10, %f6, %f6 + +	subcc		$len, 2, $len +	stda		%f0, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	add		$out, 8, $out +	stda		%f2, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	add		$out, 8, $out +	stda		%f4, [$out]0xe2		! ASI_BLK_INIT, T4-specific +	add		$out, 8, $out +	stda		%f6, [$out]0xe2		! 
ASI_BLK_INIT, T4-specific +	bgu,pt		$::size_t_cc, .L${bits}_xts_${dir}blk2x +	add		$out, 8, $out + +	add		$blk_init, $len, $len +	andcc		$len, 1, %g0		! is number of blocks even? +	membar		#StoreLoad|#StoreStore +	bnz,pt		%icc, .L${bits}_xts_${dir}loop +	srl		$len, 0, $len +	brnz,pn		$len, .L${bits}_xts_${dir}loop2x +	nop + +	fsrc2		%f4, %f0 +	fsrc2		%f6, %f2 +	brnz,pn		$rem, .L${bits}_xts_${dir}steal +	nop + +	ret +	restore +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +___ +$code.=<<___ if ($dir eq "en"); +.align	32 +.L${bits}_xts_${dir}steal: +	std		%f0, [%fp + $::bias-16]	! copy of output +	std		%f2, [%fp + $::bias-8] + +	srl		$ileft, 3, $ileft +	add		%fp, $::bias-16, %l7 +	add		$inp, $ileft, $inp	! original $inp+$len&-15 +	add		$out, $ooff, $out	! original $out+$len&-15 +	mov		0, $ileft +	nop					! align + +.L${bits}_xts_${dir}stealing: +	ldub		[$inp + $ileft], %o0 +	ldub		[%l7  + $ileft], %o1 +	dec		$rem +	stb		%o0, [%l7  + $ileft] +	stb		%o1, [$out + $ileft] +	brnz		$rem, .L${bits}_xts_${dir}stealing +	inc		$ileft + +	mov		%l7, $inp +	sub		$out, 16, $out +	mov		0, $ileft +	sub		$out, $ooff, $out +	ba		.L${bits}_xts_${dir}loop	! one more time +	mov		1, $len				! $rem is 0 +___ +$code.=<<___ if ($dir eq "de"); +.align	32 +.L${bits}_xts_${dir}steal: +	ldx		[$inp + 0], %o0 +	brz,pt		$ileft, 8f +	ldx		[$inp + 8], %o1 + +	ldx		[$inp + 16], %o2 +	sllx		%o0, $ileft, %o0 +	srlx		%o1, $iright, %g1 +	sllx		%o1, $ileft, %o1 +	or		%g1, %o0, %o0 +	srlx		%o2, $iright, %o2 +	or		%o2, %o1, %o1 +8: +	srax		%g3, 63, %l7		! next tweak value +	addcc		%g2, %g2, %o2 +	and		%l7, 0x87, %l7 +	addxc		%g3, %g3, %o3 +	xor		%l7, %o2, %o2 + +	movxtod		%o2, %f12 +	movxtod		%o3, %f14 +	bshuffle	%f12, %f12, %f12 +	bshuffle	%f14, %f14, %f14 + +	xor		%g4, %o0, %o0		! ^= rk[0] +	xor		%g5, %o1, %o1 +	movxtod		%o0, %f0 +	movxtod		%o1, %f2 + +	fxor		%f12, %f0, %f0		! 
^= tweak[0] +	fxor		%f14, %f2, %f2 + +	call		_${alg}${bits}_${dir}crypt_1x +	add		$inp, 16, $inp + +	fxor		%f12, %f0, %f0		! ^= tweak[0] +	fxor		%f14, %f2, %f2 + +	std		%f0, [%fp + $::bias-16] +	std		%f2, [%fp + $::bias-8] + +	srl		$ileft, 3, $ileft +	add		%fp, $::bias-16, %l7 +	add		$inp, $ileft, $inp	! original $inp+$len&-15 +	add		$out, $ooff, $out	! original $out+$len&-15 +	mov		0, $ileft +	add		$out, 16, $out +	nop					! align + +.L${bits}_xts_${dir}stealing: +	ldub		[$inp + $ileft], %o0 +	ldub		[%l7  + $ileft], %o1 +	dec		$rem +	stb		%o0, [%l7  + $ileft] +	stb		%o1, [$out + $ileft] +	brnz		$rem, .L${bits}_xts_${dir}stealing +	inc		$ileft + +	mov		%l7, $inp +	sub		$out, 16, $out +	mov		0, $ileft +	sub		$out, $ooff, $out +	ba		.L${bits}_xts_${dir}loop	! one more time +	mov		1, $len				! $rem is 0 +___ +$code.=<<___; +	ret +	restore +.type	${alg}${bits}_t4_xts_${dir}crypt,#function +.size	${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt +___ +} + +# Purpose of these subroutines is to explicitly encode VIS instructions, +# so that one can compile the module without having to specify VIS +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# Idea is to reserve for option to produce "universal" binary and let +# programmer detect if current CPU is VIS capable at run-time. 
# unvis(mnemonic, rs1, rs2, rd)
#
# Encode a three-operand VIS floating-point instruction (one of the
# mnemonics listed in %visopf) as a raw ".word" directive.
#
# Returns ".word\t0x%08x !<original text>" on success.  Returns the
# original instruction text ("mnemonic\trs1,rs2,rd") unchanged when the
# mnemonic is not in the table, when an operand is not of the form %fN,
# or when an operand is an odd-numbered register >= 32.
sub unvis {
my ($mnemonic,$rs1,$rs2,$rd)=@_;
my ($ref,$opf);
my %visopf = (	"faligndata"	=> 0x048,
		"bshuffle"	=> 0x04c,
		"fnot2"		=> 0x066,
		"fxor"		=> 0x06c,
		"fsrc2"		=> 0x078	);

    $ref = "$mnemonic\t$rs1,$rs2,$rd";

    if ($opf=$visopf{$mnemonic}) {
	# the loop variable aliases $rs1/$rs2/$rd, so assigning to $_
	# replaces the textual operand with its register number
	foreach ($rs1,$rs2,$rd) {
	    return $ref if (!/%f([0-9]{1,2})/);
	    $_=$1;
	    if ($1>=32) {
		return $ref if ($1&1);
		# re-encode for upper double register addressing
		$_=($1|$1>>5)&31;
	    }
	}

	return	sprintf ".word\t0x%08x !%s",
			0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
			$ref;
    } else {
	return $ref;
    }
}

# unvis3(mnemonic, rs1, rs2, rd)
#
# Same idea as unvis, but for the integer-register instructions listed in
# %visopf below.  Operands must be %g/%o/%l/%i registers; the register
# number is the window bias from %bias plus the digit.  Anything else is
# returned as plain text.
sub unvis3 {
my ($mnemonic,$rs1,$rs2,$rd)=@_;
my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
my ($ref,$opf);
my %visopf = (	"addxc"		=> 0x011,
		"addxccc"	=> 0x013,
		"umulxhi"	=> 0x016,
		"alignaddr"	=> 0x018,
		"bmask"		=> 0x019,
		"alignaddrl"	=> 0x01a	);

    $ref = "$mnemonic\t$rs1,$rs2,$rd";

    if ($opf=$visopf{$mnemonic}) {
	foreach ($rs1,$rs2,$rd) {
	    return $ref if (!/%([goli])([0-9])/);
	    $_=$bias{$1}+$2;
	}

	return	sprintf ".word\t0x%08x !%s",
			0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
			$ref;
    } else {
	return $ref;
    }
}

# unaes_round(mnemonic, rs1, rs2, rs3, rd)
#
# Encode a four-operand AES instruction from %aesopf as ".word".  The
# table contains opcode 0, hence the defined() test rather than a truth
# test.  $rs3 is converted to a register number only when it matches the
# even-numbered %fN pattern; otherwise it is used as given.  $rs1, $rs2
# and $rd follow the same %fN rules as in unvis.
sub unaes_round {	# 4-argument instructions
my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_;
my ($ref,$opf);
my %aesopf = (	"aes_eround01"	=> 0,
		"aes_eround23"	=> 1,
		"aes_dround01"	=> 2,
		"aes_dround23"	=> 3,
		"aes_eround01_l"=> 4,
		"aes_eround23_l"=> 5,
		"aes_dround01_l"=> 6,
		"aes_dround23_l"=> 7,
		"aes_kexpand1"	=> 8	);

    $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd";

    if (defined($opf=$aesopf{$mnemonic})) {
	$rs3 = ($rs3 =~ /%f([0-6]*[02468])/) ? (($1|$1>>5)&31) : $rs3;
	foreach ($rs1,$rs2,$rd) {
	    return $ref if (!/%f([0-9]{1,2})/);
	    $_=$1;
	    if ($1>=32) {
		return $ref if ($1&1);
		# re-encode for upper double register addressing
		$_=($1|$1>>5)&31;
	    }
	}

	return	sprintf ".word\t0x%08x !%s",
			2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|$opf<<5|$rs2,
			$ref;
    } else {
	return $ref;
    }
}

# unaes_kexpand(mnemonic, rs1, rs2, rd)
#
# Encode aes_kexpand0 / aes_kexpand2 (three %fN operands) as ".word";
# falls back to the instruction text under the same rules as unvis.
sub unaes_kexpand {	# 3-argument instructions
my ($mnemonic,$rs1,$rs2,$rd)=@_;
my ($ref,$opf);
my %aesopf = (	"aes_kexpand0"	=> 0x130,
		"aes_kexpand2"	=> 0x131	);

    $ref = "$mnemonic\t$rs1,$rs2,$rd";

    if (defined($opf=$aesopf{$mnemonic})) {
	foreach ($rs1,$rs2,$rd) {
	    return $ref if (!/%f([0-9]{1,2})/);
	    $_=$1;
	    if ($1>=32) {
		return $ref if ($1&1);
		# re-encode for upper double register addressing
		$_=($1|$1>>5)&31;
	    }
	}

	return	sprintf ".word\t0x%08x !%s",
			2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2,
			$ref;
    } else {
	return $ref;
    }
}

# uncamellia_f(mnemonic, rs1, rs2, rs3, rd)
#
# Encode the four-operand camellia_f instruction as ".word".  The opcode
# field is the constant 0xc, so no lookup table is involved; the mnemonic
# is only used for the text that is returned or appended as a comment.
# Operand handling is identical to unaes_round.
sub uncamellia_f {	# 4-argument instructions
my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_;
my $ref;

    $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd";

    $rs3 = ($rs3 =~ /%f([0-6]*[02468])/) ? (($1|$1>>5)&31) : $rs3;
    foreach ($rs1,$rs2,$rd) {
	return $ref if (!/%f([0-9]{1,2})/);
	$_=$1;
	if ($1>=32) {
	    return $ref if ($1&1);
	    # re-encode for upper double register addressing
	    $_=($1|$1>>5)&31;
	}
    }

    return	sprintf ".word\t0x%08x !%s",
		    2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|0xc<<5|$rs2,
		    $ref;
}

# uncamellia3(mnemonic, rs1, rs2, rd)
#
# Encode camellia_fl / camellia_fli (three %fN operands) as ".word";
# falls back to the instruction text under the same rules as unvis.
sub uncamellia3 {	# 3-argument instructions
my ($mnemonic,$rs1,$rs2,$rd)=@_;
my ($ref,$opf);
my %cmllopf = (	"camellia_fl"	=> 0x13c,
		"camellia_fli"	=> 0x13d	);

    $ref = "$mnemonic\t$rs1,$rs2,$rd";

    if (defined($opf=$cmllopf{$mnemonic})) {
	foreach ($rs1,$rs2,$rd) {
	    return $ref if (!/%f([0-9]{1,2})/);
	    $_=$1;
	    if ($1>=32) {
		return $ref if ($1&1);
		# re-encode for upper double register addressing
		$_=($1|$1>>5)&31;
	    }
	}

	return	sprintf ".word\t0x%08x !%s",
			2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2,
			$ref;
    } else {
	return $ref;
    }
}

# unmovxtox(mnemonic, rs, rd)
#
# Encode a two-operand register move from %movxopf as ".word".  Either
# operand may be an integer register (%g/%o/%l/%i, biased per %bias) or a
# floating-point register (%f, bias 0, with the upper-register re-encoding
# applied for numbers >= 32).
sub unmovxtox {		# 2-argument instructions
my ($mnemonic,$rs,$rd)=@_;
my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24, "f" => 0 );
my ($ref,$opf);
my %movxopf = (	"movdtox"	=> 0x110,
		"movstouw"	=> 0x111,
		"movstosw"	=> 0x113,
		"movxtod"	=> 0x118,
		"movwtos"	=> 0x119	);

    $ref = "$mnemonic\t$rs,$rd";

    if (defined($opf=$movxopf{$mnemonic})) {
	foreach ($rs,$rd) {
	    return $ref if (!/%([fgoli])([0-9]{1,2})/);
	    $_=$bias{$1}+$2;
	    if ($2>=32) {
		return $ref if ($2&1);
		# re-encode for upper double register addressing
		$_=($2|$2>>5)&31;
	    }
	}

	return	sprintf ".word\t0x%08x !%s",
			2<<30|$rd<<25|0x36<<19|$opf<<5|$rs,
			$ref;
    } else {
	return $ref;
    }
}

# undes(mnemonic, operand...)
#
# Encode a DES instruction from %desopf as ".word":
#   des_round	- four %fN operands
#   des_kexpand	- three operands; the "%f" prefix is optional in the
#		  pattern, so a bare number is accepted as well
#   des_ip/des_iip - two %fN operands
# Unknown mnemonics and unencodable operands yield the instruction text.
#
# emit_assembler always passes five arguments, the trailing ones being
# undef for the shorter forms; undefined operands are left out of the
# reference text so that it does not end in dangling commas.
sub undes {
my ($mnemonic)=shift;
my @args=@_;
my ($ref,$opf);
my %desopf = (	"des_round"	=> 0b1001,
		"des_ip"	=> 0b100110100,
		"des_iip"	=> 0b100110101,
		"des_kexpand"	=> 0b100110110	);

    $ref = "$mnemonic\t".join(",",grep { defined } @_);

    if (defined($opf=$desopf{$mnemonic})) {	# 4-arg
	if ($mnemonic eq "des_round") {
	    foreach (@args[0..3]) {
		return $ref if (!/%f([0-9]{1,2})/);
		$_=$1;
		if ($1>=32) {
		    return $ref if ($1&1);
		    # re-encode for upper double register addressing
		    $_=($1|$1>>5)&31;
		}
	    }
	    return  sprintf ".word\t0x%08x !%s",
			    2<<30|0b011001<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<9|$args[3]<<25,
			    $ref;
	} elsif ($mnemonic eq "des_kexpand") {	# 3-arg
	    foreach (@args[0..2]) {
		return $ref if (!/(%f)?([0-9]{1,2})/);
		$_=$2;
		if ($2>=32) {
		    return $ref if ($2&1);
		    # re-encode for upper double register addressing
		    $_=($2|$2>>5)&31;
		}
	    }
	    return  sprintf ".word\t0x%08x !%s",
			    2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<25,
			    $ref;
	} else {				# 2-arg
	    foreach (@args[0..1]) {
		return $ref if (!/%f([0-9]{1,2})/);
		$_=$1;
		if ($1>=32) {
		    # the pattern has a single capture group, so the
		    # register number is in $1 (there is no $2 here)
		    return $ref if ($1&1);
		    # re-encode for upper double register addressing
		    $_=($1|$1>>5)&31;
		}
	    }
	    return  sprintf ".word\t0x%08x !%s",
			    2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]<<25,
			    $ref;
	}
    } else {
	return $ref;
    }
}

# emit_assembler()
#
# Post-process the accumulated $::code one line at a time and print the
# result:
#   1. evaluate every `...` span as a Perl expression;
#   2. turn two-operand "f...2[sd]" instructions into three-operand form
#      with %f0 as the first source;
#   3. try the encoders above in order - the substitutions are chained
#      with "or", so the first pattern that matches is the only one
#      applied to the line.
sub emit_assembler {
    foreach (split("\n",$::code)) {
	s/\`([^\`]*)\`/eval $1/ge;

	s/\b(f[a-z]+2[sd]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})\s*$/$1\t%f0,$2,$3/go;

	s/\b(aes_[edk][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
		&unaes_round($1,$2,$3,$4,$5)
	 /geo or
	s/\b(aes_kexpand[02])\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
		&unaes_kexpand($1,$2,$3,$4)
	 /geo or
	s/\b(camellia_f)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
		&uncamellia_f($1,$2,$3,$4,$5)
	 /geo or
	s/\b(camellia_[^s]+)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
		&uncamellia3($1,$2,$3,$4)
	 /geo or
	s/\b(des_\w+)\s+(%f[0-9]{1,2}),\s*([%fx0-9]+)(?:,\s*(%f[0-9]{1,2})(?:,\s*(%f[0-9]{1,2}))?)?/
		&undes($1,$2,$3,$4,$5)
	 /geo or
	s/\b(mov[ds]to\w+)\s+(%f[0-9]{1,2}),\s*(%[goli][0-7])/
		&unmovxtox($1,$2,$3)
	 /geo or
	s/\b(mov[xw]to[ds])\s+(%[goli][0-7]),\s*(%f[0-9]{1,2})/
		&unmovxtox($1,$2,$3)
	 /geo or
	s/\b([fb][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
		&unvis($1,$2,$3,$4)
	 /geo or
	s/\b(umulxhi|bmask|addxc[c]{0,2}|alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
		&unvis3($1,$2,$3,$4)
	 /geo;

	print $_,"\n";
    }
}

1;

# ---- the dump continues here with the next file of the same commit;
# ---- its diff header is kept verbatim, one diff line per comment line
# diff --git a/openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl b/openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl
# new file mode 100755
# index 0000000..6eaefcf
# --- /dev/null
# +++ b/openssl-1.1.0h/crypto/perlasm/x86_64-xlate.pl
# @@ -0,0 +1,1186 @@
#! /usr/bin/env perl
# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# Ascetic x86_64 AT&T to MASM/NASM assembler translator by <appro>.
#
# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T
# format is way easier to parse. Because it's simpler to "gear" from
# Unix ABI to Windows one [see cross-reference "card" at the end of
# file]. Because Linux targets were available first...
#
# In addition the script also "distills" code suitable for GNU
# assembler, so that it can be compiled with more rigid assemblers,
# such as Solaris /usr/ccs/bin/as.
#
# This translator is not designed to convert *arbitrary* assembler
# code from AT&T format to MASM one. It's designed to convert just
# enough to provide for dual-ABI OpenSSL modules development...
# There *are* limitations and you might have to modify your assembler
# code or this script to achieve the desired result...
#
# Currently recognized limitations:
#
# - can't use multiple ops per line;
#
# Dual-ABI styling rules.
#
# 1. Adhere to Unix register and stack layout [see cross-reference
#    ABI "card" at the end for explanation].
# 2. Forget about "red zone," stick to more traditional blended
#    stack frame allocation. If volatile storage is actually required
#    that is. If not, just leave the stack as is.
# 3. Functions tagged with ".type name,@function" get crafted with
#    unified Win64 prologue and epilogue automatically. If you want
#    to take care of ABI differences yourself, tag functions as
#    ".type name,@abi-omnipotent" instead.
# 4. To optimize the Win64 prologue you can specify number of input
#    arguments as ".type name,@function,N." Keep in mind that if N is
#    larger than 6, then you *have to* write "abi-omnipotent" code,
#    because >6 cases can't be addressed with unified prologue.
# 5. Name local labels as .L*, do *not* use dynamic labels such as 1:
#    (sorry about latter).
# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is
#    required to identify the spots, where to inject Win64 epilogue!
#    But on the pros, it's then prefixed with rep automatically:-)
# 7. Stick to explicit ip-relative addressing. If you have to use
#    GOTPCREL addressing, stick to mov symbol@GOTPCREL(%rip),%r??.
#    Both are recognized and translated to proper Win64 addressing
#    modes. To support legacy code a synthetic directive, .picmeup,
#    is implemented. It puts address of the *next* instruction into
#    target register, e.g.:
#
#		.picmeup	%rax
#		lea		.Label-.(%rax),%rax
#
# 8. In order to provide for structured exception handling unified
#    Win64 prologue copies %rsp value to %rax. For further details
#    see SEH paragraph at the end.
# 9.
#    .init segment is allowed to contain calls to functions only.
# a. If function accepts more than 4 arguments *and* >4th argument
#    is declared as non 64-bit value, do clear its upper part.


use strict;

# Command line: [flavour] [output].  If the first argument contains a
# dot it is taken to be the output file name and no flavour is set.
my $flavour = shift;
my $output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Redirect STDOUT to the output file.  The call is parenthesised because
# with a bare list-operator open, "||" binds to the (always true) mode
# string, so die could never run and a failed open went unnoticed.
open(STDOUT,">$output") || die "can't open $output: $!"
	if (defined($output));

# Defaults: GNU assembler producing ELF, unless the output file name
# ends in .asm.
my $gas=1;	$gas=0 if ($output =~ /\.asm$/);
my $elf=1;	$elf=0 if (!$gas);
my $win64=0;
my $prefix="";
my $decor=".L";

my $masmref=8 + 50727*2**-32;	# 8.00.50727 shipped with VS2005
my $masm=0;
my $PTR=" PTR";

my $nasmref=2.03;
my $nasm=0;

# Per-flavour settings.  With no recognised flavour and a .asm output
# the installed assembler (nasm or ml64) is probed for its version.
if    ($flavour eq "mingw64")	{ $gas=1; $elf=0; $win64=1;
				  # ask the C preprocessor named by $ENV{CC}
				  # for the symbol prefix
				  $prefix=`echo __USER_LABEL_PREFIX__ | $ENV{CC} -E -P -`;
				  $prefix =~ s|\R$||; # Better chomp
				}
elsif ($flavour eq "macosx")	{ $gas=1; $elf=0; $prefix="_"; $decor="L\$"; }
elsif ($flavour eq "masm")	{ $gas=0; $elf=0; $masm=$masmref; $win64=1; $decor="\$L\$"; }
elsif ($flavour eq "nasm")	{ $gas=0; $elf=0; $nasm=$nasmref; $win64=1; $decor="\$L\$"; $PTR=""; }
elsif (!$gas)
{   if ($ENV{ASM} =~ m/nasm/ && `nasm -v` =~ m/version ([0-9]+)\.([0-9]+)/i)
    {	$nasm = $1 + $2*0.01; $PTR="";  }
    elsif (`ml64 2>&1` =~ m/Version ([0-9]+)\.([0-9]+)(\.([0-9]+))?/)
    {	$masm = $1 + $2*2**-16 + $4*2**-32;   }
    die "no assembler found on %PATH" if (!($nasm || $masm));
    $win64=1;
    $elf=0;
    $decor="\$L\$";
}

# Translator state shared by the packages below.
my $current_segment;
my $current_function;
my %globals;

{ package opcode;	# pick up opcodes
    sub re {
	my	($class, $line) = @_;
	my	$self = {};
	my	$ret;

	if ($$line =~ /^([a-z][a-z0-9]*)/i) {
	    bless $self,$class;
	    $self->{op} = $1;
	    $ret = $self;
	    $$line = substr($$line,@+[0]); $$line =~ s/^\s+//;

	    undef $self->{sz};
	    if ($self->{op} =~ /^(movz)x?([bw]).*/) {	# movz is pain...
+		$self->{op} = $1; +		$self->{sz} = $2; +	    } elsif ($self->{op} =~ /call|jmp/) { +		$self->{sz} = ""; +	    } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op|insrw)/) { # SSEn +		$self->{sz} = ""; +	    } elsif ($self->{op} =~ /^v/) { # VEX +		$self->{sz} = ""; +	    } elsif ($self->{op} =~ /mov[dq]/ && $$line =~ /%xmm/) { +		$self->{sz} = ""; +	    } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { +		$self->{op} = $1; +		$self->{sz} = $2; +	    } +	} +	$ret; +    } +    sub size { +	my ($self, $sz) = @_; +	$self->{sz} = $sz if (defined($sz) && !defined($self->{sz})); +	$self->{sz}; +    } +    sub out { +	my $self = shift; +	if ($gas) { +	    if ($self->{op} eq "movz") {	# movz is pain... +		sprintf "%s%s%s",$self->{op},$self->{sz},shift; +	    } elsif ($self->{op} =~ /^set/) {  +		"$self->{op}"; +	    } elsif ($self->{op} eq "ret") { +		my $epilogue = ""; +		if ($win64 && $current_function->{abi} eq "svr4") { +		    $epilogue = "movq	8(%rsp),%rdi\n\t" . +				"movq	16(%rsp),%rsi\n\t"; +		} +	    	$epilogue . ".byte	0xf3,0xc3"; +	    } elsif ($self->{op} eq "call" && !$elf && $current_segment eq ".init") { +		".p2align\t3\n\t.quad"; +	    } else { +		"$self->{op}$self->{sz}"; +	    } +	} else { +	    $self->{op} =~ s/^movz/movzx/; +	    if ($self->{op} eq "ret") { +		$self->{op} = ""; +		if ($win64 && $current_function->{abi} eq "svr4") { +		    $self->{op} = "mov	rdi,QWORD$PTR\[8+rsp\]\t;WIN64 epilogue\n\t". 
+				  "mov	rsi,QWORD$PTR\[16+rsp\]\n\t"; +	    	} +		$self->{op} .= "DB\t0F3h,0C3h\t\t;repret"; +	    } elsif ($self->{op} =~ /^(pop|push)f/) { +		$self->{op} .= $self->{sz}; +	    } elsif ($self->{op} eq "call" && $current_segment eq ".CRT\$XCU") { +		$self->{op} = "\tDQ"; +	    }  +	    $self->{op}; +	} +    } +    sub mnemonic { +	my ($self, $op) = @_; +	$self->{op}=$op if (defined($op)); +	$self->{op}; +    } +} +{ package const;	# pick up constants, which start with $ +    sub re { +	my	($class, $line) = @_; +	my	$self = {}; +	my	$ret; + +	if ($$line =~ /^\$([^,]+)/) { +	    bless $self, $class; +	    $self->{value} = $1; +	    $ret = $self; +	    $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; +	} +	$ret; +    } +    sub out { +    	my $self = shift; + +	$self->{value} =~ s/\b(0b[0-1]+)/oct($1)/eig; +	if ($gas) { +	    # Solaris /usr/ccs/bin/as can't handle multiplications +	    # in $self->{value} +	    my $value = $self->{value}; +	    no warnings;    # oct might complain about overflow, ignore here... 
+	    $value =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi; +	    if ($value =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg) { +		$self->{value} = $value; +	    } +	    sprintf "\$%s",$self->{value}; +	} else { +	    my $value = $self->{value}; +	    $value =~ s/0x([0-9a-f]+)/0$1h/ig if ($masm); +	    sprintf "%s",$value; +	} +    } +} +{ package ea;		# pick up effective addresses: expr(%reg,%reg,scale) +    sub re { +	my	($class, $line, $opcode) = @_; +	my	$self = {}; +	my	$ret; + +	# optional * ----vvv--- appears in indirect jmp/call +	if ($$line =~ /^(\*?)([^\(,]*)\(([%\w,]+)\)/) { +	    bless $self, $class; +	    $self->{asterisk} = $1; +	    $self->{label} = $2; +	    ($self->{base},$self->{index},$self->{scale})=split(/,/,$3); +	    $self->{scale} = 1 if (!defined($self->{scale})); +	    $ret = $self; +	    $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + +	    if ($win64 && $self->{label} =~ s/\@GOTPCREL//) { +		die if ($opcode->mnemonic() ne "mov"); +		$opcode->mnemonic("lea"); +	    } +	    $self->{base}  =~ s/^%//; +	    $self->{index} =~ s/^%// if (defined($self->{index})); +	    $self->{opcode} = $opcode; +	} +	$ret; +    } +    sub size {} +    sub out { +	my ($self, $sz) = @_; + +	$self->{label} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; +	$self->{label} =~ s/\.L/$decor/g; + +	# Silently convert all EAs to 64-bit. This is required for +	# elder GNU assembler and results in more compact code, +	# *but* most importantly AES module depends on this feature! +	$self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; +	$self->{base}  =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; + +	# Solaris /usr/ccs/bin/as can't handle multiplications +	# in $self->{label}... +	use integer; +	$self->{label} =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi; +	$self->{label} =~ s/\b([0-9]+\s*[\*\/\%]\s*[0-9]+)\b/eval($1)/eg; + +	# Some assemblers insist on signed presentation of 32-bit +	# offsets, but sign extension is a tricky business in perl... 
+	if ((1<<31)<<1) { +	    $self->{label} =~ s/\b([0-9]+)\b/$1<<32>>32/eg; +	} else { +	    $self->{label} =~ s/\b([0-9]+)\b/$1>>0/eg; +	} + +	if (!$self->{label} && $self->{index} && $self->{scale}==1 && +	    $self->{base} =~ /(rbp|r13)/) { +		$self->{base} = $self->{index}; $self->{index} = $1; +	} + +	if ($gas) { +	    $self->{label} =~ s/^___imp_/__imp__/   if ($flavour eq "mingw64"); + +	    if (defined($self->{index})) { +		sprintf "%s%s(%s,%%%s,%d)",$self->{asterisk}, +					$self->{label}, +					$self->{base}?"%$self->{base}":"", +					$self->{index},$self->{scale}; +	    } else { +		sprintf "%s%s(%%%s)",	$self->{asterisk},$self->{label},$self->{base}; +	    } +	} else { +	    my %szmap = (	b=>"BYTE$PTR",  w=>"WORD$PTR", +			l=>"DWORD$PTR", d=>"DWORD$PTR", +	    		q=>"QWORD$PTR", o=>"OWORD$PTR", +			x=>"XMMWORD$PTR", y=>"YMMWORD$PTR", z=>"ZMMWORD$PTR" ); + +	    $self->{label} =~ s/\./\$/g; +	    $self->{label} =~ s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/ig; +	    $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/); + +	    my $mnemonic = $self->{opcode}->mnemonic(); +	    ($self->{asterisk})				&& ($sz="q") || +	    ($mnemonic =~ /^v?mov([qd])$/)		&& ($sz=$1)  || +	    ($mnemonic =~ /^v?pinsr([qdwb])$/)		&& ($sz=$1)  || +	    ($mnemonic =~ /^vpbroadcast([qdwb])$/)	&& ($sz=$1)  || +	    ($mnemonic =~ /^v(?!perm)[a-z]+[fi]128$/)	&& ($sz="x"); + +	    if (defined($self->{index})) { +		sprintf "%s[%s%s*%d%s]",$szmap{$sz}, +					$self->{label}?"$self->{label}+":"", +					$self->{index},$self->{scale}, +					$self->{base}?"+$self->{base}":""; +	    } elsif ($self->{base} eq "rip") { +		sprintf "%s[%s]",$szmap{$sz},$self->{label}; +	    } else { +		sprintf "%s[%s%s]",$szmap{$sz}, +					$self->{label}?"$self->{label}+":"", +					$self->{base}; +	    } +	} +    } +} +{ package register;	# pick up registers, which start with %. 
+    sub re { +	my	($class, $line, $opcode) = @_; +	my	$self = {}; +	my	$ret; + +	# optional * ----vvv--- appears in indirect jmp/call +	if ($$line =~ /^(\*?)%(\w+)/) { +	    bless $self,$class; +	    $self->{asterisk} = $1; +	    $self->{value} = $2; +	    $opcode->size($self->size()); +	    $ret = $self; +	    $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; +	} +	$ret; +    } +    sub size { +	my	$self = shift; +	my	$ret; + +	if    ($self->{value} =~ /^r[\d]+b$/i)	{ $ret="b"; } +	elsif ($self->{value} =~ /^r[\d]+w$/i)	{ $ret="w"; } +	elsif ($self->{value} =~ /^r[\d]+d$/i)	{ $ret="l"; } +	elsif ($self->{value} =~ /^r[\w]+$/i)	{ $ret="q"; } +	elsif ($self->{value} =~ /^[a-d][hl]$/i){ $ret="b"; } +	elsif ($self->{value} =~ /^[\w]{2}l$/i)	{ $ret="b"; } +	elsif ($self->{value} =~ /^[\w]{2}$/i)	{ $ret="w"; } +	elsif ($self->{value} =~ /^e[a-z]{2}$/i){ $ret="l"; } + +	$ret; +    } +    sub out { +    	my $self = shift; +	if ($gas)	{ sprintf "%s%%%s",$self->{asterisk},$self->{value}; } +	else		{ $self->{value}; } +    } +} +{ package label;	# pick up labels, which end with : +    sub re { +	my	($class, $line) = @_; +	my	$self = {}; +	my	$ret; + +	if ($$line =~ /(^[\.\w]+)\:/) { +	    bless $self,$class; +	    $self->{value} = $1; +	    $ret = $self; +	    $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + +	    $self->{value} =~ s/^\.L/$decor/; +	} +	$ret; +    } +    sub out { +	my $self = shift; + +	if ($gas) { +	    my $func = ($globals{$self->{value}} or $self->{value}) . 
":"; +	    if ($win64	&& +			$current_function->{name} eq $self->{value} && +			$current_function->{abi} eq "svr4") { +		$func .= "\n"; +		$func .= "	movq	%rdi,8(%rsp)\n"; +		$func .= "	movq	%rsi,16(%rsp)\n"; +		$func .= "	movq	%rsp,%rax\n"; +		$func .= "${decor}SEH_begin_$current_function->{name}:\n"; +		my $narg = $current_function->{narg}; +		$narg=6 if (!defined($narg)); +		$func .= "	movq	%rcx,%rdi\n" if ($narg>0); +		$func .= "	movq	%rdx,%rsi\n" if ($narg>1); +		$func .= "	movq	%r8,%rdx\n"  if ($narg>2); +		$func .= "	movq	%r9,%rcx\n"  if ($narg>3); +		$func .= "	movq	40(%rsp),%r8\n" if ($narg>4); +		$func .= "	movq	48(%rsp),%r9\n" if ($narg>5); +	    } +	    $func; +	} elsif ($self->{value} ne "$current_function->{name}") { +	    # Make all labels in masm global. +	    $self->{value} .= ":" if ($masm); +	    $self->{value} . ":"; +	} elsif ($win64 && $current_function->{abi} eq "svr4") { +	    my $func =	"$current_function->{name}" . +			($nasm ? ":" : "\tPROC $current_function->{scope}") . +			"\n"; +	    $func .= "	mov	QWORD$PTR\[8+rsp\],rdi\t;WIN64 prologue\n"; +	    $func .= "	mov	QWORD$PTR\[16+rsp\],rsi\n"; +	    $func .= "	mov	rax,rsp\n"; +	    $func .= "${decor}SEH_begin_$current_function->{name}:"; +	    $func .= ":" if ($masm); +	    $func .= "\n"; +	    my $narg = $current_function->{narg}; +	    $narg=6 if (!defined($narg)); +	    $func .= "	mov	rdi,rcx\n" if ($narg>0); +	    $func .= "	mov	rsi,rdx\n" if ($narg>1); +	    $func .= "	mov	rdx,r8\n"  if ($narg>2); +	    $func .= "	mov	rcx,r9\n"  if ($narg>3); +	    $func .= "	mov	r8,QWORD$PTR\[40+rsp\]\n" if ($narg>4); +	    $func .= "	mov	r9,QWORD$PTR\[48+rsp\]\n" if ($narg>5); +	    $func .= "\n"; +	} else { +	   "$current_function->{name}". +			($nasm ? 
":" : "\tPROC $current_function->{scope}"); +	} +    } +} +{ package expr;		# pick up expressions +    sub re { +	my	($class, $line, $opcode) = @_; +	my	$self = {}; +	my	$ret; + +	if ($$line =~ /(^[^,]+)/) { +	    bless $self,$class; +	    $self->{value} = $1; +	    $ret = $self; +	    $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + +	    $self->{value} =~ s/\@PLT// if (!$elf); +	    $self->{value} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; +	    $self->{value} =~ s/\.L/$decor/g; +	    $self->{opcode} = $opcode; +	} +	$ret; +    } +    sub out { +	my $self = shift; +	if ($nasm && $self->{opcode}->mnemonic()=~m/^j(?![re]cxz)/) { +	    "NEAR ".$self->{value}; +	} else { +	    $self->{value}; +	} +    } +} +{ package directive;	# pick up directives, which start with . +    sub re { +	my	($class, $line) = @_; +	my	$self = {}; +	my	$ret; +	my	$dir; +	my	%opcode =	# lea 2f-1f(%rip),%dst; 1: nop; 2: +		(	"%rax"=>0x01058d48,	"%rcx"=>0x010d8d48, +			"%rdx"=>0x01158d48,	"%rbx"=>0x011d8d48, +			"%rsp"=>0x01258d48,	"%rbp"=>0x012d8d48, +			"%rsi"=>0x01358d48,	"%rdi"=>0x013d8d48, +			"%r8" =>0x01058d4c,	"%r9" =>0x010d8d4c, +			"%r10"=>0x01158d4c,	"%r11"=>0x011d8d4c, +			"%r12"=>0x01258d4c,	"%r13"=>0x012d8d4c, +			"%r14"=>0x01358d4c,	"%r15"=>0x013d8d4c	); + +	if ($$line =~ /^\s*(\.\w+)/) { +	    bless $self,$class; +	    $dir = $1; +	    $ret = $self; +	    undef $self->{value}; +	    $$line = substr($$line,@+[0]); $$line =~ s/^\s+//; + +	    SWITCH: for ($dir) { +		/\.picmeup/ && do { if ($$line =~ /(%r[\w]+)/i) { +			    		$dir="\t.long"; +					$$line=sprintf "0x%x,0x90000000",$opcode{$1}; +				    } +				    last; +				  }; +		/\.global|\.globl|\.extern/ +			    && do { $globals{$$line} = $prefix . 
$$line; +				    $$line = $globals{$$line} if ($prefix); +				    last; +				  }; +		/\.type/    && do { my ($sym,$type,$narg) = split(',',$$line); +				    if ($type eq "\@function") { +					undef $current_function; +					$current_function->{name} = $sym; +					$current_function->{abi}  = "svr4"; +					$current_function->{narg} = $narg; +					$current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE"; +				    } elsif ($type eq "\@abi-omnipotent") { +					undef $current_function; +					$current_function->{name} = $sym; +					$current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE"; +				    } +				    $$line =~ s/\@abi\-omnipotent/\@function/; +				    $$line =~ s/\@function.*/\@function/; +				    last; +				  }; +		/\.asciz/   && do { if ($$line =~ /^"(.*)"$/) { +					$dir  = ".byte"; +					$$line = join(",",unpack("C*",$1),0); +				    } +				    last; +				  }; +		/\.rva|\.long|\.quad/ +			    && do { $$line =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; +				    $$line =~ s/\.L/$decor/g; +				    last; +				  }; +	    } + +	    if ($gas) { +		$self->{value} = $dir . "\t" . $$line; + +		if ($dir =~ /\.extern/) { +		    $self->{value} = ""; # swallow extern +		} elsif (!$elf && $dir =~ /\.type/) { +		    $self->{value} = ""; +		    $self->{value} = ".def\t" . ($globals{$1} or $1) . ";\t" . +				(defined($globals{$1})?".scl 2;":".scl 3;") . +				"\t.type 32;\t.endef" +				if ($win64 && $$line =~ /([^,]+),\@function/); +		} elsif (!$elf && $dir =~ /\.size/) { +		    $self->{value} = ""; +		    if (defined($current_function)) { +			$self->{value} .= "${decor}SEH_end_$current_function->{name}:" +				if ($win64 && $current_function->{abi} eq "svr4"); +			undef $current_function; +		    } +		} elsif (!$elf && $dir =~ /\.align/) { +		    $self->{value} = ".p2align\t" . 
(log($$line)/log(2)); +		} elsif ($dir eq ".section") { +		    $current_segment=$$line; +		    if (!$elf && $current_segment eq ".init") { +			if	($flavour eq "macosx")	{ $self->{value} = ".mod_init_func"; } +			elsif	($flavour eq "mingw64")	{ $self->{value} = ".section\t.ctors"; } +		    } +		} elsif ($dir =~ /\.(text|data)/) { +		    $current_segment=".$1"; +		} elsif ($dir =~ /\.hidden/) { +		    if    ($flavour eq "macosx")  { $self->{value} = ".private_extern\t$prefix$$line"; } +		    elsif ($flavour eq "mingw64") { $self->{value} = ""; } +		} elsif ($dir =~ /\.comm/) { +		    $self->{value} = "$dir\t$prefix$$line"; +		    $self->{value} =~ s|,([0-9]+),([0-9]+)$|",$1,".log($2)/log(2)|e if ($flavour eq "macosx"); +		} +		$$line = ""; +		return $self; +	    } + +	    # non-gas case or nasm/masm +	    SWITCH: for ($dir) { +		/\.text/    && do { my $v=undef; +				    if ($nasm) { +					$v="section	.text code align=64\n"; +				    } else { +					$v="$current_segment\tENDS\n" if ($current_segment); +					$current_segment = ".text\$"; +					$v.="$current_segment\tSEGMENT "; +					$v.=$masm>=$masmref ? "ALIGN(256)" : "PAGE"; +					$v.=" 'CODE'"; +				    } +				    $self->{value} = $v; +				    last; +				  }; +		/\.data/    && do { my $v=undef; +				    if ($nasm) { +					$v="section	.data data align=8\n"; +				    } else { +					$v="$current_segment\tENDS\n" if ($current_segment); +					$current_segment = "_DATA"; +					$v.="$current_segment\tSEGMENT"; +				    } +				    $self->{value} = $v; +				    last; +				  }; +		/\.section/ && do { my $v=undef; +				    $$line =~ s/([^,]*).*/$1/; +				    $$line = ".CRT\$XCU" if ($$line eq ".init"); +				    if ($nasm) { +					$v="section	$$line"; +					if ($$line=~/\.([px])data/) { +					    $v.=" rdata align="; +					    $v.=$1 eq "p"? 
4 : 8; +					} elsif ($$line=~/\.CRT\$/i) { +					    $v.=" rdata align=8"; +					} +				    } else { +					$v="$current_segment\tENDS\n" if ($current_segment); +					$v.="$$line\tSEGMENT"; +					if ($$line=~/\.([px])data/) { +					    $v.=" READONLY"; +					    $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref); +					} elsif ($$line=~/\.CRT\$/i) { +					    $v.=" READONLY "; +					    $v.=$masm>=$masmref ? "ALIGN(8)" : "DWORD"; +					} +				    } +				    $current_segment = $$line; +				    $self->{value} = $v; +				    last; +				  }; +		/\.extern/  && do { $self->{value}  = "EXTERN\t".$$line; +				    $self->{value} .= ":NEAR" if ($masm); +				    last; +				  }; +		/\.globl|.global/ +			    && do { $self->{value}  = $masm?"PUBLIC":"global"; +				    $self->{value} .= "\t".$$line; +				    last; +				  }; +		/\.size/    && do { if (defined($current_function)) { +					undef $self->{value}; +					if ($current_function->{abi} eq "svr4") { +					    $self->{value}="${decor}SEH_end_$current_function->{name}:"; +					    $self->{value}.=":\n" if($masm); +					} +					$self->{value}.="$current_function->{name}\tENDP" if($masm && $current_function->{name}); +					undef $current_function; +				    } +				    last; +				  }; +		/\.align/   && do { my $max = ($masm && $masm>=$masmref) ? 
256 : 4096; +				    $self->{value} = "ALIGN\t".($$line>$max?$max:$$line); +				    last; +				  }; +		/\.(value|long|rva|quad)/ +			    && do { my $sz  = substr($1,0,1); +				    my @arr = split(/,\s*/,$$line); +				    my $last = pop(@arr); +				    my $conv = sub  {	my $var=shift; +							$var=~s/^(0b[0-1]+)/oct($1)/eig; +							$var=~s/^0x([0-9a-f]+)/0$1h/ig if ($masm); +							if ($sz eq "D" && ($current_segment=~/.[px]data/ || $dir eq ".rva")) +							{ $var=~s/([_a-z\$\@][_a-z0-9\$\@]*)/$nasm?"$1 wrt ..imagebase":"imagerel $1"/egi; } +							$var; +						    };   + +				    $sz =~ tr/bvlrq/BWDDQ/; +				    $self->{value} = "\tD$sz\t"; +				    for (@arr) { $self->{value} .= &$conv($_).","; } +				    $self->{value} .= &$conv($last); +				    last; +				  }; +		/\.byte/    && do { my @str=split(/,\s*/,$$line); +				    map(s/(0b[0-1]+)/oct($1)/eig,@str); +				    map(s/0x([0-9a-f]+)/0$1h/ig,@str) if ($masm);	 +				    while ($#str>15) { +					$self->{value}.="DB\t" +						.join(",",@str[0..15])."\n"; +					foreach (0..15) { shift @str; } +				    } +				    $self->{value}.="DB\t" +						.join(",",@str) if (@str); +				    last; +				  }; +		/\.comm/    && do { my @str=split(/,\s*/,$$line); +				    my $v=undef; +				    if ($nasm) { +					$v.="common	$prefix@str[0] @str[1]"; +				    } else { +					$v="$current_segment\tENDS\n" if ($current_segment); +					$current_segment = "_DATA"; +					$v.="$current_segment\tSEGMENT\n"; +					$v.="COMM	@str[0]:DWORD:".@str[1]/4; +				    } +				    $self->{value} = $v; +				    last; +				  }; +	    } +	    $$line = ""; +	} + +	$ret; +    } +    sub out { +	my $self = shift; +	$self->{value}; +    } +} + +sub rex { + my $opcode=shift; + my ($dst,$src,$rex)=@_; + +   $rex|=0x04 if($dst>=8); +   $rex|=0x01 if($src>=8); +   push @$opcode,($rex|0x40) if ($rex); +} + +# Upon initial x86_64 introduction SSE>2 extensions were not introduced +# yet. 
In order not to be bothered by tracing exact assembler versions, +# but at the same time to provide a bare security minimum of AES-NI, we +# hard-code some instructions. Extensions past AES-NI on the other hand +# are traced by examining assembler version in individual perlasm +# modules... + +my %regrm = (	"%eax"=>0, "%ecx"=>1, "%edx"=>2, "%ebx"=>3, +		"%esp"=>4, "%ebp"=>5, "%esi"=>6, "%edi"=>7	); + +my $movq = sub {	# elderly gas can't handle inter-register movq +  my $arg = shift; +  my @opcode=(0x66); +    if ($arg =~ /%xmm([0-9]+),\s*%r(\w+)/) { +	my ($src,$dst)=($1,$2); +	if ($dst !~ /[0-9]+/)	{ $dst = $regrm{"%e$dst"}; } +	rex(\@opcode,$src,$dst,0x8); +	push @opcode,0x0f,0x7e; +	push @opcode,0xc0|(($src&7)<<3)|($dst&7);	# ModR/M +	@opcode; +    } elsif ($arg =~ /%r(\w+),\s*%xmm([0-9]+)/) { +	my ($src,$dst)=($2,$1); +	if ($dst !~ /[0-9]+/)	{ $dst = $regrm{"%e$dst"}; } +	rex(\@opcode,$src,$dst,0x8); +	push @opcode,0x0f,0x6e; +	push @opcode,0xc0|(($src&7)<<3)|($dst&7);	# ModR/M +	@opcode; +    } else { +	(); +    } +}; + +my $pextrd = sub { +    if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*(%\w+)/) { +      my @opcode=(0x66); +	my $imm=$1; +	my $src=$2; +	my $dst=$3; +	if ($dst =~ /%r([0-9]+)d/)	{ $dst = $1; } +	elsif ($dst =~ /%e/)		{ $dst = $regrm{$dst}; } +	rex(\@opcode,$src,$dst); +	push @opcode,0x0f,0x3a,0x16; +	push @opcode,0xc0|(($src&7)<<3)|($dst&7);	# ModR/M +	push @opcode,$imm; +	@opcode; +    } else { +	(); +    } +}; + +my $pinsrd = sub { +    if (shift =~ /\$([0-9]+),\s*(%\w+),\s*%xmm([0-9]+)/) { +      my @opcode=(0x66); +	my $imm=$1; +	my $src=$2; +	my $dst=$3; +	if ($src =~ /%r([0-9]+)/)	{ $src = $1; } +	elsif ($src =~ /%e/)		{ $src = $regrm{$src}; } +	rex(\@opcode,$dst,$src); +	push @opcode,0x0f,0x3a,0x22; +	push @opcode,0xc0|(($dst&7)<<3)|($src&7);	# ModR/M +	push @opcode,$imm; +	@opcode; +    } else { +	(); +    } +}; + +my $pshufb = sub { +    if (shift =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { +      my @opcode=(0x66); +	rex(\@opcode,$2,$1); +	
push @opcode,0x0f,0x38,0x00; +	push @opcode,0xc0|($1&7)|(($2&7)<<3);		# ModR/M +	@opcode; +    } else { +	(); +    } +}; + +my $palignr = sub { +    if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { +      my @opcode=(0x66); +	rex(\@opcode,$3,$2); +	push @opcode,0x0f,0x3a,0x0f; +	push @opcode,0xc0|($2&7)|(($3&7)<<3);		# ModR/M +	push @opcode,$1; +	@opcode; +    } else { +	(); +    } +}; + +my $pclmulqdq = sub { +    if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { +      my @opcode=(0x66); +	rex(\@opcode,$3,$2); +	push @opcode,0x0f,0x3a,0x44; +	push @opcode,0xc0|($2&7)|(($3&7)<<3);		# ModR/M +	my $c=$1; +	push @opcode,$c=~/^0/?oct($c):$c; +	@opcode; +    } else { +	(); +    } +}; + +my $rdrand = sub { +    if (shift =~ /%[er](\w+)/) { +      my @opcode=(); +      my $dst=$1; +	if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } +	rex(\@opcode,0,$dst,8); +	push @opcode,0x0f,0xc7,0xf0|($dst&7); +	@opcode; +    } else { +	(); +    } +}; + +my $rdseed = sub { +    if (shift =~ /%[er](\w+)/) { +      my @opcode=(); +      my $dst=$1; +	if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } +	rex(\@opcode,0,$dst,8); +	push @opcode,0x0f,0xc7,0xf8|($dst&7); +	@opcode; +    } else { +	(); +    } +}; + +sub rxb { + my $opcode=shift; + my ($dst,$src1,$src2,$rxb)=@_; + +   $rxb|=0x7<<5; +   $rxb&=~(0x04<<5) if($dst>=8); +   $rxb&=~(0x01<<5) if($src1>=8); +   $rxb&=~(0x02<<5) if($src2>=8); +   push @$opcode,$rxb; +} + +my $vprotd = sub { +    if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { +      my @opcode=(0x8f); +	rxb(\@opcode,$3,$2,-1,0x08); +	push @opcode,0x78,0xc2; +	push @opcode,0xc0|($2&7)|(($3&7)<<3);		# ModR/M +	my $c=$1; +	push @opcode,$c=~/^0/?oct($c):$c; +	@opcode; +    } else { +	(); +    } +}; + +my $vprotq = sub { +    if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { +      my @opcode=(0x8f); +	rxb(\@opcode,$3,$2,-1,0x08); +	push @opcode,0x78,0xc3; +	push @opcode,0xc0|($2&7)|(($3&7)<<3);		# ModR/M +	my 
$c=$1; +	push @opcode,$c=~/^0/?oct($c):$c; +	@opcode; +    } else { +	(); +    } +}; + +my $endbranch = sub { +    (0xf3,0x0f,0x1e,0xfa); +}; + +if ($nasm) { +    print <<___; +default	rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +___ +} elsif ($masm) { +    print <<___; +OPTION	DOTNAME +___ +} +while(defined(my $line=<>)) { + +    $line =~ s|\R$||;           # Better chomp + +    $line =~ s|[#!].*$||;	# get rid of asm-style comments... +    $line =~ s|/\*.*\*/||;	# ... and C-style comments... +    $line =~ s|^\s+||;		# ... and skip white spaces in beginning +    $line =~ s|\s+$||;		# ... and at the end + +    if (my $label=label->re(\$line))	{ print $label->out(); } + +    if (my $directive=directive->re(\$line)) { +	printf "%s",$directive->out(); +    } elsif (my $opcode=opcode->re(\$line)) { +	my $asm = eval("\$".$opcode->mnemonic()); +	 +	if ((ref($asm) eq 'CODE') && scalar(my @bytes=&$asm($line))) { +	    print $gas?".byte\t":"DB\t",join(',',@bytes),"\n"; +	    next; +	} + +	my @args; +	ARGUMENT: while (1) { +	    my $arg; + +	    ($arg=register->re(\$line, $opcode))|| +	    ($arg=const->re(\$line))		|| +	    ($arg=ea->re(\$line, $opcode))	|| +	    ($arg=expr->re(\$line, $opcode))	|| +	    last ARGUMENT; + +	    push @args,$arg; + +	    last ARGUMENT if ($line !~ /^,/); + +	    $line =~ s/^,\s*//; +	} # ARGUMENT: + +	if ($#args>=0) { +	    my $insn; +	    my $sz=$opcode->size(); + +	    if ($gas) { +		$insn = $opcode->out($#args>=1?$args[$#args]->size():$sz); +		@args = map($_->out($sz),@args); +		printf "\t%s\t%s",$insn,join(",",@args); +	    } else { +		$insn = $opcode->out(); +		foreach (@args) { +		    my $arg = $_->out(); +		    # $insn.=$sz compensates for movq, pinsrw, ... 
+		    if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; } +		    if ($arg =~ /^ymm[0-9]+$/) { $insn.=$sz; $sz="y" if(!$sz); last; } +		    if ($arg =~ /^zmm[0-9]+$/) { $insn.=$sz; $sz="z" if(!$sz); last; } +		    if ($arg =~ /^mm[0-9]+$/)  { $insn.=$sz; $sz="q" if(!$sz); last; } +		} +		@args = reverse(@args); +		undef $sz if ($nasm && $opcode->mnemonic() eq "lea"); +		printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args)); +	    } +	} else { +	    printf "\t%s",$opcode->out(); +	} +    } + +    print $line,"\n"; +} + +print "\n$current_segment\tENDS\n"	if ($current_segment && $masm); +print "END\n"				if ($masm); + +close STDOUT; + +################################################# +# Cross-reference x86_64 ABI "card" +# +# 		Unix		Win64 +# %rax		*		* +# %rbx		-		- +# %rcx		#4		#1 +# %rdx		#3		#2 +# %rsi		#2		- +# %rdi		#1		- +# %rbp		-		- +# %rsp		-		- +# %r8		#5		#3 +# %r9		#6		#4 +# %r10		*		* +# %r11		*		* +# %r12		-		- +# %r13		-		- +# %r14		-		- +# %r15		-		- +#  +# (*)	volatile register +# (-)	preserved by callee +# (#)	Nth argument, volatile +# +# In Unix terms top of stack is argument transfer area for arguments +# which could not be accommodated in registers. Or in other words 7th +# [integer] argument resides at 8(%rsp) upon function entry point. +# 128 bytes above %rsp constitute a "red zone" which is not touched +# by signal handlers and can be used as temporal storage without +# allocating a frame. +# +# In Win64 terms N*8 bytes on top of stack is argument transfer area, +# which belongs to/can be overwritten by callee. N is the number of +# arguments passed to callee, *but* not less than 4! This means that +# upon function entry point 5th argument resides at 40(%rsp), as well +# as that 32 bytes from 8(%rsp) can always be used as temporal +# storage [without allocating a frame]. One can actually argue that +# one can assume a "red zone" above stack pointer under Win64 as well. 
+# Point is that at apparently no occasion Windows kernel would alter +# the area above user stack pointer in true asynchronous manner... +# +# All the above means that if assembler programmer adheres to Unix +# register and stack layout, but disregards the "red zone" existence, +# it's possible to use following prologue and epilogue to "gear" from +# Unix to Win64 ABI in leaf functions with not more than 6 arguments. +# +# omnipotent_function: +# ifdef WIN64 +#	movq	%rdi,8(%rsp) +#	movq	%rsi,16(%rsp) +#	movq	%rcx,%rdi	; if 1st argument is actually present +#	movq	%rdx,%rsi	; if 2nd argument is actually ... +#	movq	%r8,%rdx	; if 3rd argument is ... +#	movq	%r9,%rcx	; if 4th argument ... +#	movq	40(%rsp),%r8	; if 5th ... +#	movq	48(%rsp),%r9	; if 6th ... +# endif +#	... +# ifdef WIN64 +#	movq	8(%rsp),%rdi +#	movq	16(%rsp),%rsi +# endif +#	ret +# +################################################# +# Win64 SEH, Structured Exception Handling. +# +# Unlike on Unix systems(*) lack of Win64 stack unwinding information +# has undesired side-effect at run-time: if an exception is raised in +# assembler subroutine such as those in question (basically we're +# referring to segmentation violations caused by malformed input +# parameters), the application is briskly terminated without invoking +# any exception handlers, most notably without generating memory dump +# or any user notification whatsoever. This poses a problem. It's +# possible to address it by registering custom language-specific +# handler that would restore processor context to the state at +# subroutine entry point and return "exception is not handled, keep +# unwinding" code. Writing such handler can be a challenge... But it's +# doable, though requires certain coding convention. 
Consider following +# snippet: +# +# .type	function,@function +# function: +#	movq	%rsp,%rax	# copy rsp to volatile register +#	pushq	%r15		# save non-volatile registers +#	pushq	%rbx +#	pushq	%rbp +#	movq	%rsp,%r11 +#	subq	%rdi,%r11	# prepare [variable] stack frame +#	andq	$-64,%r11 +#	movq	%rax,0(%r11)	# check for exceptions +#	movq	%r11,%rsp	# allocate [variable] stack frame +#	movq	%rax,0(%rsp)	# save original rsp value +# magic_point: +#	... +#	movq	0(%rsp),%rcx	# pull original rsp value +#	movq	-24(%rcx),%rbp	# restore non-volatile registers +#	movq	-16(%rcx),%rbx +#	movq	-8(%rcx),%r15 +#	movq	%rcx,%rsp	# restore original rsp +#	ret +# .size function,.-function +# +# The key is that up to magic_point copy of original rsp value remains +# in chosen volatile register and no non-volatile register, except for +# rsp, is modified. While past magic_point rsp remains constant till +# the very end of the function. In this case custom language-specific +# exception handler would look like this: +# +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +#		CONTEXT *context,DISPATCHER_CONTEXT *disp) +# {	ULONG64 *rsp = (ULONG64 *)context->Rax; +#	if (context->Rip >= magic_point) +#	{   rsp = ((ULONG64 **)context->Rsp)[0]; +#	    context->Rbp = rsp[-3]; +#	    context->Rbx = rsp[-2]; +#	    context->R15 = rsp[-1]; +#	} +#	context->Rsp = (ULONG64)rsp; +#	context->Rdi = rsp[1]; +#	context->Rsi = rsp[2]; +# +#	memcpy (disp->ContextRecord,context,sizeof(CONTEXT)); +#	RtlVirtualUnwind(UNW_FLAG_NHANDLER,disp->ImageBase, +#		disp->ControlPc,disp->FunctionEntry,disp->ContextRecord, +#		&disp->HandlerData,&disp->EstablisherFrame,NULL); +#	return ExceptionContinueSearch; +# } +# +# It's appropriate to implement this handler in assembler, directly in +# function's module. In order to do that one has to know members' +# offsets in CONTEXT and DISPATCHER_CONTEXT structures and some constant +# values. 
Here they are: +# +#	CONTEXT.Rax				120 +#	CONTEXT.Rcx				128 +#	CONTEXT.Rdx				136 +#	CONTEXT.Rbx				144 +#	CONTEXT.Rsp				152 +#	CONTEXT.Rbp				160 +#	CONTEXT.Rsi				168 +#	CONTEXT.Rdi				176 +#	CONTEXT.R8				184 +#	CONTEXT.R9				192 +#	CONTEXT.R10				200 +#	CONTEXT.R11				208 +#	CONTEXT.R12				216 +#	CONTEXT.R13				224 +#	CONTEXT.R14				232 +#	CONTEXT.R15				240 +#	CONTEXT.Rip				248 +#	CONTEXT.Xmm6				512 +#	sizeof(CONTEXT)				1232 +#	DISPATCHER_CONTEXT.ControlPc		0 +#	DISPATCHER_CONTEXT.ImageBase		8 +#	DISPATCHER_CONTEXT.FunctionEntry	16 +#	DISPATCHER_CONTEXT.EstablisherFrame	24 +#	DISPATCHER_CONTEXT.TargetIp		32 +#	DISPATCHER_CONTEXT.ContextRecord	40 +#	DISPATCHER_CONTEXT.LanguageHandler	48 +#	DISPATCHER_CONTEXT.HandlerData		56 +#	UNW_FLAG_NHANDLER			0 +#	ExceptionContinueSearch			1 +# +# In order to tie the handler to the function one has to compose +# couple of structures: one for .xdata segment and one for .pdata. +# +# UNWIND_INFO structure for .xdata segment would be +# +# function_unwind_info: +#	.byte	9,0,0,0 +#	.rva	handler +# +# This structure designates exception handler for a function with +# zero-length prologue, no stack frame or frame register. +# +# To facilitate composing of .pdata structures, auto-generated "gear" +# prologue copies rsp value to rax and denotes next instruction with +# .LSEH_begin_{function_name} label. This essentially defines the SEH +# styling rule mentioned in the beginning. Position of this label is +# chosen in such manner that possible exceptions raised in the "gear" +# prologue would be accounted to caller and unwound from latter's frame. +# End of function is marked with respective .LSEH_end_{function_name} +# label. To summarize, .pdata segment would contain +# +#	.rva	.LSEH_begin_function +#	.rva	.LSEH_end_function +#	.rva	function_unwind_info +# +# Reference to function_unwind_info from .xdata segment is the anchor. +# In case you wonder why references are 32-bit .rvas and not 64-bit +# .quads. 
References put into these two segments are required to be +# *relative* to the base address of the current binary module, a.k.a. +# image base. No Win64 module, be it .exe or .dll, can be larger than +# 2GB and thus such relative references can be and are accommodated in +# 32 bits. +# +# Having reviewed the example function code, one can argue that "movq +# %rsp,%rax" above is redundant. It is not! Keep in mind that on Unix +# rax would contain an undefined value. If this "offends" you, use +# another register and refrain from modifying rax till magic_point is +# reached, i.e. as if it was a non-volatile register. If more registers +# are required prior [variable] frame setup is completed, note that +# nobody says that you can have only one "magic point." You can +# "liberate" non-volatile registers by denoting last stack off-load +# instruction and reflecting it in finer grade unwind logic in handler. +# After all, isn't it why it's called *language-specific* handler... +# +# Attentive reader can notice that exceptions would be mishandled in +# auto-generated "gear" epilogue. Well, exception effectively can't +# occur there, because if memory area used by it was subject to +# segmentation violation, then it would be raised upon call to the +# function (and as already mentioned be accounted to caller, which is +# not a problem). If you're still not comfortable, then define tail +# "magic point" just prior ret instruction and have handler treat it... +# +# (*)	Note that we're talking about run-time, not debug-time. Lack of +#	unwind information makes debugging hard on both Windows and +#	Unix. "Unlike" refers to the fact that on Unix signal handler +#	will always be invoked, core dumped and appropriate exit code +#	returned to parent (for user notification). 
# diff --git a/openssl-1.1.0h/crypto/perlasm/x86asm.pl b/openssl-1.1.0h/crypto/perlasm/x86asm.pl new file mode 100644 index 0000000..1ff46c9 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/x86asm.pl @@ -0,0 +1,310 @@
#! /usr/bin/env perl
# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# require 'x86asm.pl';
# &asm_init(<flavor>,"des-586.pl"[,$i386only]);
# &function_begin("foo");
# ...
# &function_end("foo");
# &asm_finish

# NOTE(review): this assigns the empty list to the *scalar* $out (leaving
# it undef); the buffer every routine below pushes to is @out. Presumably
# "@out=()" was meant -- left as is because it has no effect either way.
$out=();
$i386=0;

# AUTOLOAD in this context has quite unpleasant side effect, namely
# that typos in function calls effectively go to assembler output,
# but on the pros side we don't have to implement one subroutine per
# each opcode...
#
# Any call to an undefined sub is treated as an opcode and handed to the
# back-end's ::generic. push*/pop* opcodes also adjust $stack by 4 so
# that ::wparam keeps addressing the right slot.
sub ::AUTOLOAD
{ my $opcode = $AUTOLOAD;

    die "more than 4 arguments passed to $opcode" if ($#_>3);

    $opcode =~ s/.*:://;
    if    ($opcode =~ /^push/) { $stack+=4; }
    elsif ($opcode =~ /^pop/)  { $stack-=4; }

    &generic($opcode,@_) or die "undefined subroutine \&$AUTOLOAD";
}

# Append one instruction line to @out: "\topcode\n" without operands,
# "\topcode\top1,op2,...\n" with them.
sub ::emit
{ my $opcode=shift;

    if ($#_==-1)    { push(@out,"\t$opcode\n");				}
    else            { push(@out,"\t$opcode\t".join(',',@_)."\n");	}
}

# Name of the low byte register of [e]ax..[e]dx, e.g. "eax" -> "al".
sub ::LB
{   $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'low byte'";
  $1."l";
}
# Name of the high byte register of [e]ax..[e]dx, e.g. "eax" -> "ah".
sub ::HB
{   $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'high byte'";
  $1."h";
}
# Stack bookkeeping helpers; $stack tracks the byte offset that ::wparam
# adds when addressing parameters relative to esp.
sub ::stack_push{ my $num=$_[0]*4; $stack+=$num; &sub("esp",$num);	}
sub ::stack_pop	{ my $num=$_[0]*4; $stack-=$num; &add("esp",$num);	}
sub ::blindpop	{ &pop($_[0]); $stack+=4;				}	# pop without net $stack change
sub ::wparam	{ &DWP($stack+4*$_[0],"esp");				}
sub ::swtmp	{ &DWP(4*$_[0],"esp");					}

# bswap, or its xchg/ror/xchg emulation when targeting plain i386.
sub ::bswap
{   if ($i386)	# emulate bswap for i386
    {	&comment("bswap @_");
	&xchg(&HB(@_),&LB(@_));
	&ror (@_,16);
	&xchg(&HB(@_),&LB(@_));
    }
    else
    {	&generic("bswap",@_);	}
}
# These are made-up opcodes introduced over the years essentially
# by ignorance, just alias them to real ones...
sub ::movb	{ &mov(@_);	}
sub ::xorb	{ &xor(@_);	}
sub ::rotl	{ &rol(@_);	}
sub ::rotr	{ &ror(@_);	}
sub ::exch	{ &xchg(@_);	}
sub ::halt	{ &hlt;		}
sub ::movz	{ &movzx(@_);	}
sub ::pushf	{ &pushfd;	}
sub ::popf	{ &popfd;	}

# 3 argument instructions
sub ::movq
{ my($p1,$p2,$optimize)=@_;

    if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/)
    # movq between mmx registers can sink Intel CPUs
    {	&::pshufw($p1,$p2,0xe4);		}
    else
    {	&::generic("movq",@_);			}
}

# SSE>2 instructions
# The routines below hand-encode register-register forms as raw bytes and
# fall back to ::generic for every other operand combination.
my %regrm = (	"eax"=>0, "ecx"=>1, "edx"=>2, "ebx"=>3,
		"esp"=>4, "ebp"=>5, "esi"=>6, "edi"=>7	);
sub ::pextrd
{ my($dst,$src,$imm)=@_;
    if ("$dst:$src" =~ /(e[a-dsd][ixp]):xmm([0-7])/)
    {	&::data_byte(0x66,0x0f,0x3a,0x16,0xc0|($2<<3)|$regrm{$1},$imm);	}
    else
    {	&::generic("pextrd",@_);		}
}

sub ::pinsrd
{ my($dst,$src,$imm)=@_;
    if ("$dst:$src" =~ /xmm([0-7]):(e[a-dsd][ixp])/)
    {	&::data_byte(0x66,0x0f,0x3a,0x22,0xc0|($1<<3)|$regrm{$2},$imm);	}
    else
    {	&::generic("pinsrd",@_);		}
}

sub ::pshufb
{ my($dst,$src)=@_;
    if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
    {	&data_byte(0x66,0x0f,0x38,0x00,0xc0|($1<<3)|$2);	}
    else
    {	&::generic("pshufb",@_);		}
}

sub ::palignr
{ my($dst,$src,$imm)=@_;
    if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
    {	&::data_byte(0x66,0x0f,0x3a,0x0f,0xc0|($1<<3)|$2,$imm);	}
    else
    {	&::generic("palignr",@_);		}
}

sub ::pclmulqdq
{ my($dst,$src,$imm)=@_;
    if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
    {	&::data_byte(0x66,0x0f,0x3a,0x44,0xc0|($1<<3)|$2,$imm);	}
    else
    {	&::generic("pclmulqdq",@_);		}
}

# rdrand <reg32>: raw bytes 0x0f,0xc7,0xf0|reg for a register listed in
# %regrm. Keyed on %regrm itself so that a name that merely resembles a
# register is passed to ::generic instead of being encoded as eax.
sub ::rdrand
{ my ($dst)=@_;
    if (exists $regrm{$dst})
    {	&::data_byte(0x0f,0xc7,0xf0|$regrm{$dst});	}
    else
    {	&::generic("rdrand",@_);	}
}

# rdseed <reg32>: raw bytes 0x0f,0xc7,0xf8|reg; anything else falls back
# to the textual "rdseed" mnemonic (previously mis-spelled as "rdrand").
sub ::rdseed
{ my ($dst)=@_;
    if (exists $regrm{$dst})
    {	&::data_byte(0x0f,0xc7,0xf8|$regrm{$dst});	}
    else
    {	&::generic("rdseed",@_);	}
}

# Push one prefix byte onto the caller's opcode array (passed by
# reference): bits 7..5 start out set and are cleared for each of
# $dst/$src1/$src2 that is >= 8; the caller's $rxb value supplies the rest.
sub rxb {
 local *opcode=shift;
 my ($dst,$src1,$src2,$rxb)=@_;

   $rxb|=0x7<<5;
   $rxb&=~(0x04<<5) if($dst>=8);
   $rxb&=~(0x01<<5) if($src1>=8);
   $rxb&=~(0x02<<5) if($src2>=8);
   push @opcode,$rxb;
}

# vprotd xmm,xmm,imm hand-encoded as raw bytes; other forms go to ::generic.
sub ::vprotd
{ my $args=join(',',@_);
    if ($args =~ /xmm([0-7]),xmm([0-7]),([x0-9a-f]+)/)
    { my @opcode=(0x8f);
	rxb(\@opcode,$1,$2,-1,0x08);
	push @opcode,0x78,0xc2;
	push @opcode,0xc0|($2&7)|(($1&7)<<3);		# ModR/M
	my $c=$3;
	push @opcode,$c=~/^0/?oct($c):$c;	# oct() also parses 0x.. constants
	&::data_byte(@opcode);
    }
    else
    {	&::generic("vprotd",@_);	}
}

# Emit the four raw bytes 0xf3,0x0f,0x1e,0xfb.
sub ::endbranch
{
    &::data_byte(0xf3,0x0f,0x1e,0xfb);
}

# label management
$lbdecor="L";		# local label decoration, set by package
$label="000";

sub ::islabel		# see if argument is a known label
{ my $i;
    foreach $i (values %label) { return $i if ($i eq $_[0]); }
  $label{$_[0]};	# can be undef
}

sub ::label		# instantiate a function-scope label
{   if (!defined($label{$_[0]}))
    {	$label{$_[0]}="${lbdecor}${label}${_[0]}"; $label++;   }
  $label{$_[0]};
}

sub ::LABEL		# instantiate a file-scope label
{   $label{$_[0]}=$_[1] if (!defined($label{$_[0]}));
  $label{$_[0]};
}

sub ::static_label	{ &::LABEL($_[0],$lbdecor.$_[0]); }

sub ::set_label_B	{ push(@out,"@_:\n"); }
# Define label $_[0] at the current position, aligning first when an
# alignment greater than 1 is given as $_[1]; returns the decorated name.
sub ::set_label
{ my $label=&::label($_[0]);
    &::align($_[1]) if ($_[1]>1);
    &::set_label_B($label);
  $label;
}

sub ::wipe_labels	# wipes function-scope labels
{   foreach my $i (keys %label)
    {	delete $label{$i} if ($label{$i} =~ /^\Q${lbdecor}\E[0-9]{3}/);	}
}

# subroutine management
# Open a function and push ebp, ebx, esi, edi.
sub ::function_begin
{   &function_begin_B(@_);
    $stack=4;
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");
}

# Pop the four saved registers, return, and close the function.
sub ::function_end
{   &pop("edi");
    &pop("esi");
    &pop("ebx");
    &pop("ebp");
    &ret();
    &function_end_B(@_);
    $stack=0;
    &wipe_labels();
}

# Same pop-and-ret sequence for use inside a function body.
sub ::function_end_A
{   &pop("edi");
    &pop("esi");
    &pop("ebx");
    &pop("ebp");
    &ret();
    $stack+=16;	# readjust esp as if we didn't pop anything
}

# Emit a NUL-terminated string as data bytes, 16 bytes per directive.
sub ::asciz
{ my @str=unpack("C*",shift);
    push @str,0;
    while ($#str>15) {
	&data_byte(@str[0..15]);
	foreach (0..15) { shift @str; }
    }
    &data_byte(@str) if (@str);
}

# Let the back-end append its trailer, then print everything collected.
sub ::asm_finish
{   &file_end();
    print @out;
}

# Select the back-end for $type (loading x86gas.pl, x86nasm.pl or
# x86masm.pl), record flavour flags and start the output file.
sub ::asm_init
{ my ($type,$fn,$cpu)=@_;

    $filename=$fn;
    $i386=$cpu;

    $elf=$cpp=$coff=$aout=$macosx=$win32=$netware=$mwerks=$android=0;
    if    (($type eq "elf"))
    {	$elf=1;			require "x86gas.pl";	}
    elsif (($type eq "elf-1"))
    {	$elf=-1;		require "x86gas.pl";	}
    elsif (($type eq "a\.out"))
    {	$aout=1;		require "x86gas.pl";	}
    elsif (($type eq "coff" or $type eq "gaswin"))
    {	$coff=1;		require "x86gas.pl";	}
    elsif (($type eq "win32n"))
    {	$win32=1;		require "x86nasm.pl";	}
    elsif (($type eq "nw-nasm"))
    {	$netware=1;		require "x86nasm.pl";	}
    #elsif (($type eq "nw-mwasm"))
    #{	$netware=1; $mwerks=1;	require "x86nasm.pl";	}
    elsif (($type eq "win32"))
    {	$win32=1;		require "x86masm.pl";	}
    elsif (($type eq "macosx"))
    {	$aout=1; $macosx=1;	require "x86gas.pl";	}
    elsif (($type eq "android"))
    {	$elf=1; $android=1;	require "x86gas.pl";	}
    else
    {	print STDERR <<"EOF";
Pick one target type from
	elf	- Linux, FreeBSD, Solaris x86, etc.
	a.out	- DJGPP, elder OpenBSD, etc.
	coff	- GAS/COFF such as Win32 targets
	win32n	- Windows 95/Windows NT NASM format
	nw-nasm - NetWare NASM format
	macosx	- Mac OS X
EOF
	exit(1);
    }

    $pic=0;
    for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); }

    $filename =~ s/\.pl$//;
    &file($filename);
}

# Default no-op; x86gas.pl replaces it for ELF targets.
sub ::hidden {}

1;
# diff --git a/openssl-1.1.0h/crypto/perlasm/x86gas.pl b/openssl-1.1.0h/crypto/perlasm/x86gas.pl new file mode 100644 index 0000000..2c8fce0 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/x86gas.pl @@ -0,0 +1,265 @@
#! /usr/bin/env perl
# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


package x86gas;

*out=\@::out;

$::lbdecor=$::aout?"L":".L";		# local label decoration
$nmdecor=($::aout or $::coff)?"_":"";	# external name decoration

$initseg="";

$align=16;
$align=log($align)/log(2) if ($::aout);
$com_start="#" if ($::aout or $::coff);

# Size suffix implied by a %-prefixed register operand: "l" for %e..,
# "b" for byte registers, undef for names starting %y/%x/%m, else "w".
# (Declared without a prototype: it takes one argument.)
sub opsize
{ my $reg=shift;
    if    ($reg =~ m/^%e/o)		{ "l"; }
    elsif ($reg =~ m/^%[a-d][hl]$/o)	{ "b"; }
    elsif ($reg =~ m/^%[yxm]/o)		{ undef; }
    else				{ "w"; }
}

# swap arguments;
# expand opcode with size suffix;
# prefix numeric constants with $;
sub ::generic
{ my($opcode,@arg)=@_;
  my($suffix,$dst,$src);

    @arg=reverse(@arg);

    for (@arg)
    {	s/^(\*?)(e?[a-dsixphl]{2})$/$1%$2/o;	# gp registers
	s/^([xy]?mm[0-7])$/%$1/o;		# xmm/mmx registers
	s/^(\-?[0-9]+)$/\$$1/o;			# constants
	s/^(\-?0x[0-9a-f]+)$/\$$1/o;		# constants
    }

    $dst = $arg[$#arg]		if ($#arg>=0);
    $src = $arg[$#arg-1]	if ($#arg>=1);
    if    ($dst =~ m/^%/o)	{ $suffix=&opsize($dst); }
    elsif ($src =~ m/^%/o)	{ $suffix=&opsize($src); }
    else			{ $suffix="l";           }
    undef $suffix if ($dst =~ m/^%[xm]/o || $src =~ m/^%[xm]/o);

    if ($#_==0)				{ &::emit($opcode);		}
    elsif ($#_==1 && $opcode =~ m/^(call|clflush|j|loop|set)/o)
					{ &::emit($opcode,@arg);	}
    else				{ &::emit($opcode.$suffix,@arg);}

  1;
}
#
# opcodes not covered by ::generic above, mostly inconsistent namings...
#
sub ::movzx	{ &::movzb(@_);			}
sub ::pushfd	{ &::pushfl;			}
sub ::popfd	{ &::popfl;			}
sub ::cpuid	{ &::emit(".byte\t0x0f,0xa2");	}
sub ::rdtsc	{ &::emit(".byte\t0x0f,0x31");	}

sub ::call	{ &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); }
sub ::call_ptr	{ &::generic("call","*$_[0]");	}
sub ::jmp_ptr	{ &::generic("jmp","*$_[0]");	}

*::bswap = sub	{ &::emit("bswap","%$_[0]");	} if (!$::i386);

# Build a memory operand string of the form addr(base,index,scale).
# When called as (addr,index,scale) the arguments are shifted so that
# there is no base register.
sub ::DWP
{ my($addr,$reg1,$reg2,$idx)=@_;
  my $ret="";

    if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }

    $addr =~ s/^\s+//;
    # prepend global references with optional underscore
    $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige;

    $reg1 = "%$reg1" if ($reg1);
    $reg2 = "%$reg2" if ($reg2);

    $ret .= $addr if (($addr ne "") && ($addr ne 0));

    if ($reg2)
    {	$idx!= 0 or $idx=1;
	$ret .= "($reg1,$reg2,$idx)";
    }
    elsif ($reg1)
    {	$ret .= "($reg1)";	}

  $ret;
}
# This back-end renders every pointer width the same way.
sub ::QWP	{ &::DWP(@_);	}
sub ::BP	{ &::DWP(@_);	}
sub ::WP	{ &::DWP(@_);	}
sub ::BC	{ @_;		}
sub ::DWC	{ @_;		}

sub ::file
{   push(@out,".file\t\"$_[0].s\"\n.text\n");	}

# Emit the function header: optional .globl, type/def directive,
# alignment, the decorated name and -- for global functions (names not
# starting with "_") -- a local "_begin" label.
sub ::function_begin_B
{ my $func=shift;
  my $global=($func !~ /^_/);
  my $begin="${::lbdecor}_${func}_begin";

    &::LABEL($func,$global?"$begin":"$nmdecor$func");
    $func=$nmdecor.$func;

    push(@out,".globl\t$func\n")	if ($global);
    if ($::coff)
    {	push(@out,".def\t$func;\t.scl\t".(3-$global).";\t.type\t32;\t.endef\n"); }
    elsif (($::aout and !$::pic) or $::macosx)
    { }
    else
    {	push(@out,".type\t$func,\@function\n"); }
    push(@out,".align\t$align\n");
    push(@out,"$func:\n");
    push(@out,"$begin:\n")		if ($global);
    $::stack=4;
}

# Close a function: .size for ELF, then reset stack tracking and labels.
sub ::function_end_B
{ my $func=shift;
    push(@out,".size\t$nmdecor$func,.-".&::LABEL($func)."\n") if ($::elf);
    $::stack=0;
    &::wipe_labels();
}

# Emit comments; on ELF (or when no comment leader is set) a single blank
# line is emitted instead.
sub ::comment
{
    if (!defined($com_start) or $::elf)
    {	# Regarding $::elf above...
	# GNU and SVR4 as'es use different comment delimiters,
	push(@out,"\n");	# so we just skip ELF comments...
	return;
    }
    foreach (@_)
    {
	if (/^\s*$/)
	{ push(@out,"\n"); }
	else
	{ push(@out,"\t$com_start $_ $com_end\n"); }
    }
}

sub ::external_label
{   foreach(@_) { &::LABEL($_,$nmdecor.$_); }   }

sub ::public_label
{   push(@out,".globl\t".&::LABEL($_[0],$nmdecor.$_[0])."\n");   }

# File trailer: Mac OS X non-lazy pointers, the OPENSSL_ia32cap_P common
# symbol when the output references it, and any collected init segment.
sub ::file_end
{   if ($::macosx)
    {	if (%non_lazy_ptr)
	{   push(@out,".section __IMPORT,__pointers,non_lazy_symbol_pointers\n");
	    foreach my $i (keys %non_lazy_ptr)
	    {	push(@out,"$non_lazy_ptr{$i}:\n.indirect_symbol\t$i\n.long\t0\n");   }
	}
    }
    if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
	my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
	if ($::macosx)	{ push (@out,"$tmp,2\n"); }
	elsif ($::elf)	{ push (@out,"$tmp,4\n"); }
	else		{ push (@out,"$tmp\n"); }
    }
    push(@out,$initseg) if ($initseg);
}

sub ::data_byte	{   push(@out,".byte\t".join(',',@_)."\n");   }
sub ::data_short{   push(@out,".value\t".join(',',@_)."\n");  }
sub ::data_word {   push(@out,".long\t".join(',',@_)."\n");   }

# .align; for a.out the value is converted to a power-of-two exponent
# and 0x90 is appended as the second .align argument.
sub ::align
{ my $val=$_[0];
    if ($::aout)
    {	$val=int(log($val)/log(2));
	$val.=",0x90";
    }
    push(@out,".align\t$val\n");
}

# Load the address of $sym into $dst, position-independently when PIC
# output is requested (or always on Mac OS X).
sub ::picmeup
{ my($dst,$sym,$base,$reflabel)=@_;

    if (($::pic && ($::elf || $::aout)) || $::macosx)
    {	if (!defined($base))
	{   &::call(&::label("PIC_me_up"));
	    &::set_label("PIC_me_up");
	    &::blindpop($dst);
	    $base=$dst;
	    $reflabel=&::label("PIC_me_up");
	}
	if ($::macosx)
	{   my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr");
	    &::mov($dst,&::DWP("$indirect-$reflabel",$base));
	    $non_lazy_ptr{"$nmdecor$sym"}=$indirect;
	}
	elsif ($sym eq "OPENSSL_ia32cap_P" && $::elf>0)
	{   &::lea($dst,&::DWP("$sym-$reflabel",$base));   }
	else
	{   &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]",
			    $base));
	    &::mov($dst,&::DWP("$sym\@GOT",$dst));
	}
    }
    else
    {	&::lea($dst,&::DWP($sym));	}
}

# Register function $f as an initializer; the text is accumulated in
# $initseg and appended by ::file_end.
sub ::initseg
{ my $f=$nmdecor.shift;

    if ($::android)
    {	$initseg.=<<___;
.section	.init_array
.align	4
.long	$f
___
    }
    elsif ($::elf)
    {	$initseg.=<<___;
.section	.init
	call	$f
___
    }
    elsif ($::coff)
    {   $initseg.=<<___;	# applies to both Cygwin and Mingw
.section	.ctors
.long	$f
___
    }
    elsif ($::macosx)
    {	$initseg.=<<___;
.mod_init_func
.align 2
.long   $f
___
    }
    elsif ($::aout)
    {	my $ctor="${nmdecor}_GLOBAL_\$I\$$f";
	$initseg.=".text\n";
	$initseg.=".type\t$ctor,\@function\n" if ($::pic);
	$initseg.=<<___;	# OpenBSD way...
.globl	$ctor
.align	2
$ctor:
	jmp	$f
___
    }
}

sub ::dataseg
{   push(@out,".data\n");   }

*::hidden = sub { push(@out,".hidden\t$nmdecor$_[0]\n"); } if ($::elf);

1;
# diff --git a/openssl-1.1.0h/crypto/perlasm/x86masm.pl b/openssl-1.1.0h/crypto/perlasm/x86masm.pl new file mode 100644 index 0000000..d352f47 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/x86masm.pl @@ -0,0 +1,207 @@
#! /usr/bin/env perl
# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.
# You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


package x86masm;

*out=\@::out;

$::lbdecor="\$L";	# local label decoration
$nmdecor="_";		# external name decoration

$initseg="";
$segment="";

# Emit one instruction in MASM syntax: rewrite 0x.. constants as 0..h,
# turn "lea reg,<size> PTR (expr)" into "mov reg,OFFSET (expr)", and
# retag memory operands as XMMWORD PTR when the other operand is xmm0-7.
sub ::generic
{ my ($opcode,@arg)=@_;

    # fix hexadecimal constants
    for (@arg) { s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/oi; }

    # Only look at the second operand when there is one, so that opcodes
    # which merely match /lea/ and carry fewer operands are left alone.
    if ($opcode =~ /lea/ && @arg>1 && $arg[1] =~ s/.*PTR\s+(\(.*\))$/OFFSET $1/)	# no []
    {	$opcode="mov";	}
    elsif ($opcode !~ /mov[dq]$/)
    {	# fix xmm references
	$arg[0] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[-1]=~/\bxmm[0-7]\b/i);
	$arg[-1] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[0]=~/\bxmm[0-7]\b/i);
    }

    &::emit($opcode,@arg);
  1;
}
#
# opcodes not covered by ::generic above, mostly inconsistent namings...
#
sub ::call	{ &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); }
sub ::call_ptr	{ &::emit("call",@_);	}
sub ::jmp_ptr	{ &::emit("jmp",@_);	}
sub ::lock	{ &::data_byte(0xf0);	}

# Build a memory operand "<size> PTR addr[index*scale+base]"; a negative
# displacement is placed inside the brackets after the registers.
sub get_mem
{ my($size,$addr,$reg1,$reg2,$idx)=@_;
  my($post,$ret);

    if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }

    $ret .= "$size PTR " if ($size ne "");

    $addr =~ s/^\s+//;
    # prepend global references with optional underscore
    $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige;
    # put address arithmetic expression in parenthesis
    $addr="($addr)" if ($addr =~ /^.+[\-\+].+$/);

    if (($addr ne "") && ($addr ne 0))
    {	if ($addr !~ /^-/)	{ $ret .= "$addr";  }
	else			{ $post=$addr;      }
    }
    $ret .= "[";

    if ($reg2 ne "")
    {	$idx!=0 or $idx=1;
	$ret .= "$reg2*$idx";
	$ret .= "+$reg1" if ($reg1 ne "");
    }
    else
    {	$ret .= "$reg1";   }

    $ret .= "$post]";
    $ret =~ s/\+\]/]/; # in case $addr was the only argument
    $ret =~ s/\[\s*\]//;

  $ret;
}
sub ::BP	{ &get_mem("BYTE",@_);  }
sub ::WP	{ &get_mem("WORD",@_);	}
sub ::DWP	{ &get_mem("DWORD",@_); }
sub ::QWP	{ &get_mem("QWORD",@_); }
sub ::BC	{ "@_";  }
sub ::DWC	{ "@_"; }

# File header: title, CPU/model directives and the opening code segment.
sub ::file
{ my $tmp=<<___;
TITLE	$_[0].asm
IF \@Version LT 800
ECHO MASM version 8.00 or later is strongly recommended.
ENDIF
.686
.MODEL	FLAT
OPTION	DOTNAME
IF \@Version LT 800
.text\$	SEGMENT PAGE 'CODE'
ELSE
.text\$	SEGMENT ALIGN(64) 'CODE'
ENDIF
___
    push(@out,$tmp);
    $segment = ".text\$";
}

# Open a PROC; global functions (names not starting with "_") are PUBLIC
# and get a "_begin" label, others are PRIVATE.
sub ::function_begin_B
{ my $func=shift;
  my $global=($func !~ /^_/);
  my $begin="${::lbdecor}_${func}_begin";

    &::LABEL($func,$global?"$begin":"$nmdecor$func");
    $func="ALIGN\t16\n".$nmdecor.$func."\tPROC";

    if ($global)    { $func.=" PUBLIC\n${begin}::\n"; }
    else	    { $func.=" PRIVATE\n";            }
    push(@out,$func);
    $::stack=4;
}
sub ::function_end_B
{ my $func=shift;

    push(@out,"$nmdecor$func ENDP\n");
    $::stack=0;
    &::wipe_labels();
}

# File trailer: when the output mentions mm/xmm registers, replace the
# ".x86" CPU directive with the XMM-enabling header; then close the
# current segment, declare OPENSSL_ia32cap_P if it is referenced, append
# the init segment and END.
sub ::file_end
{ my $xmmheader=<<___;
.686
.XMM
IF \@Version LT 800
XMMWORD STRUCT 16
DQ	2 dup (?)
XMMWORD	ENDS
ENDIF
___
    if (grep {/\b[x]?mm[0-7]\b/i} @out) {
	s/\.[3-7]86/$xmmheader/ for @out;
    }

    push(@out,"$segment\tENDS\n");

    if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
    {	my $comm=<<___;
.bss	SEGMENT 'BSS'
COMM	${nmdecor}OPENSSL_ia32cap_P:DWORD:4
.bss	ENDS
___
	# comment out OPENSSL_ia32cap_P declarations
	s/(^EXTERN\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/ for @out;
	push (@out,$comm);
    }
    push (@out,$initseg) if ($initseg);
    push (@out,"END\n");
}

sub ::comment {   foreach (@_) { push(@out,"\t; $_\n"); }   }

# Labels carrying the local decoration plus three digits end in ":",
# all others in "::".
*::set_label_B = sub
{ my $l=shift; push(@out,$l.($l=~/^\Q${::lbdecor}\E[0-9]{3}/?":\n":"::\n")); };

sub ::external_label
{   foreach(@_)
    {	push(@out, "EXTERN\t".&::LABEL($_,$nmdecor.$_).":NEAR\n");   }
}

sub ::public_label
{   push(@out,"PUBLIC\t".&::LABEL($_[0],$nmdecor.$_[0])."\n");   }

# Data directives; long argument lists are split over several lines
# (16 bytes, 8 words or 4 dwords per line).
sub ::data_byte
{   push(@out,("DB\t").join(',',splice(@_,0,16))."\n") while(@_);	}

sub ::data_short
{   push(@out,("DW\t").join(',',splice(@_,0,8))."\n") while(@_);	}

sub ::data_word
{   push(@out,("DD\t").join(',',splice(@_,0,4))."\n") while(@_);	}

sub ::align
{   push(@out,"ALIGN\t$_[0]\n");	}

sub ::picmeup
{ my($dst,$sym)=@_;
    &::lea($dst,&::DWP($sym));
}

# Register function $f as an initializer via a .CRT$XCU entry; the text
# is accumulated in $initseg and appended by ::file_end.
sub ::initseg
{ my $f=$nmdecor.shift;

    $initseg.=<<___;
.CRT\$XCU	SEGMENT DWORD PUBLIC 'DATA'
EXTERN	$f:NEAR
DD	$f
.CRT\$XCU	ENDS
___
}

sub ::dataseg
{   push(@out,"$segment\tENDS\n_DATA\tSEGMENT\n"); $segment="_DATA";   }

sub ::safeseh
{ my $nm=shift;
    push(@out,"IF \@Version GE 710\n");
    push(@out,".SAFESEH\t".&::LABEL($nm,$nmdecor.$nm)."\n");
    push(@out,"ENDIF\n");
}

1;
# diff --git a/openssl-1.1.0h/crypto/perlasm/x86nasm.pl b/openssl-1.1.0h/crypto/perlasm/x86nasm.pl new file mode 100644 index 0000000..4b664a8 --- /dev/null +++ b/openssl-1.1.0h/crypto/perlasm/x86nasm.pl @@ -0,0 +1,186 @@
#! /usr/bin/env perl
# Copyright 1999-2016 The OpenSSL Project Authors.
# All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


package x86nasm;

*out=\@::out;

$::lbdecor="L\$";		# local label decoration
$nmdecor=$::netware?"":"_";	# external name decoration
$drdecor=$::mwerks?".":"";	# directive decoration

$initseg="";

# Emit one instruction in NASM syntax. Unless targeting Metrowerks,
# single-operand jumps get a NEAR qualifier and the size qualifier is
# stripped from lea/clflush memory operands.
sub ::generic
{ my $opcode=shift;

    if (!$::mwerks)
    {   if    ($opcode =~ m/^j/o && $#_==0) # optimize jumps
	{   $_[0] = "NEAR $_[0]";   	}
	elsif ($opcode eq "lea" && $#_==1)  # wipe storage qualifier from lea
	{   $_[1] =~ s/^[^\[]*\[/\[/o;	}
	elsif ($opcode eq "clflush" && $#_==0)
	{   $_[0] =~ s/^[^\[]*\[/\[/o;	}
    }
    &::emit($opcode,@_);
  1;
}
#
# opcodes not covered by ::generic above, mostly inconsistent namings...
#
sub ::call	{ &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); }
sub ::call_ptr	{ &::emit("call",@_);	}
sub ::jmp_ptr	{ &::emit("jmp",@_);	}

# Build a memory operand "<size> [addr+index*scale+base]"; a negative
# displacement is appended after the registers instead.
sub get_mem
{ my($size,$addr,$reg1,$reg2,$idx)=@_;
  my($post,$ret);

    if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }

    if ($size ne "")
    {	$ret .= "$size";
	$ret .= " PTR" if ($::mwerks);
	$ret .= " ";
    }
    $ret .= "[";

    $addr =~ s/^\s+//;
    # prepend global references with optional underscore
    $addr =~ s/^([^\+\-0-9][^\+\-]*)/::islabel($1) or "$nmdecor$1"/ige;
    # put address arithmetic expression in parenthesis
    $addr="($addr)" if ($addr =~ /^.+[\-\+].+$/);

    if (($addr ne "") && ($addr ne 0))
    {	if ($addr !~ /^-/)	{ $ret .= "$addr+"; }
	else			{ $post=$addr;      }
    }

    if ($reg2 ne "")
    {	$idx!=0 or $idx=1;
	$ret .= "$reg2*$idx";
	$ret .= "+$reg1" if ($reg1 ne "");
    }
    else
    {	$ret .= "$reg1";   }

    $ret .= "$post]";
    $ret =~ s/\+\]/]/; # in case $addr was the only argument

  $ret;
}
sub ::BP	{ &get_mem("BYTE",@_);  }
sub ::DWP	{ &get_mem("DWORD",@_); }
sub ::WP	{ &get_mem("WORD",@_);	}
sub ::QWP	{ &get_mem("",@_);      }
sub ::BC	{ (($::mwerks)?"":"BYTE ")."@_";  }
sub ::DWC	{ (($::mwerks)?"":"DWORD ")."@_"; }

# File header: open the code section for the selected output format.
sub ::file
{   if ($::mwerks)	{ push(@out,".section\t.text,64\n"); }
    else
    { my $tmp=<<___;
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
\$\@feat.00 equ 1
section	.text	code align=64
%else
section	.text	code
%endif
___
	push(@out,$tmp);
    }
}

# Emit the function header; global functions (names not starting with
# "_") are exported and get a "_begin" label.
sub ::function_begin_B
{ my $func=shift;
  my $global=($func !~ /^_/);
  my $begin="${::lbdecor}_${func}_begin";

    $begin =~ s/^\@/./ if ($::mwerks);	# the torture never stops

    &::LABEL($func,$global?"$begin":"$nmdecor$func");
    $func=$nmdecor.$func;

    push(@out,"${drdecor}global\t$func\n")	if ($global);
    push(@out,"${drdecor}align\t16\n");
    push(@out,"$func:\n");
    push(@out,"$begin:\n")			if ($global);
    $::stack=4;
}

sub ::function_end_B
{   $::stack=0;
    &::wipe_labels();
}

# File trailer: declare OPENSSL_ia32cap_P as a common symbol when the
# output references it, then append the collected init segment.
sub ::file_end
{   if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
    {	my $comm=<<___;
${drdecor}segment	.bss
${drdecor}common	${nmdecor}OPENSSL_ia32cap_P 16
___
	# comment out OPENSSL_ia32cap_P declarations
	s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/ for @out;
	push (@out,$comm)
    }
    push (@out,$initseg) if ($initseg);
}

sub ::comment {   foreach (@_) { push(@out,"\t; $_\n"); }   }

sub ::external_label
{   foreach(@_)
    {	push(@out,"${drdecor}extern\t".&::LABEL($_,$nmdecor.$_)."\n");   }
}

sub ::public_label
{   push(@out,"${drdecor}global\t".&::LABEL($_[0],$nmdecor.$_[0])."\n");  }

sub ::data_byte
{   push(@out,(($::mwerks)?".byte\t":"db\t").join(',',@_)."\n");	}
sub ::data_short
{   push(@out,(($::mwerks)?".word\t":"dw\t").join(',',@_)."\n");	}
sub ::data_word
{   push(@out,(($::mwerks)?".long\t":"dd\t").join(',',@_)."\n");	}

sub ::align
{   push(@out,"${drdecor}align\t$_[0]\n");	}

sub ::picmeup
{ my($dst,$sym)=@_;
    &::lea($dst,&::DWP($sym));
}

# Register function $f as an initializer (win32 only). Entries are
# appended, so several initializers can be registered, as in the gas and
# masm back-ends.
sub ::initseg
{ my $f=$nmdecor.shift;
    if ($::win32)
    {	$initseg.=<<___;
segment	.CRT\$XCU data align=4
extern	$f
dd	$f
___
    }
}

# Switch to the data section. The flavour flag lives in package main
# ($::mwerks), as everywhere else in this file.
sub ::dataseg
{   if ($::mwerks)	{ push(@out,".section\t.data,4\n");   }
    else		{ push(@out,"section\t.data align=4\n"); }
}

sub ::safeseh
{ my $nm=shift;
    push(@out,"%if\t__NASM_VERSION_ID__ >= 0x02030000\n");
    push(@out,"safeseh\t".&::LABEL($nm,$nmdecor.$nm)."\n");
    push(@out,"%endif\n");
}

1;
