Index: stable/10/crypto/openssl/crypto/perlasm/x86asm.pl =================================================================== --- stable/10/crypto/openssl/crypto/perlasm/x86asm.pl (revision 299982) +++ stable/10/crypto/openssl/crypto/perlasm/x86asm.pl (revision 299983) @@ -1,260 +1,262 @@ #!/usr/bin/env perl # require 'x86asm.pl'; # &asm_init(,"des-586.pl"[,$i386only]); # &function_begin("foo"); # ... # &function_end("foo"); # &asm_finish $out=(); $i386=0; # AUTOLOAD is this context has quite unpleasant side effect, namely # that typos in function calls effectively go to assembler output, # but on the pros side we don't have to implement one subroutine per # each opcode... sub ::AUTOLOAD { my $opcode = $AUTOLOAD; die "more than 4 arguments passed to $opcode" if ($#_>3); $opcode =~ s/.*:://; if ($opcode =~ /^push/) { $stack+=4; } elsif ($opcode =~ /^pop/) { $stack-=4; } &generic($opcode,@_) or die "undefined subroutine \&$AUTOLOAD"; } sub ::emit { my $opcode=shift; if ($#_==-1) { push(@out,"\t$opcode\n"); } else { push(@out,"\t$opcode\t".join(',',@_)."\n"); } } sub ::LB { $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'low byte'"; $1."l"; } sub ::HB { $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'high byte'"; $1."h"; } sub ::stack_push{ my $num=$_[0]*4; $stack+=$num; &sub("esp",$num); } sub ::stack_pop { my $num=$_[0]*4; $stack-=$num; &add("esp",$num); } sub ::blindpop { &pop($_[0]); $stack+=4; } sub ::wparam { &DWP($stack+4*$_[0],"esp"); } sub ::swtmp { &DWP(4*$_[0],"esp"); } sub ::bswap { if ($i386) # emulate bswap for i386 { &comment("bswap @_"); &xchg(&HB(@_),&LB(@_)); &ror (@_,16); &xchg(&HB(@_),&LB(@_)); } else { &generic("bswap",@_); } } # These are made-up opcodes introduced over the years essentially # by ignorance, just alias them to real ones... sub ::movb { &mov(@_); } sub ::xorb { &xor(@_); } sub ::rotl { &rol(@_); } sub ::rotr { &ror(@_); } sub ::exch { &xchg(@_); } sub ::halt { &hlt; } sub ::movz { &movzx(@_); } sub ::pushf { &pushfd; } sub ::popf { &popfd; } # 3 argument instructions sub ::movq { my($p1,$p2,$optimize)=@_; if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/) # movq between mmx registers can sink Intel CPUs { &::pshufw($p1,$p2,0xe4); } else { &::generic("movq",@_); } } # SSE>2 instructions my %regrm = ( "eax"=>0, "ecx"=>1, "edx"=>2, "ebx"=>3, "esp"=>4, "ebp"=>5, "esi"=>6, "edi"=>7 ); sub ::pextrd { my($dst,$src,$imm)=@_; if ("$dst:$src" =~ /(e[a-dsd][ixp]):xmm([0-7])/) { &::data_byte(0x66,0x0f,0x3a,0x16,0xc0|($2<<3)|$regrm{$1},$imm); } else { &::generic("pextrd",@_); } } sub ::pinsrd { my($dst,$src,$imm)=@_; if ("$dst:$src" =~ /xmm([0-7]):(e[a-dsd][ixp])/) { &::data_byte(0x66,0x0f,0x3a,0x22,0xc0|($1<<3)|$regrm{$2},$imm); } else { &::generic("pinsrd",@_); } } sub ::pshufb { my($dst,$src)=@_; if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) { &data_byte(0x66,0x0f,0x38,0x00,0xc0|($1<<3)|$2); } else { &::generic("pshufb",@_); } } sub ::palignr { my($dst,$src,$imm)=@_; if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) { &::data_byte(0x66,0x0f,0x3a,0x0f,0xc0|($1<<3)|$2,$imm); } else { &::generic("palignr",@_); } } sub ::pclmulqdq { my($dst,$src,$imm)=@_; if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) { &::data_byte(0x66,0x0f,0x3a,0x44,0xc0|($1<<3)|$2,$imm); } else { &::generic("pclmulqdq",@_); } } sub ::rdrand { my ($dst)=@_; if ($dst =~ /(e[a-dsd][ixp])/) { &::data_byte(0x0f,0xc7,0xf0|$regrm{$dst}); } else { &::generic("rdrand",@_); } } # label management $lbdecor="L"; # local label decoration, set by package $label="000"; sub ::islabel # see is argument is a known label { my $i; foreach $i (values %label) { return $i if ($i eq $_[0]); } $label{$_[0]}; # can be undef } sub ::label # instantiate a function-scope label { if (!defined($label{$_[0]})) { $label{$_[0]}="${lbdecor}${label}${_[0]}"; $label++; } $label{$_[0]}; } sub ::LABEL # instantiate a file-scope label { $label{$_[0]}=$_[1] if (!defined($label{$_[0]})); $label{$_[0]}; } sub ::static_label { &::LABEL($_[0],$lbdecor.$_[0]); } sub ::set_label_B { push(@out,"@_:\n"); } sub ::set_label { my $label=&::label($_[0]); &::align($_[1]) if ($_[1]>1); &::set_label_B($label); $label; } sub ::wipe_labels # wipes function-scope labels { foreach $i (keys %label) { delete $label{$i} if ($label{$i} =~ /^\Q${lbdecor}\E[0-9]{3}/); } } # subroutine management sub ::function_begin { &function_begin_B(@_); $stack=4; &push("ebp"); &push("ebx"); &push("esi"); &push("edi"); } sub ::function_end { &pop("edi"); &pop("esi"); &pop("ebx"); &pop("ebp"); &ret(); &function_end_B(@_); $stack=0; &wipe_labels(); } sub ::function_end_A { &pop("edi"); &pop("esi"); &pop("ebx"); &pop("ebp"); &ret(); $stack+=16; # readjust esp as if we didn't pop anything } sub ::asciz { my @str=unpack("C*",shift); push @str,0; while ($#str>15) { &data_byte(@str[0..15]); foreach (0..15) { shift @str; } } &data_byte(@str) if (@str); } sub ::asm_finish { &file_end(); print @out; } sub ::asm_init { my ($type,$fn,$cpu)=@_; $filename=$fn; $i386=$cpu; $elf=$cpp=$coff=$aout=$macosx=$win32=$netware=$mwerks=$android=0; if (($type eq "elf")) { $elf=1; require "x86gas.pl"; } elsif (($type eq "a\.out")) { $aout=1; require "x86gas.pl"; } elsif (($type eq "coff" or $type eq "gaswin")) { $coff=1; require "x86gas.pl"; } elsif (($type eq "win32n")) { $win32=1; require "x86nasm.pl"; } elsif (($type eq "nw-nasm")) { $netware=1; require "x86nasm.pl"; } #elsif (($type eq "nw-mwasm")) #{ $netware=1; $mwerks=1; require "x86nasm.pl"; } elsif (($type eq "win32")) { $win32=1; require "x86masm.pl"; } elsif (($type eq "macosx")) { $aout=1; $macosx=1; require "x86gas.pl"; } elsif (($type eq "android")) { $elf=1; $android=1; require "x86gas.pl"; } else { print STDERR <<"EOF"; Pick one target type from elf - Linux, FreeBSD, Solaris x86, etc. a.out - DJGPP, elder OpenBSD, etc. coff - GAS/COFF such as Win32 targets win32n - Windows 95/Windows NT NASM format nw-nasm - NetWare NASM format macosx - Mac OS X EOF exit(1); } $pic=0; for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } $filename =~ s/\.pl$//; &file($filename); } +sub ::hidden {} + 1; Index: stable/10/crypto/openssl/crypto/perlasm/x86gas.pl =================================================================== --- stable/10/crypto/openssl/crypto/perlasm/x86gas.pl (revision 299982) +++ stable/10/crypto/openssl/crypto/perlasm/x86gas.pl (revision 299983) @@ -1,253 +1,255 @@ #!/usr/bin/env perl package x86gas; *out=\@::out; $::lbdecor=$::aout?"L":".L"; # local label decoration $nmdecor=($::aout or $::coff)?"_":""; # external name decoration $initseg=""; $align=16; $align=log($align)/log(2) if ($::aout); $com_start="#" if ($::aout or $::coff); sub opsize() { my $reg=shift; if ($reg =~ m/^%e/o) { "l"; } elsif ($reg =~ m/^%[a-d][hl]$/o) { "b"; } elsif ($reg =~ m/^%[xm]/o) { undef; } else { "w"; } } # swap arguments; # expand opcode with size suffix; # prefix numeric constants with $; sub ::generic { my($opcode,@arg)=@_; my($suffix,$dst,$src); @arg=reverse(@arg); for (@arg) { s/^(\*?)(e?[a-dsixphl]{2})$/$1%$2/o; # gp registers s/^([xy]?mm[0-7])$/%$1/o; # xmm/mmx registers s/^(\-?[0-9]+)$/\$$1/o; # constants s/^(\-?0x[0-9a-f]+)$/\$$1/o; # constants } $dst = $arg[$#arg] if ($#arg>=0); $src = $arg[$#arg-1] if ($#arg>=1); if ($dst =~ m/^%/o) { $suffix=&opsize($dst); } elsif ($src =~ m/^%/o) { $suffix=&opsize($src); } else { $suffix="l"; } undef $suffix if ($dst =~ m/^%[xm]/o || $src =~ m/^%[xm]/o); if ($#_==0) { &::emit($opcode); } elsif ($#_==1 && $opcode =~ m/^(call|clflush|j|loop|set)/o) { &::emit($opcode,@arg); } else { &::emit($opcode.$suffix,@arg);} 1; } # # opcodes not covered by ::generic above, mostly inconsistent namings... # sub ::movzx { &::movzb(@_); } sub ::pushfd { &::pushfl; } sub ::popfd { &::popfl; } sub ::cpuid { &::emit(".byte\t0x0f,0xa2"); } sub ::rdtsc { &::emit(".byte\t0x0f,0x31"); } sub ::call { &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); } sub ::call_ptr { &::generic("call","*$_[0]"); } sub ::jmp_ptr { &::generic("jmp","*$_[0]"); } *::bswap = sub { &::emit("bswap","%$_[0]"); } if (!$::i386); sub ::DWP { my($addr,$reg1,$reg2,$idx)=@_; my $ret=""; $addr =~ s/^\s+//; # prepend global references with optional underscore $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige; $reg1 = "%$reg1" if ($reg1); $reg2 = "%$reg2" if ($reg2); $ret .= $addr if (($addr ne "") && ($addr ne 0)); if ($reg2) { $idx!= 0 or $idx=1; $ret .= "($reg1,$reg2,$idx)"; } elsif ($reg1) { $ret .= "($reg1)"; } $ret; } sub ::QWP { &::DWP(@_); } sub ::BP { &::DWP(@_); } sub ::WP { &::DWP(@_); } sub ::BC { @_; } sub ::DWC { @_; } sub ::file { push(@out,".file\t\"$_[0].s\"\n.text\n"); } sub ::function_begin_B { my $func=shift; my $global=($func !~ /^_/); my $begin="${::lbdecor}_${func}_begin"; &::LABEL($func,$global?"$begin":"$nmdecor$func"); $func=$nmdecor.$func; push(@out,".globl\t$func\n") if ($global); if ($::coff) { push(@out,".def\t$func;\t.scl\t".(3-$global).";\t.type\t32;\t.endef\n"); } elsif (($::aout and !$::pic) or $::macosx) { } else { push(@out,".type $func,\@function\n"); } push(@out,".align\t$align\n"); push(@out,"$func:\n"); push(@out,"$begin:\n") if ($global); $::stack=4; } sub ::function_end_B { my $func=shift; push(@out,".size\t$nmdecor$func,.-".&::LABEL($func)."\n") if ($::elf); $::stack=0; &::wipe_labels(); } sub ::comment { if (!defined($com_start) or $::elf) { # Regarding $::elf above... # GNU and SVR4 as'es use different comment delimiters, push(@out,"\n"); # so we just skip ELF comments... return; } foreach (@_) { if (/^\s*$/) { push(@out,"\n"); } else { push(@out,"\t$com_start $_ $com_end\n"); } } } sub ::external_label { foreach(@_) { &::LABEL($_,$nmdecor.$_); } } sub ::public_label { push(@out,".globl\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); } sub ::file_end { if ($::macosx) { if (%non_lazy_ptr) { push(@out,".section __IMPORT,__pointers,non_lazy_symbol_pointers\n"); foreach $i (keys %non_lazy_ptr) { push(@out,"$non_lazy_ptr{$i}:\n.indirect_symbol\t$i\n.long\t0\n"); } } } if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; if ($::macosx) { push (@out,"$tmp,2\n"); } elsif ($::elf) { push (@out,"$tmp,4\n"); } else { push (@out,"$tmp\n"); } } push(@out,$initseg) if ($initseg); } sub ::data_byte { push(@out,".byte\t".join(',',@_)."\n"); } sub ::data_short{ push(@out,".value\t".join(',',@_)."\n"); } sub ::data_word { push(@out,".long\t".join(',',@_)."\n"); } sub ::align { my $val=$_[0],$p2,$i; if ($::aout) { for ($p2=0;$val!=0;$val>>=1) { $p2++; } $val=$p2-1; $val.=",0x90"; } push(@out,".align\t$val\n"); } sub ::picmeup { my($dst,$sym,$base,$reflabel)=@_; if (($::pic && ($::elf || $::aout)) || $::macosx) { if (!defined($base)) { &::call(&::label("PIC_me_up")); &::set_label("PIC_me_up"); &::blindpop($dst); $base=$dst; $reflabel=&::label("PIC_me_up"); } if ($::macosx) { my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr"); &::mov($dst,&::DWP("$indirect-$reflabel",$base)); $non_lazy_ptr{"$nmdecor$sym"}=$indirect; } else { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", $base)); &::mov($dst,&::DWP("$sym\@GOT",$dst)); } } else { &::lea($dst,&::DWP($sym)); } } sub ::initseg { my $f=$nmdecor.shift; if ($::android) { $initseg.=<<___; .section .init_array .align 4 .long $f ___ } elsif ($::elf) { $initseg.=<<___; .section .init call $f ___ } elsif ($::coff) { $initseg.=<<___; # applies to both Cygwin and Mingw .section .ctors .long $f ___ } elsif ($::macosx) { $initseg.=<<___; .mod_init_func .align 2 .long $f ___ } elsif ($::aout) { my $ctor="${nmdecor}_GLOBAL_\$I\$$f"; $initseg.=".text\n"; $initseg.=".type $ctor,\@function\n" if ($::pic); $initseg.=<<___; # OpenBSD way... .globl $ctor .align 2 $ctor: jmp $f ___ } } sub ::dataseg { push(@out,".data\n"); } +*::hidden = sub { push(@out,".hidden\t$nmdecor$_[0]\n"); } if ($::elf); + 1; Index: stable/10/crypto/openssl/crypto/x86cpuid.pl =================================================================== --- stable/10/crypto/openssl/crypto/x86cpuid.pl (revision 299982) +++ stable/10/crypto/openssl/crypto/x86cpuid.pl (revision 299983) @@ -1,358 +1,361 @@ #!/usr/bin/env perl $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC, "${dir}perlasm", "perlasm"); require "x86asm.pl"; &asm_init($ARGV[0],"x86cpuid"); for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &function_begin("OPENSSL_ia32_cpuid"); &xor ("edx","edx"); &pushf (); &pop ("eax"); &mov ("ecx","eax"); &xor ("eax",1<<21); &push ("eax"); &popf (); &pushf (); &pop ("eax"); &xor ("ecx","eax"); &xor ("eax","eax"); &bt ("ecx",21); &jnc (&label("nocpuid")); &cpuid (); &mov ("edi","eax"); # max value for standard query level &xor ("eax","eax"); &cmp ("ebx",0x756e6547); # "Genu" &setne (&LB("eax")); &mov ("ebp","eax"); &cmp ("edx",0x49656e69); # "ineI" &setne (&LB("eax")); &or ("ebp","eax"); &cmp ("ecx",0x6c65746e); # "ntel" &setne (&LB("eax")); &or ("ebp","eax"); # 0 indicates Intel CPU &jz (&label("intel")); &cmp ("ebx",0x68747541); # "Auth" &setne (&LB("eax")); &mov ("esi","eax"); &cmp ("edx",0x69746E65); # "enti" &setne (&LB("eax")); &or ("esi","eax"); &cmp ("ecx",0x444D4163); # "cAMD" &setne (&LB("eax")); &or ("esi","eax"); # 0 indicates AMD CPU &jnz (&label("intel")); # AMD specific &mov ("eax",0x80000000); &cpuid (); &cmp ("eax",0x80000001); &jb (&label("intel")); &mov ("esi","eax"); &mov ("eax",0x80000001); &cpuid (); &or ("ebp","ecx"); &and ("ebp",1<<11|1); # isolate XOP bit &cmp ("esi",0x80000008); &jb (&label("intel")); &mov ("eax",0x80000008); &cpuid (); &movz ("esi",&LB("ecx")); # number of cores - 1 &inc ("esi"); # number of cores &mov ("eax",1); &xor ("ecx","ecx"); &cpuid (); &bt ("edx",28); &jnc (&label("generic")); &shr ("ebx",16); &and ("ebx",0xff); &cmp ("ebx","esi"); &ja (&label("generic")); &and ("edx",0xefffffff); # clear hyper-threading bit &jmp (&label("generic")); &set_label("intel"); &cmp ("edi",4); &mov ("edi",-1); &jb (&label("nocacheinfo")); &mov ("eax",4); &mov ("ecx",0); # query L1D &cpuid (); &mov ("edi","eax"); &shr ("edi",14); &and ("edi",0xfff); # number of cores -1 per L1D &set_label("nocacheinfo"); &mov ("eax",1); &xor ("ecx","ecx"); &cpuid (); &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0 &cmp ("ebp",0); &jne (&label("notintel")); &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs &and (&HB("eax"),15); # familiy ID &cmp (&HB("eax"),15); # P4? &jne (&label("notintel")); &or ("edx",1<<20); # set reserved bit#20 to engage RC4_CHAR &set_label("notintel"); &bt ("edx",28); # test hyper-threading bit &jnc (&label("generic")); &and ("edx",0xefffffff); &cmp ("edi",0); &je (&label("generic")); &or ("edx",0x10000000); &shr ("ebx",16); &cmp (&LB("ebx"),1); &ja (&label("generic")); &and ("edx",0xefffffff); # clear hyper-threading bit if not &set_label("generic"); &and ("ebp",1<<11); # isolate AMD XOP flag &and ("ecx",0xfffff7ff); # force 11th bit to 0 &mov ("esi","edx"); &or ("ebp","ecx"); # merge AMD XOP flag &bt ("ecx",27); # check OSXSAVE bit &jnc (&label("clear_avx")); &xor ("ecx","ecx"); &data_byte(0x0f,0x01,0xd0); # xgetbv &and ("eax",6); &cmp ("eax",6); &je (&label("done")); &cmp ("eax",2); &je (&label("clear_avx")); &set_label("clear_xmm"); &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits &and ("esi",0xfeffffff); # clear FXSR &set_label("clear_avx"); &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits &set_label("done"); &mov ("eax","esi"); &mov ("edx","ebp"); &set_label("nocpuid"); &function_end("OPENSSL_ia32_cpuid"); &external_label("OPENSSL_ia32cap_P"); &function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); &xor ("eax","eax"); &xor ("edx","edx"); &picmeup("ecx","OPENSSL_ia32cap_P"); &bt (&DWP(0,"ecx"),4); &jnc (&label("notsc")); &rdtsc (); &set_label("notsc"); &ret (); &function_end_B("OPENSSL_rdtsc"); # This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host], # but it's safe to call it on any [supported] 32-bit platform... # Just check for [non-]zero return value... &function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); &picmeup("ecx","OPENSSL_ia32cap_P"); &bt (&DWP(0,"ecx"),4); &jnc (&label("nohalt")); # no TSC &data_word(0x9058900e); # push %cs; pop %eax &and ("eax",3); &jnz (&label("nohalt")); # not enough privileges &pushf (); &pop ("eax"); &bt ("eax",9); &jnc (&label("nohalt")); # interrupts are disabled &rdtsc (); &push ("edx"); &push ("eax"); &halt (); &rdtsc (); &sub ("eax",&DWP(0,"esp")); &sbb ("edx",&DWP(4,"esp")); &add ("esp",8); &ret (); &set_label("nohalt"); &xor ("eax","eax"); &xor ("edx","edx"); &ret (); &function_end_B("OPENSSL_instrument_halt"); # Essentially there is only one use for this function. Under DJGPP: # # #include # ... # i=OPENSSL_far_spin(_dos_ds,0x46c); # ... # to obtain the number of spins till closest timer interrupt. &function_begin_B("OPENSSL_far_spin"); &pushf (); &pop ("eax") &bt ("eax",9); &jnc (&label("nospin")); # interrupts are disabled &mov ("eax",&DWP(4,"esp")); &mov ("ecx",&DWP(8,"esp")); &data_word (0x90d88e1e); # push %ds, mov %eax,%ds &xor ("eax","eax"); &mov ("edx",&DWP(0,"ecx")); &jmp (&label("spin")); &align (16); &set_label("spin"); &inc ("eax"); &cmp ("edx",&DWP(0,"ecx")); &je (&label("spin")); &data_word (0x1f909090); # pop %ds &ret (); &set_label("nospin"); &xor ("eax","eax"); &xor ("edx","edx"); &ret (); &function_end_B("OPENSSL_far_spin"); &function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); &xor ("eax","eax"); &xor ("edx","edx"); &picmeup("ecx","OPENSSL_ia32cap_P"); &mov ("ecx",&DWP(0,"ecx")); &bt (&DWP(0,"ecx"),1); &jnc (&label("no_x87")); if ($sse2) { &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits &cmp ("ecx",1<<26|1<<24); &jne (&label("no_sse2")); &pxor ("xmm0","xmm0"); &pxor ("xmm1","xmm1"); &pxor ("xmm2","xmm2"); &pxor ("xmm3","xmm3"); &pxor ("xmm4","xmm4"); &pxor ("xmm5","xmm5"); &pxor ("xmm6","xmm6"); &pxor ("xmm7","xmm7"); &set_label("no_sse2"); } # just a bunch of fldz to zap the fp/mm bank followed by finit... &data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b); &set_label("no_x87"); &lea ("eax",&DWP(4,"esp")); &ret (); &function_end_B("OPENSSL_wipe_cpu"); &function_begin_B("OPENSSL_atomic_add"); &mov ("edx",&DWP(4,"esp")); # fetch the pointer, 1st arg &mov ("ecx",&DWP(8,"esp")); # fetch the increment, 2nd arg &push ("ebx"); &nop (); &mov ("eax",&DWP(0,"edx")); &set_label("spin"); &lea ("ebx",&DWP(0,"eax","ecx")); &nop (); &data_word(0x1ab10ff0); # lock; cmpxchg %ebx,(%edx) # %eax is envolved and is always reloaded &jne (&label("spin")); &mov ("eax","ebx"); # OpenSSL expects the new value &pop ("ebx"); &ret (); &function_end_B("OPENSSL_atomic_add"); # This function can become handy under Win32 in situations when # we don't know which calling convention, __stdcall or __cdecl(*), # indirect callee is using. In C it can be deployed as # #ifdef OPENSSL_CPUID_OBJ # type OPENSSL_indirect_call(void *f,...); # ... # OPENSSL_indirect_call(func,[up to $max arguments]); #endif # # (*) it's designed to work even for __fastcall if number of # arguments is 1 or 2! &function_begin_B("OPENSSL_indirect_call"); { my ($max,$i)=(7,); # $max has to be chosen as 4*n-1 # in order to preserve eventual # stack alignment &push ("ebp"); &mov ("ebp","esp"); &sub ("esp",$max*4); &mov ("ecx",&DWP(12,"ebp")); &mov (&DWP(0,"esp"),"ecx"); &mov ("edx",&DWP(16,"ebp")); &mov (&DWP(4,"esp"),"edx"); for($i=2;$i<$max;$i++) { # Some copies will be redundant/bogus... &mov ("eax",&DWP(12+$i*4,"ebp")); &mov (&DWP(0+$i*4,"esp"),"eax"); } &call_ptr (&DWP(8,"ebp"));# make the call... &mov ("esp","ebp"); # ... and just restore the stack pointer # without paying attention to what we called, # (__cdecl *func) or (__stdcall *one). &pop ("ebp"); &ret (); } &function_end_B("OPENSSL_indirect_call"); &function_begin_B("OPENSSL_cleanse"); &mov ("edx",&wparam(0)); &mov ("ecx",&wparam(1)); &xor ("eax","eax"); &cmp ("ecx",7); &jae (&label("lot")); &cmp ("ecx",0); &je (&label("ret")); &set_label("little"); &mov (&BP(0,"edx"),"al"); &sub ("ecx",1); &lea ("edx",&DWP(1,"edx")); &jnz (&label("little")); &set_label("ret"); &ret (); &set_label("lot",16); &test ("edx",3); &jz (&label("aligned")); &mov (&BP(0,"edx"),"al"); &lea ("ecx",&DWP(-1,"ecx")); &lea ("edx",&DWP(1,"edx")); &jmp (&label("lot")); &set_label("aligned"); &mov (&DWP(0,"edx"),"eax"); &lea ("ecx",&DWP(-4,"ecx")); &test ("ecx",-4); &lea ("edx",&DWP(4,"edx")); &jnz (&label("aligned")); &cmp ("ecx",0); &jne (&label("little")); &ret (); &function_end_B("OPENSSL_cleanse"); &function_begin_B("OPENSSL_ia32_rdrand"); &mov ("ecx",8); &set_label("loop"); &rdrand ("eax"); &jc (&label("break")); &loop (&label("loop")); &set_label("break"); &cmp ("eax",0); &cmove ("eax","ecx"); &ret (); &function_end_B("OPENSSL_ia32_rdrand"); &initseg("OPENSSL_cpuid_setup"); +&hidden("OPENSSL_cpuid_setup"); +&hidden("OPENSSL_ia32cap_P"); + &asm_finish(); Index: stable/10/secure/lib/libcrypto/i386/x86cpuid.S =================================================================== --- stable/10/secure/lib/libcrypto/i386/x86cpuid.S (revision 299982) +++ stable/10/secure/lib/libcrypto/i386/x86cpuid.S (revision 299983) @@ -1,685 +1,689 @@ # $FreeBSD$ # Do not modify. This file is auto-generated from x86cpuid.pl. #ifdef PIC .file "x86cpuid.S" .text .globl OPENSSL_ia32_cpuid .type OPENSSL_ia32_cpuid,@function .align 16 OPENSSL_ia32_cpuid: .L_OPENSSL_ia32_cpuid_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %edx,%edx pushfl popl %eax movl %eax,%ecx xorl $2097152,%eax pushl %eax popfl pushfl popl %eax xorl %eax,%ecx xorl %eax,%eax btl $21,%ecx jnc .L000nocpuid .byte 0x0f,0xa2 movl %eax,%edi xorl %eax,%eax cmpl $1970169159,%ebx setne %al movl %eax,%ebp cmpl $1231384169,%edx setne %al orl %eax,%ebp cmpl $1818588270,%ecx setne %al orl %eax,%ebp jz .L001intel cmpl $1752462657,%ebx setne %al movl %eax,%esi cmpl $1769238117,%edx setne %al orl %eax,%esi cmpl $1145913699,%ecx setne %al orl %eax,%esi jnz .L001intel movl $2147483648,%eax .byte 0x0f,0xa2 cmpl $2147483649,%eax jb .L001intel movl %eax,%esi movl $2147483649,%eax .byte 0x0f,0xa2 orl %ecx,%ebp andl $2049,%ebp cmpl $2147483656,%esi jb .L001intel movl $2147483656,%eax .byte 0x0f,0xa2 movzbl %cl,%esi incl %esi movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 btl $28,%edx jnc .L002generic shrl $16,%ebx andl $255,%ebx cmpl %esi,%ebx ja .L002generic andl $4026531839,%edx jmp .L002generic .L001intel: cmpl $4,%edi movl $-1,%edi jb .L003nocacheinfo movl $4,%eax movl $0,%ecx .byte 0x0f,0xa2 movl %eax,%edi shrl $14,%edi andl $4095,%edi .L003nocacheinfo: movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 andl $3220176895,%edx cmpl $0,%ebp jne .L004notintel orl $1073741824,%edx andb $15,%ah cmpb $15,%ah jne .L004notintel orl $1048576,%edx .L004notintel: btl $28,%edx jnc .L002generic andl $4026531839,%edx cmpl $0,%edi je .L002generic orl $268435456,%edx shrl $16,%ebx cmpb $1,%bl ja .L002generic andl $4026531839,%edx .L002generic: andl $2048,%ebp andl $4294965247,%ecx movl %edx,%esi orl %ecx,%ebp btl $27,%ecx jnc .L005clear_avx xorl %ecx,%ecx .byte 15,1,208 andl $6,%eax cmpl $6,%eax je .L006done cmpl $2,%eax je .L005clear_avx .L007clear_xmm: andl $4261412861,%ebp andl $4278190079,%esi .L005clear_avx: andl $4026525695,%ebp .L006done: movl %esi,%eax movl %ebp,%edx .L000nocpuid: popl %edi popl %esi popl %ebx popl %ebp ret .size OPENSSL_ia32_cpuid,.-.L_OPENSSL_ia32_cpuid_begin .globl OPENSSL_rdtsc .type OPENSSL_rdtsc,@function .align 16 OPENSSL_rdtsc: .L_OPENSSL_rdtsc_begin: xorl %eax,%eax xorl %edx,%edx call .L008PIC_me_up .L008PIC_me_up: popl %ecx leal _GLOBAL_OFFSET_TABLE_+[.-.L008PIC_me_up](%ecx),%ecx movl OPENSSL_ia32cap_P@GOT(%ecx),%ecx btl $4,(%ecx) jnc .L009notsc .byte 0x0f,0x31 .L009notsc: ret .size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin .globl OPENSSL_instrument_halt .type OPENSSL_instrument_halt,@function .align 16 OPENSSL_instrument_halt: .L_OPENSSL_instrument_halt_begin: call .L010PIC_me_up .L010PIC_me_up: popl %ecx leal _GLOBAL_OFFSET_TABLE_+[.-.L010PIC_me_up](%ecx),%ecx movl OPENSSL_ia32cap_P@GOT(%ecx),%ecx btl $4,(%ecx) jnc .L011nohalt .long 2421723150 andl $3,%eax jnz .L011nohalt pushfl popl %eax btl $9,%eax jnc .L011nohalt .byte 0x0f,0x31 pushl %edx pushl %eax hlt .byte 0x0f,0x31 subl (%esp),%eax sbbl 4(%esp),%edx addl $8,%esp ret .L011nohalt: xorl %eax,%eax xorl %edx,%edx ret .size OPENSSL_instrument_halt,.-.L_OPENSSL_instrument_halt_begin .globl OPENSSL_far_spin .type OPENSSL_far_spin,@function .align 16 OPENSSL_far_spin: .L_OPENSSL_far_spin_begin: pushfl popl %eax btl $9,%eax jnc .L012nospin movl 4(%esp),%eax movl 8(%esp),%ecx .long 2430111262 xorl %eax,%eax movl (%ecx),%edx jmp .L013spin .align 16 .L013spin: incl %eax cmpl (%ecx),%edx je .L013spin .long 529567888 ret .L012nospin: xorl %eax,%eax xorl %edx,%edx ret .size OPENSSL_far_spin,.-.L_OPENSSL_far_spin_begin .globl OPENSSL_wipe_cpu .type OPENSSL_wipe_cpu,@function .align 16 OPENSSL_wipe_cpu: .L_OPENSSL_wipe_cpu_begin: xorl %eax,%eax xorl %edx,%edx call .L014PIC_me_up .L014PIC_me_up: popl %ecx leal _GLOBAL_OFFSET_TABLE_+[.-.L014PIC_me_up](%ecx),%ecx movl OPENSSL_ia32cap_P@GOT(%ecx),%ecx movl (%ecx),%ecx btl $1,(%ecx) jnc .L015no_x87 andl $83886080,%ecx cmpl $83886080,%ecx jne .L016no_sse2 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 .L016no_sse2: .long 4007259865,4007259865,4007259865,4007259865,2430851995 .L015no_x87: leal 4(%esp),%eax ret .size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin .globl OPENSSL_atomic_add .type OPENSSL_atomic_add,@function .align 16 OPENSSL_atomic_add: .L_OPENSSL_atomic_add_begin: movl 4(%esp),%edx movl 8(%esp),%ecx pushl %ebx nop movl (%edx),%eax .L017spin: leal (%eax,%ecx,1),%ebx nop .long 447811568 jne .L017spin movl %ebx,%eax popl %ebx ret .size OPENSSL_atomic_add,.-.L_OPENSSL_atomic_add_begin .globl OPENSSL_indirect_call .type OPENSSL_indirect_call,@function .align 16 OPENSSL_indirect_call: .L_OPENSSL_indirect_call_begin: pushl %ebp movl %esp,%ebp subl $28,%esp movl 12(%ebp),%ecx movl %ecx,(%esp) movl 16(%ebp),%edx movl %edx,4(%esp) movl 20(%ebp),%eax movl %eax,8(%esp) movl 24(%ebp),%eax movl %eax,12(%esp) movl 28(%ebp),%eax movl %eax,16(%esp) movl 32(%ebp),%eax movl %eax,20(%esp) movl 36(%ebp),%eax movl %eax,24(%esp) call *8(%ebp) movl %ebp,%esp popl %ebp ret .size OPENSSL_indirect_call,.-.L_OPENSSL_indirect_call_begin .globl OPENSSL_cleanse .type OPENSSL_cleanse,@function .align 16 OPENSSL_cleanse: .L_OPENSSL_cleanse_begin: movl 4(%esp),%edx movl 8(%esp),%ecx xorl %eax,%eax cmpl $7,%ecx jae .L018lot cmpl $0,%ecx je .L019ret .L020little: movb %al,(%edx) subl $1,%ecx leal 1(%edx),%edx jnz .L020little .L019ret: ret .align 16 .L018lot: testl $3,%edx jz .L021aligned movb %al,(%edx) leal -1(%ecx),%ecx leal 1(%edx),%edx jmp .L018lot .L021aligned: movl %eax,(%edx) leal -4(%ecx),%ecx testl $-4,%ecx leal 4(%edx),%edx jnz .L021aligned cmpl $0,%ecx jne .L020little ret .size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin .globl OPENSSL_ia32_rdrand .type OPENSSL_ia32_rdrand,@function .align 16 OPENSSL_ia32_rdrand: .L_OPENSSL_ia32_rdrand_begin: movl $8,%ecx .L022loop: .byte 15,199,240 jc .L023break loop .L022loop .L023break: cmpl $0,%eax cmovel %ecx,%eax ret .size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin +.hidden OPENSSL_cpuid_setup +.hidden OPENSSL_ia32cap_P .comm OPENSSL_ia32cap_P,8,4 .section .init call OPENSSL_cpuid_setup #else .file "x86cpuid.S" .text .globl OPENSSL_ia32_cpuid .type OPENSSL_ia32_cpuid,@function .align 16 OPENSSL_ia32_cpuid: .L_OPENSSL_ia32_cpuid_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi xorl %edx,%edx pushfl popl %eax movl %eax,%ecx xorl $2097152,%eax pushl %eax popfl pushfl popl %eax xorl %eax,%ecx xorl %eax,%eax btl $21,%ecx jnc .L000nocpuid .byte 0x0f,0xa2 movl %eax,%edi xorl %eax,%eax cmpl $1970169159,%ebx setne %al movl %eax,%ebp cmpl $1231384169,%edx setne %al orl %eax,%ebp cmpl $1818588270,%ecx setne %al orl %eax,%ebp jz .L001intel cmpl $1752462657,%ebx setne %al movl %eax,%esi cmpl $1769238117,%edx setne %al orl %eax,%esi cmpl $1145913699,%ecx setne %al orl %eax,%esi jnz .L001intel movl $2147483648,%eax .byte 0x0f,0xa2 cmpl $2147483649,%eax jb .L001intel movl %eax,%esi movl $2147483649,%eax .byte 0x0f,0xa2 orl %ecx,%ebp andl $2049,%ebp cmpl $2147483656,%esi jb .L001intel movl $2147483656,%eax .byte 0x0f,0xa2 movzbl %cl,%esi incl %esi movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 btl $28,%edx jnc .L002generic shrl $16,%ebx andl $255,%ebx cmpl %esi,%ebx ja .L002generic andl $4026531839,%edx jmp .L002generic .L001intel: cmpl $4,%edi movl $-1,%edi jb .L003nocacheinfo movl $4,%eax movl $0,%ecx .byte 0x0f,0xa2 movl %eax,%edi shrl $14,%edi andl $4095,%edi .L003nocacheinfo: movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 andl $3220176895,%edx cmpl $0,%ebp jne .L004notintel orl $1073741824,%edx andb $15,%ah cmpb $15,%ah jne .L004notintel orl $1048576,%edx .L004notintel: btl $28,%edx jnc .L002generic andl $4026531839,%edx cmpl $0,%edi je .L002generic orl $268435456,%edx shrl $16,%ebx cmpb $1,%bl ja .L002generic andl $4026531839,%edx .L002generic: andl $2048,%ebp andl $4294965247,%ecx movl %edx,%esi orl %ecx,%ebp btl $27,%ecx jnc .L005clear_avx xorl %ecx,%ecx .byte 15,1,208 andl $6,%eax cmpl $6,%eax je .L006done cmpl $2,%eax je .L005clear_avx .L007clear_xmm: andl $4261412861,%ebp andl $4278190079,%esi .L005clear_avx: andl $4026525695,%ebp .L006done: movl %esi,%eax movl %ebp,%edx .L000nocpuid: popl %edi popl %esi popl %ebx popl %ebp ret .size OPENSSL_ia32_cpuid,.-.L_OPENSSL_ia32_cpuid_begin .globl OPENSSL_rdtsc .type OPENSSL_rdtsc,@function .align 16 OPENSSL_rdtsc: .L_OPENSSL_rdtsc_begin: xorl %eax,%eax xorl %edx,%edx leal OPENSSL_ia32cap_P,%ecx btl $4,(%ecx) jnc .L008notsc .byte 0x0f,0x31 .L008notsc: ret .size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin .globl OPENSSL_instrument_halt .type OPENSSL_instrument_halt,@function .align 16 OPENSSL_instrument_halt: .L_OPENSSL_instrument_halt_begin: leal OPENSSL_ia32cap_P,%ecx btl $4,(%ecx) jnc .L009nohalt .long 2421723150 andl $3,%eax jnz .L009nohalt pushfl popl %eax btl $9,%eax jnc .L009nohalt .byte 0x0f,0x31 pushl %edx pushl %eax hlt .byte 0x0f,0x31 subl (%esp),%eax sbbl 4(%esp),%edx addl $8,%esp ret .L009nohalt: xorl %eax,%eax xorl %edx,%edx ret .size OPENSSL_instrument_halt,.-.L_OPENSSL_instrument_halt_begin .globl OPENSSL_far_spin .type OPENSSL_far_spin,@function .align 16 OPENSSL_far_spin: .L_OPENSSL_far_spin_begin: pushfl popl %eax btl $9,%eax jnc .L010nospin movl 4(%esp),%eax movl 8(%esp),%ecx .long 2430111262 xorl %eax,%eax movl (%ecx),%edx jmp .L011spin .align 16 .L011spin: incl %eax cmpl (%ecx),%edx je .L011spin .long 529567888 ret .L010nospin: xorl %eax,%eax xorl %edx,%edx ret .size OPENSSL_far_spin,.-.L_OPENSSL_far_spin_begin .globl OPENSSL_wipe_cpu .type OPENSSL_wipe_cpu,@function .align 16 OPENSSL_wipe_cpu: .L_OPENSSL_wipe_cpu_begin: xorl %eax,%eax xorl %edx,%edx leal OPENSSL_ia32cap_P,%ecx movl (%ecx),%ecx btl $1,(%ecx) jnc .L012no_x87 andl $83886080,%ecx cmpl $83886080,%ecx jne .L013no_sse2 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 pxor %xmm3,%xmm3 pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 .L013no_sse2: .long 4007259865,4007259865,4007259865,4007259865,2430851995 .L012no_x87: leal 4(%esp),%eax ret .size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin .globl OPENSSL_atomic_add .type OPENSSL_atomic_add,@function .align 16 OPENSSL_atomic_add: .L_OPENSSL_atomic_add_begin: movl 4(%esp),%edx movl 8(%esp),%ecx pushl %ebx nop movl (%edx),%eax .L014spin: leal (%eax,%ecx,1),%ebx nop .long 447811568 jne .L014spin movl %ebx,%eax popl %ebx ret .size OPENSSL_atomic_add,.-.L_OPENSSL_atomic_add_begin .globl OPENSSL_indirect_call .type OPENSSL_indirect_call,@function .align 16 OPENSSL_indirect_call: .L_OPENSSL_indirect_call_begin: pushl %ebp movl %esp,%ebp subl $28,%esp movl 12(%ebp),%ecx movl %ecx,(%esp) movl 16(%ebp),%edx movl %edx,4(%esp) movl 20(%ebp),%eax movl %eax,8(%esp) movl 24(%ebp),%eax movl %eax,12(%esp) movl 28(%ebp),%eax movl %eax,16(%esp) movl 32(%ebp),%eax movl %eax,20(%esp) movl 36(%ebp),%eax movl %eax,24(%esp) call *8(%ebp) movl %ebp,%esp popl %ebp ret .size OPENSSL_indirect_call,.-.L_OPENSSL_indirect_call_begin .globl OPENSSL_cleanse .type OPENSSL_cleanse,@function .align 16 OPENSSL_cleanse: .L_OPENSSL_cleanse_begin: movl 4(%esp),%edx movl 8(%esp),%ecx xorl %eax,%eax cmpl $7,%ecx jae .L015lot cmpl $0,%ecx je .L016ret .L017little: movb %al,(%edx) subl $1,%ecx leal 1(%edx),%edx jnz .L017little .L016ret: ret .align 16 .L015lot: testl $3,%edx jz .L018aligned movb %al,(%edx) leal -1(%ecx),%ecx leal 1(%edx),%edx jmp .L015lot .L018aligned: movl %eax,(%edx) leal -4(%ecx),%ecx testl $-4,%ecx leal 4(%edx),%edx jnz .L018aligned cmpl $0,%ecx jne .L017little ret .size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin .globl OPENSSL_ia32_rdrand .type OPENSSL_ia32_rdrand,@function .align 16 OPENSSL_ia32_rdrand: .L_OPENSSL_ia32_rdrand_begin: movl $8,%ecx .L019loop: .byte 15,199,240 jc .L020break loop .L019loop .L020break: cmpl $0,%eax cmovel %ecx,%eax ret .size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin +.hidden OPENSSL_cpuid_setup +.hidden OPENSSL_ia32cap_P .comm OPENSSL_ia32cap_P,8,4 .section .init call OPENSSL_cpuid_setup #endif