diff options
Diffstat (limited to 'crypto/modes/asm/ghash-x86.pl')
-rw-r--r-- | crypto/modes/asm/ghash-x86.pl | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/crypto/modes/asm/ghash-x86.pl b/crypto/modes/asm/ghash-x86.pl index 6b09669..2426cd0 100644 --- a/crypto/modes/asm/ghash-x86.pl +++ b/crypto/modes/asm/ghash-x86.pl @@ -346,7 +346,7 @@ $S=12; # shift factor for rem_4bit # effective address calculation and finally merge of value to Z.hi. # Reference to rem_4bit is scheduled so late that I had to >>4 # rem_4bit elements. This resulted in 20-45% procent improvement -# on contemporary µ-archs. +# on contemporary µ-archs. { my $cnt; my $rem_4bit = "eax"; @@ -635,7 +635,7 @@ sub mmx_loop() { { my @lo = ("mm0","mm1","mm2"); my @hi = ("mm3","mm4","mm5"); my @tmp = ("mm6","mm7"); - my $off1=0,$off2=0,$i; + my ($off1,$off2,$i) = (0,0,); &add ($Htbl,128); # optimize for size &lea ("edi",&DWP(16+128,"esp")); @@ -883,7 +883,7 @@ sub reduction_alg9 { # 17/13 times faster than Intel version my ($Xhi,$Xi) = @_; # 1st phase - &movdqa ($T1,$Xi) # + &movdqa ($T1,$Xi); # &psllq ($Xi,1); &pxor ($Xi,$T1); # &psllq ($Xi,5); # @@ -1019,7 +1019,7 @@ my ($Xhi,$Xi) = @_; &movdqa ($Xhn,$Xn); &pxor ($Xhi,$T1); # "Ii+Xi", consume early - &movdqa ($T1,$Xi) #&reduction_alg9($Xhi,$Xi); 1st phase + &movdqa ($T1,$Xi); #&reduction_alg9($Xhi,$Xi); 1st phase &psllq ($Xi,1); &pxor ($Xi,$T1); # &psllq ($Xi,5); # |