| author | Bernd Edlinger <bernd.edlinger@hotmail.de> | 2022-06-12 09:37:26 +0200 |
|---|---|---|
| committer | Tomas Mraz <tomas@openssl.org> | 2022-07-08 11:07:12 +0200 |
| commit | 65523758e546fcef0f930e5f8878ef51d174dbc8 (patch) | |
| tree | 5ec466231b0a89b50ba94154d34280b5d74701f9 /crypto/aes | |
| parent | 5cc9ab5cf51137daf6d2d57718f56316dcb62744 (diff) | |
Fix reported performance degradation on aarch64
This restores the implementation prior to
commit 2621751 ("aes/asm/aesv8-armx.pl: avoid 32-bit lane assignment in CTR mode")
for 64-bit targets only, since the reworked code is reportedly 2-17% slower
and the silicon errata it works around affects only 32-bit targets.
The new algorithm is still used for 32-bit targets.
Fixes #18445
Reviewed-by: Paul Dale <pauli@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/18581)
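For context on what the counter bookkeeping in the diff below implements: in AES-CTR the last 32-bit word of the 16-byte counter block is a big-endian block counter. The assembly keeps a host-order copy of it, increments it per block, byte-reverses it with `rev` on little-endian targets (the `#ifndef __ARMEB__` guard skips the swap on big-endian ones), and writes it into lane 3 of a NEON register with `vmov.32`. Here is a minimal C sketch of that bookkeeping; the helper name is hypothetical, not OpenSSL's API, and it assumes a little-endian host and the GCC/Clang `__builtin_bswap32` builtin:

```c
#include <stdint.h>
#include <string.h>

/* Hypothetical sketch, not OpenSSL code: derive the next 16-byte CTR
 * counter block.  Bytes 12..15 (lane 3 of the NEON register) hold the
 * block counter in big-endian byte order; the assembly's
 * "rev" + "vmov.32 ${dat}[3],$tctr" pair does the same job. */
static void next_ctr_block(uint8_t block[16], uint32_t *ctr)
{
    /* rev: host order (little-endian assumed) -> big-endian */
    uint32_t be = __builtin_bswap32(++*ctr);
    memcpy(block + 12, &be, 4);   /* vmov.32 into lane 3 */
}
```

The 3x interleaved main loop derives three such blocks per iteration, which is why the diff juggles `$tctr0`/`$tctr1`/`$tctr2` and decrements `$len` by 3.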
Diffstat (limited to 'crypto/aes')
-rwxr-xr-x | crypto/aes/asm/aesv8-armx.pl | 60 |
1 file changed, 60 insertions, 0 deletions
```diff
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
index 2db6012a43..ea74217317 100755
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -1809,6 +1809,21 @@ $code.=<<___;
 #ifndef __ARMEB__
 	rev		$ctr, $ctr
 #endif
+___
+$code.=<<___	if ($flavour =~ /64/);
+	vorr		$dat1,$dat0,$dat0
+	add		$tctr1, $ctr, #1
+	vorr		$dat2,$dat0,$dat0
+	add		$ctr, $ctr, #2
+	vorr		$ivec,$dat0,$dat0
+	rev		$tctr1, $tctr1
+	vmov.32		${dat1}[3],$tctr1
+	b.ls		.Lctr32_tail
+	rev		$tctr2, $ctr
+	sub		$len,$len,#3		// bias
+	vmov.32		${dat2}[3],$tctr2
+___
+$code.=<<___	if ($flavour !~ /64/);
 	add		$tctr1, $ctr, #1
 	vorr		$ivec,$dat0,$dat0
 	rev		$tctr1, $tctr1
@@ -2015,11 +2030,25 @@ $code.=<<___;
 	aese		$dat1,q8
 	aesmc		$tmp1,$dat1
 	vld1.8		{$in0},[$inp],#16
+___
+$code.=<<___	if ($flavour =~ /64/);
+	vorr		$dat0,$ivec,$ivec
+___
+$code.=<<___	if ($flavour !~ /64/);
 	add		$tctr0,$ctr,#1
+___
+$code.=<<___;
 	aese		$dat2,q8
 	aesmc		$dat2,$dat2
 	vld1.8		{$in1},[$inp],#16
+___
+$code.=<<___	if ($flavour =~ /64/);
+	vorr		$dat1,$ivec,$ivec
+___
+$code.=<<___	if ($flavour !~ /64/);
 	rev		$tctr0,$tctr0
+___
+$code.=<<___;
 	aese		$tmp0,q9
 	aesmc		$tmp0,$tmp0
 	aese		$tmp1,q9
@@ -2028,6 +2057,12 @@ $code.=<<___;
 	mov		$key_,$key
 	aese		$dat2,q9
 	aesmc		$tmp2,$dat2
+___
+$code.=<<___	if ($flavour =~ /64/);
+	vorr		$dat2,$ivec,$ivec
+	add		$tctr0,$ctr,#1
+___
+$code.=<<___;
 	aese		$tmp0,q12
 	aesmc		$tmp0,$tmp0
 	aese		$tmp1,q12
@@ -2043,22 +2078,47 @@ $code.=<<___;
 	aese		$tmp1,q13
 	aesmc		$tmp1,$tmp1
 	veor		$in2,$in2,$rndlast
+___
+$code.=<<___	if ($flavour =~ /64/);
+	rev		$tctr0,$tctr0
+	aese		$tmp2,q13
+	aesmc		$tmp2,$tmp2
+	vmov.32		${dat0}[3], $tctr0
+___
+$code.=<<___	if ($flavour !~ /64/);
 	vmov.32		${ivec}[3], $tctr0
 	aese		$tmp2,q13
 	aesmc		$tmp2,$tmp2
 	vorr		$dat0,$ivec,$ivec
+___
+$code.=<<___;
 	rev		$tctr1,$tctr1
 	aese		$tmp0,q14
 	aesmc		$tmp0,$tmp0
+___
+$code.=<<___	if ($flavour !~ /64/);
 	vmov.32		${ivec}[3], $tctr1
 	rev		$tctr2,$ctr
+___
+$code.=<<___;
 	aese		$tmp1,q14
 	aesmc		$tmp1,$tmp1
+___
+$code.=<<___	if ($flavour =~ /64/);
+	vmov.32		${dat1}[3], $tctr1
+	rev		$tctr2,$ctr
+	aese		$tmp2,q14
+	aesmc		$tmp2,$tmp2
+	vmov.32		${dat2}[3], $tctr2
+___
+$code.=<<___	if ($flavour !~ /64/);
 	vorr		$dat1,$ivec,$ivec
 	vmov.32		${ivec}[3], $tctr2
 	aese		$tmp2,q14
 	aesmc		$tmp2,$tmp2
 	vorr		$dat2,$ivec,$ivec
+___
+$code.=<<___;
 	subs		$len,$len,#3
 	aese		$tmp0,q15
 	aese		$tmp1,q15
```
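Both flavours compute the same counter blocks; the main difference is which register receives the 32-bit lane write. The restored 64-bit path copies the whole counter block first (`vorr`) and then patches lane 3 of the working register directly; the 32-bit path, kept from commit 2621751's errata workaround, patches lane 3 of `$ivec` first and then copies the whole register. A hedged NEON-intrinsics sketch of the two styles (function and variable names are illustrative, not OpenSSL's):

```c
#include <arm_neon.h>
#include <stdint.h>

/* Illustrative only, not OpenSSL code: the two counter-update orders
 * the diff selects between.  ctr_be is the block counter already in
 * big-endian byte order (i.e. after "rev"). */

/* 64-bit flavour (restored fast path): full-register copy first, then
 * a 32-bit lane write straight into the working register. */
static inline uint8x16_t next_block_64(uint8x16_t ivec, uint32_t ctr_be)
{
    uint32x4_t dat = vreinterpretq_u32_u8(ivec);  /* vorr $dat,$ivec,$ivec */
    dat = vsetq_lane_u32(ctr_be, dat, 3);         /* vmov.32 ${dat}[3],$tctr */
    return vreinterpretq_u8_u32(dat);
}

/* 32-bit flavour (errata workaround kept): lane write into $ivec
 * first, then a full-register copy into the working register. */
static inline uint8x16_t next_block_32(uint8x16_t *ivec, uint32_t ctr_be)
{
    uint32x4_t v = vreinterpretq_u32_u8(*ivec);
    v = vsetq_lane_u32(ctr_be, v, 3);             /* vmov.32 ${ivec}[3],$tctr */
    *ivec = vreinterpretq_u8_u32(v);
    return *ivec;                                 /* vorr $dat,$ivec,$ivec */
}
```

At the C level the two orderings are equivalent; the distinction only matters for the generated instruction sequence, where the restored 64-bit ordering reportedly recovers the 2-17% lost to the workaround.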