Tweak arm64 perlasm to move all data from .text to .rodata.
This allows us to drop -Wl,--no-execute-only on arm64. ok tb@
This commit is contained in:
parent
c11f174fce
commit
e44882bdb7
|
@ -5,7 +5,7 @@ V= 3.0.7
|
|||
PKGNAME= openssl-${V}
|
||||
PKGSPEC= openssl->=3v0,<3.1v0
|
||||
EPOCH= 0
|
||||
REVISION= 2
|
||||
REVISION= 3
|
||||
|
||||
SHLIBVER= 12.2
|
||||
SHARED_LIBS= crypto ${SHLIBVER} \
|
||||
|
@ -38,7 +38,9 @@ MAN_PREFIX= @man lib/eopenssl30/man
|
|||
INSTALL_TARGET+= install_man_docs
|
||||
.endif
|
||||
|
||||
.if ${MACHINE_ARCH} != aarch64
|
||||
USE_NOEXECONLY = Yes
|
||||
.endif
|
||||
|
||||
# install to unusual directory name - this port is *not* intended to be
|
||||
# picked up by configure scripts without explicitly CPPFLAGS/LDFLAGS.
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
Index: crypto/aes/asm/aesv8-armx.pl
|
||||
--- crypto/aes/asm/aesv8-armx.pl.orig
|
||||
+++ crypto/aes/asm/aesv8-armx.pl
|
||||
@@ -107,11 +107,13 @@ my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
|
||||
|
||||
|
||||
$code.=<<___;
|
||||
+.rodata
|
||||
.align 5
|
||||
.Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
+.previous
|
||||
|
||||
.globl ${prefix}_set_encrypt_key
|
||||
.type ${prefix}_set_encrypt_key,%function
|
||||
@@ -137,7 +139,8 @@ $code.=<<___;
|
||||
tst $bits,#0x3f
|
||||
b.ne .Lenc_key_abort
|
||||
|
||||
- adr $ptr,.Lrcon
|
||||
+ adrp $ptr,.Lrcon
|
||||
+ add $ptr,$ptr,:lo12:.Lrcon
|
||||
cmp $bits,#192
|
||||
|
||||
veor $zero,$zero,$zero
|
150
security/openssl/3.0/patches/patch-crypto_aes_asm_vpaes-armv8_pl
Normal file
150
security/openssl/3.0/patches/patch-crypto_aes_asm_vpaes-armv8_pl
Normal file
|
@ -0,0 +1,150 @@
|
|||
Index: crypto/aes/asm/vpaes-armv8.pl
|
||||
--- crypto/aes/asm/vpaes-armv8.pl.orig
|
||||
+++ crypto/aes/asm/vpaes-armv8.pl
|
||||
@@ -53,7 +53,7 @@ open OUT,"| \"$^X\" $xlate $flavour \"$output\""
|
||||
*STDOUT=*OUT;
|
||||
|
||||
$code.=<<___;
|
||||
-.text
|
||||
+.rodata
|
||||
|
||||
.type _vpaes_consts,%object
|
||||
.align 7 // totally strategic alignment
|
||||
@@ -144,6 +144,9 @@ _vpaes_consts:
|
||||
.asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
|
||||
.size _vpaes_consts,.-_vpaes_consts
|
||||
.align 6
|
||||
+
|
||||
+.text
|
||||
+
|
||||
___
|
||||
|
||||
{
|
||||
@@ -163,7 +166,8 @@ $code.=<<___;
|
||||
.type _vpaes_encrypt_preheat,%function
|
||||
.align 4
|
||||
_vpaes_encrypt_preheat:
|
||||
- adr x10, .Lk_inv
|
||||
+ adrp x10, .Lk_inv
|
||||
+ add x10, x10, :lo12:.Lk_inv
|
||||
movi v17.16b, #0x0f
|
||||
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
|
||||
ld1 {v20.2d-v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo
|
||||
@@ -191,7 +195,9 @@ _vpaes_encrypt_preheat:
|
||||
_vpaes_encrypt_core:
|
||||
mov x9, $key
|
||||
ldr w8, [$key,#240] // pull rounds
|
||||
- adr x11, .Lk_mc_forward+16
|
||||
+ adrp x11, .Lk_mc_forward
|
||||
+ add x11, x11, :lo12:.Lk_mc_forward
|
||||
+ add x11, x11, #16
|
||||
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
||||
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
|
||||
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||
@@ -278,7 +284,9 @@ vpaes_encrypt:
|
||||
_vpaes_encrypt_2x:
|
||||
mov x9, $key
|
||||
ldr w8, [$key,#240] // pull rounds
|
||||
- adr x11, .Lk_mc_forward+16
|
||||
+ adrp x11, .Lk_mc_forward
|
||||
+ add x11, x11, :lo12:.Lk_mc_forward
|
||||
+ add x11, x11, #16
|
||||
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
||||
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
|
||||
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||
@@ -381,9 +389,11 @@ _vpaes_encrypt_2x:
|
||||
.type _vpaes_decrypt_preheat,%function
|
||||
.align 4
|
||||
_vpaes_decrypt_preheat:
|
||||
- adr x10, .Lk_inv
|
||||
+ adrp x10, .Lk_inv
|
||||
+ add x10, x10, :lo12:.Lk_inv
|
||||
movi v17.16b, #0x0f
|
||||
- adr x11, .Lk_dipt
|
||||
+ adrp x11, .Lk_dipt
|
||||
+ add x11, x11, :lo12:.Lk_dipt
|
||||
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
|
||||
ld1 {v20.2d-v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo
|
||||
ld1 {v24.2d-v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd
|
||||
@@ -405,10 +415,13 @@ _vpaes_decrypt_core:
|
||||
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
||||
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
|
||||
eor x11, x11, #0x30 // xor \$0x30, %r11
|
||||
- adr x10, .Lk_sr
|
||||
+ adrp x10, .Lk_sr
|
||||
+ add x10, x10, :lo12:.Lk_sr
|
||||
and x11, x11, #0x30 // and \$0x30, %r11
|
||||
add x11, x11, x10
|
||||
- adr x10, .Lk_mc_forward+48
|
||||
+ adrp x10, .Lk_mc_forward
|
||||
+ add x10, x10, :lo12:.Lk_mc_forward
|
||||
+ add x10, x10, #48
|
||||
|
||||
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
|
||||
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||
@@ -516,10 +529,13 @@ _vpaes_decrypt_2x:
|
||||
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
||||
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
|
||||
eor x11, x11, #0x30 // xor \$0x30, %r11
|
||||
- adr x10, .Lk_sr
|
||||
+ adrp x10, .Lk_sr
|
||||
+ add x10, x10, :lo12:.Lk_sr
|
||||
and x11, x11, #0x30 // and \$0x30, %r11
|
||||
add x11, x11, x10
|
||||
- adr x10, .Lk_mc_forward+48
|
||||
+ adrp x10, .Lk_mc_forward
|
||||
+ add x10, x10, :lo12:.Lk_mc_forward
|
||||
+ add x10, x10, #48
|
||||
|
||||
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
|
||||
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||
@@ -655,14 +671,18 @@ $code.=<<___;
|
||||
.type _vpaes_key_preheat,%function
|
||||
.align 4
|
||||
_vpaes_key_preheat:
|
||||
- adr x10, .Lk_inv
|
||||
+ adrp x10, .Lk_inv
|
||||
+ add x10, x10, :lo12:.Lk_inv
|
||||
movi v16.16b, #0x5b // .Lk_s63
|
||||
- adr x11, .Lk_sb1
|
||||
+ adrp x11, .Lk_sb1
|
||||
+ add x11, x11, :lo12:.Lk_sb1
|
||||
movi v17.16b, #0x0f // .Lk_s0F
|
||||
ld1 {v18.2d-v21.2d}, [x10] // .Lk_inv, .Lk_ipt
|
||||
- adr x10, .Lk_dksd
|
||||
+ adrp x10, .Lk_dksd
|
||||
+ add x10, x10, :lo12:.Lk_dksd
|
||||
ld1 {v22.2d-v23.2d}, [x11] // .Lk_sb1
|
||||
- adr x11, .Lk_mc_forward
|
||||
+ adrp x11, .Lk_mc_forward
|
||||
+ add x11, x11, :lo12:.Lk_mc_forward
|
||||
ld1 {v24.2d-v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb
|
||||
ld1 {v28.2d-v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9
|
||||
ld1 {v8.2d}, [x10] // .Lk_rcon
|
||||
@@ -686,7 +706,8 @@ _vpaes_schedule_core:
|
||||
bl _vpaes_schedule_transform
|
||||
mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7
|
||||
|
||||
- adr x10, .Lk_sr // lea .Lk_sr(%rip),%r10
|
||||
+ adrp x10, .Lk_sr // lea .Lk_sr(%rip),%r10
|
||||
+ add x10, x10, :lo12:.Lk_sr
|
||||
add x8, x8, x10
|
||||
cbnz $dir, .Lschedule_am_decrypting
|
||||
|
||||
@@ -812,12 +833,14 @@ _vpaes_schedule_core:
|
||||
.align 4
|
||||
.Lschedule_mangle_last:
|
||||
// schedule last round key from xmm0
|
||||
- adr x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
|
||||
+ adrp x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
|
||||
+ add x11, x11, :lo12:.Lk_deskew
|
||||
cbnz $dir, .Lschedule_mangle_last_dec
|
||||
|
||||
// encrypting
|
||||
ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1
|
||||
- adr x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
|
||||
+ adrp x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
|
||||
+ add x11, x11, :lo12:.Lk_opt
|
||||
add $out, $out, #32 // add \$32, %rdx
|
||||
tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
Index: crypto/bn/asm/armv8-mont.pl
|
||||
--- crypto/bn/asm/armv8-mont.pl.orig
|
||||
+++ crypto/bn/asm/armv8-mont.pl
|
||||
@@ -1889,6 +1889,7 @@ __bn_mul4x_mont:
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
+.rodata
|
||||
.asciz "Montgomery Multiplication for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 4
|
||||
___
|
|
@ -0,0 +1,51 @@
|
|||
Index: crypto/chacha/asm/chacha-armv8.pl
|
||||
--- crypto/chacha/asm/chacha-armv8.pl.orig
|
||||
+++ crypto/chacha/asm/chacha-armv8.pl
|
||||
@@ -138,7 +138,7 @@ $code.=<<___;
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
|
||||
-.text
|
||||
+.rodata
|
||||
|
||||
.align 5
|
||||
.Lsigma:
|
||||
@@ -149,6 +149,8 @@ $code.=<<___;
|
||||
.long 0x02010003,0x06050407,0x0a09080b,0x0e0d0c0f
|
||||
.asciz "ChaCha20 for ARMv8, CRYPTOGAMS by \@dot-asm"
|
||||
|
||||
+.text
|
||||
+
|
||||
.globl ChaCha20_ctr32
|
||||
.type ChaCha20_ctr32,%function
|
||||
.align 5
|
||||
@@ -169,7 +171,8 @@ ChaCha20_ctr32:
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
- adr @x[0],.Lsigma
|
||||
+ adrp @x[0],.Lsigma
|
||||
+ add @x[0],@x[0],:lo12:.Lsigma
|
||||
stp x19,x20,[sp,#16]
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
@@ -437,7 +440,8 @@ ChaCha20_neon:
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
- adr @x[0],.Lsigma
|
||||
+ adrp @x[0],.Lsigma
|
||||
+ add @x[0],@x[0],:lo12:.Lsigma
|
||||
stp x19,x20,[sp,#16]
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
@@ -848,7 +852,8 @@ ChaCha20_512_neon:
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
- adr @x[0],.Lsigma
|
||||
+ adrp @x[0],.Lsigma
|
||||
+ add @x[0],@x[0],:lo12:.Lsigma
|
||||
stp x19,x20,[sp,#16]
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
|
@ -0,0 +1,233 @@
|
|||
Index: crypto/ec/asm/ecp_nistz256-armv8.pl
|
||||
--- crypto/ec/asm/ecp_nistz256-armv8.pl.orig
|
||||
+++ crypto/ec/asm/ecp_nistz256-armv8.pl
|
||||
@@ -55,7 +55,7 @@ my ($acc6,$acc7)=($ap,$bp); # used in __ecp_nistz256_s
|
||||
$code.=<<___;
|
||||
#include "arm_arch.h"
|
||||
|
||||
-.text
|
||||
+.rodata
|
||||
___
|
||||
########################################################################
|
||||
# Convert ecp_nistz256_table.c to layout expected by ecp_nistz_gather_w7
|
||||
@@ -117,6 +117,8 @@ $code.=<<___;
|
||||
.quad 0xccd1c8aaee00bc4f
|
||||
.asciz "ECP_NISTZ256 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
|
||||
+.text
|
||||
+
|
||||
// void ecp_nistz256_to_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
|
||||
.globl ecp_nistz256_to_mont
|
||||
.type ecp_nistz256_to_mont,%function
|
||||
@@ -127,12 +129,16 @@ ecp_nistz256_to_mont:
|
||||
add x29,sp,#0
|
||||
stp x19,x20,[sp,#16]
|
||||
|
||||
- ldr $bi,.LRR // bp[0]
|
||||
+ adrp $bi,.LRR
|
||||
+ ldr $bi,[$bi,:lo12:.LRR] // bp[0]
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
- adr $bp,.LRR // &bp[0]
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
+ adrp $bp,.LRR // &bp[0]
|
||||
+ add $bp,$bp,:lo12:.LRR
|
||||
|
||||
bl __ecp_nistz256_mul_mont
|
||||
|
||||
@@ -155,9 +161,12 @@ ecp_nistz256_from_mont:
|
||||
mov $bi,#1 // bp[0]
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
- adr $bp,.Lone // &bp[0]
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
+ adrp $bp,.Lone // &bp[0]
|
||||
+ add $bp,$bp,:lo12:.Lone
|
||||
|
||||
bl __ecp_nistz256_mul_mont
|
||||
|
||||
@@ -181,8 +190,10 @@ ecp_nistz256_mul_mont:
|
||||
ldr $bi,[$bp] // bp[0]
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_mul_mont
|
||||
|
||||
@@ -204,8 +215,10 @@ ecp_nistz256_sqr_mont:
|
||||
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_sqr_mont
|
||||
|
||||
@@ -229,8 +242,10 @@ ecp_nistz256_add:
|
||||
ldp $t0,$t1,[$bp]
|
||||
ldp $acc2,$acc3,[$ap,#16]
|
||||
ldp $t2,$t3,[$bp,#16]
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_add
|
||||
|
||||
@@ -250,8 +265,10 @@ ecp_nistz256_div_by_2:
|
||||
|
||||
ldp $acc0,$acc1,[$ap]
|
||||
ldp $acc2,$acc3,[$ap,#16]
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_div_by_2
|
||||
|
||||
@@ -271,8 +288,10 @@ ecp_nistz256_mul_by_2:
|
||||
|
||||
ldp $acc0,$acc1,[$ap]
|
||||
ldp $acc2,$acc3,[$ap,#16]
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
mov $t0,$acc0
|
||||
mov $t1,$acc1
|
||||
mov $t2,$acc2
|
||||
@@ -296,8 +315,10 @@ ecp_nistz256_mul_by_3:
|
||||
|
||||
ldp $acc0,$acc1,[$ap]
|
||||
ldp $acc2,$acc3,[$ap,#16]
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
mov $t0,$acc0
|
||||
mov $t1,$acc1
|
||||
mov $t2,$acc2
|
||||
@@ -333,8 +354,10 @@ ecp_nistz256_sub:
|
||||
|
||||
ldp $acc0,$acc1,[$ap]
|
||||
ldp $acc2,$acc3,[$ap,#16]
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_sub_from
|
||||
|
||||
@@ -357,8 +380,10 @@ ecp_nistz256_neg:
|
||||
mov $acc1,xzr
|
||||
mov $acc2,xzr
|
||||
mov $acc3,xzr
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
|
||||
bl __ecp_nistz256_sub_from
|
||||
|
||||
@@ -736,9 +761,11 @@ ecp_nistz256_point_double:
|
||||
mov $rp_real,$rp
|
||||
ldp $acc2,$acc3,[$ap,#48]
|
||||
mov $ap_real,$ap
|
||||
- ldr $poly1,.Lpoly+8
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
mov $t0,$acc0
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
mov $t1,$acc1
|
||||
ldp $a0,$a1,[$ap_real,#64] // forward load for p256_sqr_mont
|
||||
mov $t2,$acc2
|
||||
@@ -897,8 +924,10 @@ ecp_nistz256_point_add:
|
||||
mov $rp_real,$rp
|
||||
mov $ap_real,$ap
|
||||
mov $bp_real,$bp
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
orr $t0,$a0,$a1
|
||||
orr $t2,$a2,$a3
|
||||
orr $in2infty,$t0,$t2
|
||||
@@ -1151,8 +1180,10 @@ ecp_nistz256_point_add_affine:
|
||||
mov $rp_real,$rp
|
||||
mov $ap_real,$ap
|
||||
mov $bp_real,$bp
|
||||
- ldr $poly1,.Lpoly+8
|
||||
- ldr $poly3,.Lpoly+24
|
||||
+ adrp $poly3,.Lpoly
|
||||
+ add $poly3,$poly3,:lo12:.Lpoly
|
||||
+ ldr $poly1,[$poly3,#8]
|
||||
+ ldr $poly3,[$poly3,#24]
|
||||
|
||||
ldp $a0,$a1,[$ap,#64] // in1_z
|
||||
ldp $a2,$a3,[$ap,#64+16]
|
||||
@@ -1303,7 +1334,9 @@ $code.=<<___;
|
||||
stp $acc2,$acc3,[$rp_real,#$i+16]
|
||||
___
|
||||
$code.=<<___ if ($i == 0);
|
||||
- adr $bp_real,.Lone_mont-64
|
||||
+ adrp $bp_real,.Lone_mont
|
||||
+ add $bp_real,$bp_real,:lo12:.Lone_mont
|
||||
+ sub $bp_real,$bp_real,#64
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
@@ -1352,7 +1385,8 @@ ecp_nistz256_ord_mul_mont:
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
|
||||
- adr $ordk,.Lord
|
||||
+ adrp $ordk,.Lord
|
||||
+ add $ordk,$ordk,:lo12:.Lord
|
||||
ldr $bi,[$bp] // bp[0]
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
@@ -1493,7 +1527,8 @@ ecp_nistz256_ord_sqr_mont:
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
|
||||
- adr $ordk,.Lord
|
||||
+ adrp $ordk,.Lord
|
||||
+ add $ordk,$ordk,:lo12:.Lord
|
||||
ldp $a0,$a1,[$ap]
|
||||
ldp $a2,$a3,[$ap,#16]
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
Index: crypto/modes/asm/aes-gcm-armv8_64.pl
|
||||
--- crypto/modes/asm/aes-gcm-armv8_64.pl.orig
|
||||
+++ crypto/modes/asm/aes-gcm-armv8_64.pl
|
||||
@@ -5654,6 +5654,7 @@ ___
|
||||
}
|
||||
|
||||
$code.=<<___;
|
||||
+.rodata
|
||||
.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 2
|
||||
#endif
|
|
@ -0,0 +1,11 @@
|
|||
Index: crypto/modes/asm/ghashv8-armx.pl
|
||||
--- crypto/modes/asm/ghashv8-armx.pl.orig
|
||||
+++ crypto/modes/asm/ghashv8-armx.pl
|
||||
@@ -710,6 +710,7 @@ ___
|
||||
}
|
||||
|
||||
$code.=<<___;
|
||||
+.rodata
|
||||
.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 2
|
||||
#endif
|
|
@ -0,0 +1,32 @@
|
|||
Index: crypto/poly1305/asm/poly1305-armv8.pl
|
||||
--- crypto/poly1305/asm/poly1305-armv8.pl.orig
|
||||
+++ crypto/poly1305/asm/poly1305-armv8.pl
|
||||
@@ -432,7 +432,8 @@ poly1305_blocks_neon:
|
||||
ldr x30,[sp,#8]
|
||||
|
||||
add $in2,$inp,#32
|
||||
- adr $zeros,.Lzeros
|
||||
+ adrp $zeros,.Lzeros
|
||||
+ add $zeros,$zeros,:lo12:.Lzeros
|
||||
subs $len,$len,#64
|
||||
csel $in2,$zeros,$in2,lo
|
||||
|
||||
@@ -444,7 +445,8 @@ poly1305_blocks_neon:
|
||||
.align 4
|
||||
.Leven_neon:
|
||||
add $in2,$inp,#32
|
||||
- adr $zeros,.Lzeros
|
||||
+ adrp $zeros,.Lzeros
|
||||
+ add $zeros,$zeros,:lo12:.Lzeros
|
||||
subs $len,$len,#64
|
||||
csel $in2,$zeros,$in2,lo
|
||||
|
||||
@@ -923,6 +925,8 @@ poly1305_emit_neon:
|
||||
|
||||
ret
|
||||
.size poly1305_emit_neon,.-poly1305_emit_neon
|
||||
+
|
||||
+.rodata
|
||||
|
||||
.align 5
|
||||
.Lzeros:
|
|
@ -0,0 +1,45 @@
|
|||
Index: crypto/sha/asm/keccak1600-armv8.pl
|
||||
--- crypto/sha/asm/keccak1600-armv8.pl.orig
|
||||
+++ crypto/sha/asm/keccak1600-armv8.pl
|
||||
@@ -80,7 +80,7 @@ my @rhotates = ([ 0, 1, 62, 28, 27 ],
|
||||
[ 18, 2, 61, 56, 14 ]);
|
||||
|
||||
$code.=<<___;
|
||||
-.text
|
||||
+.rodata
|
||||
|
||||
.align 8 // strategic alignment and padding that allows to use
|
||||
// address value as loop termination condition...
|
||||
@@ -121,10 +121,13 @@ my @A = map([ "x$_", "x".($_+1), "x".($_+2), "x".($_+3
|
||||
my @C = map("x$_", (26,27,28,30));
|
||||
|
||||
$code.=<<___;
|
||||
+.text
|
||||
+
|
||||
.type KeccakF1600_int,%function
|
||||
.align 5
|
||||
KeccakF1600_int:
|
||||
- adr $C[2],iotas
|
||||
+ adrp $C[2],iotas
|
||||
+ add $C[2],$C[2],:lo12:iotas
|
||||
.inst 0xd503233f // paciasp
|
||||
stp $C[2],x30,[sp,#16] // 32 bytes on top are mine
|
||||
b .Loop
|
||||
@@ -552,7 +555,8 @@ $code.=<<___;
|
||||
.align 5
|
||||
KeccakF1600_ce:
|
||||
mov x9,#24
|
||||
- adr x10,iotas
|
||||
+ adrp x10,iotas
|
||||
+ add x10,x10,:lo12:iotas
|
||||
b .Loop_ce
|
||||
.align 4
|
||||
.Loop_ce:
|
||||
@@ -845,6 +849,7 @@ SHA3_squeeze_cext:
|
||||
___
|
||||
} }}}
|
||||
$code.=<<___;
|
||||
+.rodata
|
||||
.asciz "Keccak-1600 absorb and squeeze for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
___
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
Index: crypto/sha/asm/sha1-armv8.pl
|
||||
--- crypto/sha/asm/sha1-armv8.pl.orig
|
||||
+++ crypto/sha/asm/sha1-armv8.pl
|
||||
@@ -256,7 +256,8 @@ sha1_block_armv8:
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
- adr x4,.Lconst
|
||||
+ adrp x4,.Lconst
|
||||
+ add x4,x4,:lo12:.Lconst
|
||||
eor $E,$E,$E
|
||||
ld1.32 {$ABCD},[$ctx],#16
|
||||
ld1.32 {$E}[0],[$ctx]
|
||||
@@ -316,6 +317,9 @@ $code.=<<___;
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
.size sha1_block_armv8,.-sha1_block_armv8
|
||||
+
|
||||
+.rodata
|
||||
+
|
||||
.align 6
|
||||
.Lconst:
|
||||
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
|
|
@ -0,0 +1,61 @@
|
|||
Index: crypto/sha/asm/sha512-armv8.pl
|
||||
--- crypto/sha/asm/sha512-armv8.pl.orig
|
||||
+++ crypto/sha/asm/sha512-armv8.pl
|
||||
@@ -234,7 +234,8 @@ $code.=<<___;
|
||||
ldp $E,$F,[$ctx,#4*$SZ]
|
||||
add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input
|
||||
ldp $G,$H,[$ctx,#6*$SZ]
|
||||
- adr $Ktbl,.LK$BITS
|
||||
+ adrp $Ktbl,.LK$BITS
|
||||
+ add $Ktbl,$Ktbl,:lo12:.LK$BITS
|
||||
stp $ctx,$num,[x29,#96]
|
||||
|
||||
.Loop:
|
||||
@@ -284,6 +285,8 @@ $code.=<<___;
|
||||
ret
|
||||
.size $func,.-$func
|
||||
|
||||
+.rodata
|
||||
+
|
||||
.align 6
|
||||
.type .LK$BITS,%object
|
||||
.LK$BITS:
|
||||
@@ -354,6 +357,8 @@ $code.=<<___;
|
||||
.size .LK$BITS,.-.LK$BITS
|
||||
.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 2
|
||||
+
|
||||
+.text
|
||||
___
|
||||
|
||||
if ($SZ==4) {
|
||||
@@ -374,7 +379,8 @@ sha256_block_armv8:
|
||||
add x29,sp,#0
|
||||
|
||||
ld1.32 {$ABCD,$EFGH},[$ctx]
|
||||
- adr $Ktbl,.LK256
|
||||
+ adrp $Ktbl,.LK256
|
||||
+ add $Ktbl,$Ktbl,:lo12:.LK256
|
||||
|
||||
.Loop_hw:
|
||||
ld1 {@MSG[0]-@MSG[3]},[$inp],#64
|
||||
@@ -637,7 +643,8 @@ sha256_block_neon:
|
||||
mov x29, sp
|
||||
sub sp,sp,#16*4
|
||||
|
||||
- adr $Ktbl,.LK256
|
||||
+ adrp $Ktbl,.LK256
|
||||
+ add $Ktbl,$Ktbl,:lo12:.LK256
|
||||
add $num,$inp,$num,lsl#6 // len to point at the end of inp
|
||||
|
||||
ld1.8 {@X[0]},[$inp], #16
|
||||
@@ -750,7 +757,8 @@ sha512_block_armv8:
|
||||
ld1 {@MSG[4]-@MSG[7]},[$inp],#64
|
||||
|
||||
ld1.64 {@H[0]-@H[3]},[$ctx] // load context
|
||||
- adr $Ktbl,.LK512
|
||||
+ adrp $Ktbl,.LK512
|
||||
+ add $Ktbl,$Ktbl,:lo12:.LK512
|
||||
|
||||
rev64 @MSG[0],@MSG[0]
|
||||
rev64 @MSG[1],@MSG[1]
|
Loading…
Reference in New Issue
Block a user