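# Listing path: sysy-data/performance_c/asm/crypto-3.s

# Listing size: 814 lines, 12 KiB
# Listing language tag: ArmAsm (mislabelled: the code below is RISC-V RV64 assembly, see the .attribute arch line)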

# Name of the source file this assembly was generated from.
.file "crypto-3.sy"
# Address-forming pseudo-instructions in this file are assembled in their
# position-independent form.
.option pic
# ELF attribute recording the target ISA string: RV64I plus the M, A, F, D and C
# extensions, Zicsr and Zifencei.
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
# ELF attribute: the code is not meant to perform unaligned memory accesses.
.attribute unaligned_access, 0
# ELF attribute: the stack is kept 16-byte aligned.
.attribute stack_align, 16
# Everything up to the next section directive is emitted into .text.
.text
# get_random
#
# Advances the 32-bit generator word at .LANCHOR0 (the `state` object in this
# file's .data section) and returns the new value.  With s the old word and
# every step wrapping to 32 bits:
#     s = s + (s << 13)
#     s = s + s / 131072        (signed division, truncating toward zero)
#     s = s + (s << 5)
#
# In:     nothing
# Out:    a0 = new generator word (also stored back to memory)
# Temps:  a3, a4, a5
	.align	1
	.globl	get_random
	.type	get_random, @function
get_random:
	lla	a3, .LANCHOR0		# a3 -> generator word
	lw	a5, 0(a3)		# a5 = s
	slliw	a4, a5, 13
	addw	a4, a5, a4		# a4 = s + (s << 13)
	sraiw	a5, a4, 31		# a5 = -1 if a4 < 0, else 0
	srliw	a5, a5, 15		# a5 = 131071 if a4 < 0, else 0
	addw	a5, a4, a5		# bias so the shift below truncates toward zero
	sraiw	a5, a5, 17		# a5 = a4 / 131072
	addw	a5, a4, a5		# a5 = a4 + a4 / 131072
	slliw	a0, a5, 5
	addw	a0, a5, a0		# a0 = a5 + (a5 << 5)
	sw	a0, 0(a3)		# write the new word back
	jr	ra
	.size	get_random, .-get_random
# rotl1
#
# Returns (x << 1) + x % 2, where % is the C remainder (sign follows x) and
# the sum wraps to 32 bits.  This is arithmetic, not a bit rotation: x = 1
# yields 3.
#
# In:     a0 = x (32-bit signed)
# Out:    a0 = result
# Temps:  a4, a5
	.align	1
	.globl	rotl1
	.type	rotl1, @function
rotl1:
	srliw	a4, a0, 31		# a4 = 1 if x < 0, else 0
	addw	a5, a0, a4
	andi	a5, a5, 1
	subw	a5, a5, a4		# a5 = x % 2
	slliw	a0, a0, 1		# a0 = x << 1
	addw	a0, a5, a0
	jr	ra
	.size	rotl1, .-rotl1
# rotl5
#
# Returns (x << 5) + x % 32, where % is the C remainder (sign follows x) and
# the sum wraps to 32 bits.
#
# In:     a0 = x (32-bit signed)
# Out:    a0 = result
# Temps:  a4, a5
	.align	1
	.globl	rotl5
	.type	rotl5, @function
rotl5:
	sraiw	a4, a0, 31		# a4 = -1 if x < 0, else 0
	srliw	a4, a4, 27		# a4 = 31 if x < 0, else 0
	addw	a5, a0, a4		# biased copy of x for the remainder
	slliw	a0, a0, 5		# a0 = x << 5 (x itself is no longer needed)
	andi	a5, a5, 31
	subw	a5, a5, a4		# a5 = x % 32
	addw	a0, a5, a0
	jr	ra
	.size	rotl5, .-rotl5
# rotl30
#
# Returns (x << 30) + x % 1073741824, where % is the C remainder (sign follows
# x) and both the shift and the sum wrap to 32 bits.
#
# In:     a0 = x (32-bit signed)
# Out:    a0 = result
# Temps:  a4, a5
	.align	1
	.globl	rotl30
	.type	rotl30, @function
rotl30:
	sraiw	a4, a0, 31		# a4 = -1 if x < 0, else 0
	srliw	a4, a4, 2		# a4 = 1073741823 if x < 0, else 0
	addw	a5, a0, a4		# biased copy of x for the remainder
	slliw	a0, a0, 30		# a0 = x << 30 (x itself is no longer needed)
	slli	a5, a5, 34
	srli	a5, a5, 34		# keep only the low 30 bits of the biased value
	subw	a5, a5, a4		# a5 = x % 1073741824
	addw	a0, a5, a0
	jr	ra
	.size	rotl30, .-rotl30
# _and
#
# Returns the 32-bit wrapping sum a + b.  Note that, whatever the name
# suggests, the operation performed is an addition, not a bitwise AND.
#
# In:   a0 = a, a1 = b
# Out:  a0 = a + b
	.align	1
	.globl	_and
	.type	_and, @function
_and:
	addw	a0, a1, a0
	jr	ra
	.size	_and, .-_and
# _not
#
# Returns the bitwise complement of x, which equals -1 - x.
#
# In:   a0 = x
# Out:  a0 = ~x
	.align	1
	.globl	_not
	.type	_not, @function
_not:
	xori	a0, a0, -1		# flip every bit
	jr	ra
	.size	_not, .-_not
# _xor
#
# Returns -(a + b), computed with 32-bit wrapping.  Note that, whatever the
# name suggests, no bitwise exclusive-or is performed.
#
# In:   a0 = a, a1 = b
# Out:  a0 = -(a + b)
	.align	1
	.globl	_xor
	.type	_xor, @function
_xor:
	addw	a0, a1, a0		# a0 = a + b
	subw	a0, zero, a0		# a0 = 0 - (a + b)
	jr	ra
	.size	_xor, .-_xor
# _or
#
# Returns 0 for every input; neither argument register is read.
#
# In:   a0, a1 (ignored)
# Out:  a0 = 0
	.align	1
	.globl	_or
	.type	_or, @function
_or:
	mv	a0, zero
	jr	ra
	.size	_or, .-_or
# pseudo_sha1: pad the message in place, then fold it, 64 input words at a
# time, into five 32-bit accumulators that are finally stored to `output`.
#
# Arguments, as used by the code below:
#   a0 = input   base of an array of 32-bit words; padding is written into
#                this array after the message, so it needs spare room
#   a1 = message length in words
#   a2 = output  five 32-bit results are stored at offsets 0, 4, 8, 12, 16
# No result is left in a0.  memset is called once.
#
# Stack frame (512 bytes):
#   sp+0, sp+8, sp+16   accumulators 0..2, parked while s10/a1/a0 act as temporaries
#   sp+24, sp+32, sp+40 round constants -1894007588, 1859775361, -899497722
#   sp+48, sp+56        two schedule words carried between expansion passes
#   sp+64               padded message length in words
#   sp+72               the `output` pointer
#   sp+80 .. sp+399     80-word schedule array (called "words" below)
#   sp+408 .. sp+511    saved s11..s0 and ra
.align 1
.globl pseudo_sha1
.type pseudo_sha1, @function
pseudo_sha1:
# a5 = byte offset of input[len], a3 = len + 1, a6 = copy of the original len.
slli a5,a1,2
addiw a3,a1,1
mv a6,a1
# a1 becomes -2147483648 + 63: a mask of the 32-bit sign bit plus the low six
# bits.  "(x & mask) == 60" is how the code tests x % 64 == 60.
li a1,-2147483648
addi sp,sp,-512
addi a1,a1,63
# Save ra and s0..s11; all of them are overwritten below.
sd s0,496(sp)
sd s2,480(sp)
sd ra,504(sp)
sd s1,488(sp)
sd s3,472(sp)
sd s4,464(sp)
sd s5,456(sp)
sd s6,448(sp)
sd s7,440(sp)
sd s8,432(sp)
sd s9,424(sp)
sd s10,416(sp)
sd s11,408(sp)
# s2 = input base, s0 = output pointer.
mv s2,a0
and a4,a3,a1
add a0,a0,a5
# input[len] = 128.
li a7,128
sw a7,0(a0)
sext.w a4,a4
li a0,60
mv s0,a2
# Skip the zero fill when len + 1 already satisfies the % 64 == 60 test.
beq a4,a0,.L11
# a4 -> input[len + 1].
addi a5,a5,4
add a4,s2,a5
li a2,60
# Zero-fill loop: a3 is the running index; each pass stores one zero word and
# stops once the index satisfies the % 64 == 60 test.
.L12:
addiw a3,a3,1
and a5,a3,a1
sw zero,0(a4)
sext.w a5,a5
addi a4,a4,4
bne a5,a2,.L12
# Split the original length (a6) into four values using C-style signed
# division and remainder (the sraiw/srliw pairs build the rounding bias for
# negative inputs) and store them at input[a3 .. a3+3]:
#   a4 = len / 16777216
#   a5 = (len / 65536) % 256
#   a1 = (len / 256) % 256
#   a2 = len % 256
.L11:
sraiw a4,a6,31
srliw t3,a4,24
srliw a5,a4,16
addw a2,t3,a6
addw a5,a5,a6
sraiw t1,a5,31
sraiw a7,a2,31
srliw t1,t1,24
srliw a7,a7,24
sraiw a1,a2,8
sraiw a5,a5,16
addw a5,a5,t1
addw a1,a1,a7
srliw a4,a4,8
slli a0,a3,2
addw a4,a4,a6
andi a5,a5,255
andi a1,a1,255
andi a2,a2,255
add a0,s2,a0
subw a5,a5,t1
subw a1,a1,a7
subw a2,a2,t3
sraiw a4,a4,24
# s3 = base of the words array on the stack.
addi s3,sp,80
sw a4,0(a0)
sw a5,4(a0)
sw a1,8(a0)
sw a2,12(a0)
# memset(words, 0, 320); s8 = padded length (index after the four length words).
li a1,0
li a2,320
mv a0,s3
addiw s8,a3,4
call memset@plt
# Build the constants.  Each li/addi pair forms one 32-bit value:
#   sp+24 = -1894007588, sp+40 = -899497722, sp+32 = 1859775361
#   s10 = 1732584193, a1 = -271733879, a0 = -1732584194,
#   a6 = 271733878, a7 = -1009589776          (the five accumulators)
#   t1 = 1518500249                           (round constant for rounds 0..19)
#   a4 = 1073741823                           (mask for the % 1073741824 step)
#   a3 = 80, t3 = 19, s9 = 39                 (round-count limits)
li a5,-1894006784
addi a5,a5,-804
sd a5,24(sp)
li a5,-899497984
addi a5,a5,262
sd a5,40(sp)
li a5,1859776512
li s1,0
li a7,-1009590272
li a6,271732736
li a0,-1732583424
li a1,-271732736
li a2,1732583424
li t1,1518501888
li a4,1073741824
addi a5,a5,-1151
# Padded length goes to sp+64; s8 is then reused as the chunk start index (0).
sd s8,64(sp)
# s11 = words base, s6 = &words[16] (end marker for the packing loop).
mv s11,s3
addi a7,a7,496
addi a6,a6,1142
addi a0,a0,-770
addi a1,a1,-1143
addi s10,a2,769
addi s6,sp,144
addi t1,t1,-1639
addi a4,a4,-1
li a3,80
li t3,19
li s9,39
sd a5,32(sp)
mv s8,s1
sd s0,72(sp)
# s3 = pointer to the current 64-word chunk of input.
mv s3,s2
# ---- Per-chunk loop ----
.L13:
mv a5,s11
mv t5,s11
mv t4,s3
# Pack four input words into one schedule word, 16 times:
#   words[j] = ((in[4j] << 8) + in[4j+1] << 8) + in[4j+2] << 8) + in[4j+3]
# (each shift and add wraps to 32 bits).
.L14:
lw a2,0(t4)
lw t6,4(t4)
lw t0,8(t4)
slliw a2,a2,8
addw a2,a2,t6
slliw a2,a2,8
lw t6,12(t4)
addw a2,a2,t0
slliw a2,a2,8
addw a2,a2,t6
sw a2,0(t5)
addi t5,t5,4
addi t4,t4,16
bne t5,s6,.L14
# Preload the schedule words the expansion loop carries in registers and in
# the sp+48 / sp+56 slots: words[2..7] and words[13..15].
lw t6,104(sp)
lw a2,132(sp)
lw s2,136(sp)
sd t6,48(sp)
lw t6,108(sp)
lw s1,140(sp)
lw s0,88(sp)
sd t6,56(sp)
lw t5,92(sp)
lw t4,96(sp)
lw t2,100(sp)
# t6 = &words[i - 16], t0 = i (starts at 16).
mv t6,s11
li t0,16
# Park accumulators 0..2; s10, a1 and a0 are used as temporaries below.
sd s10,0(sp)
sd a1,8(sp)
sd a0,16(sp)
j .L15
# Loop-carried shuffle: move the words loaded in the previous pass into the
# slots and register the next pass reads them from.
.L21:
sd s4,56(sp)
sd s5,48(sp)
mv t2,s7
# Expansion loop, three words per pass (i, i+1, i+2 for i = 16, 19, ..., 76):
#   x        = words[i-14] - words[i-3] - words[i-8] - words[i-16]
#   words[i] = (x << 1) + x % 2
# The srliw/andi/subw/slliw/addw groups are the "(x << 1) + x % 2" step.
.L15:
lw s7,32(t6)
lw s5,36(t6)
lw s4,40(t6)
lw a0,0(t6)
lw a1,4(t6)
lw s10,8(t6)
addw a2,s7,a2
addw s2,s2,s5
addw s1,s1,s4
subw s2,t5,s2
subw s1,t4,s1
subw a2,s0,a2
subw a2,a2,a0
subw a0,s2,a1
subw a1,s1,s10
srliw s1,a1,31
srliw s10,a2,31
srliw s2,a0,31
addw t4,a1,s1
addw s0,a2,s10
addw t5,a0,s2
andi s0,s0,1
andi t5,t5,1
andi t4,t4,1
subw t5,t5,s2
subw t4,t4,s1
subw s0,s0,s10
slliw s1,a1,1
slliw a2,a2,1
slliw s2,a0,1
addw a2,s0,a2
addw s2,t5,s2
addw s1,t4,s1
sw a2,64(t6)
sw s2,68(t6)
sw s1,72(t6)
addiw t0,t0,3
li a1,79
ld t5,48(sp)
ld t4,56(sp)
mv s0,t2
addi t6,t6,12
bne t0,a1,.L21
# Last schedule word, same formula with i = 79:
# reads words[71], words[76], words[65], words[63]; writes words[79] (sp+396).
lw t6,364(sp)
lw t4,384(sp)
lw a2,340(sp)
lw t5,332(sp)
addw t4,t4,t6
subw a2,a2,t4
subw a2,a2,t5
srliw t5,a2,31
addw t4,a2,t5
# Bring accumulators 0..2 back into s10, a1, a0.
ld s10,0(sp)
ld a1,8(sp)
ld a0,16(sp)
andi t4,t4,1
subw t4,t4,t5
slliw a2,a2,1
addw a2,t4,a2
sw a2,396(sp)
# Working copies for the round loop (called a..e below):
#   t0 = a, t6 = b, s4 = c, s7 = d, a2 = e
#   t2 = round counter, s5 = round constant k, t5 = f, a5 -> words[round]
mv t0,s10
mv t6,a1
mv s4,a0
mv s7,a6
mv a2,a7
li t2,0
mv s5,t1
li t5,0
# ---- Round loop (80 rounds) ----
# Each round computes
#   t  (s1) = ((a << 5) + a % 32) + f + e + k + words[round]
#   c' (s0) = (b << 30) + b % 1073741824
# then increments the counter and chooses k and f for the NEXT round:
#   rounds  0..19: k = 1518500249,  f = 0
#   rounds 20..39: k = 1859775361,  f = b + c - d
#   rounds 40..59: k = -1894007588, f = 0
#   rounds 60..79: k = -899497722,  f = b + c - d
# where b, c, d are the next round's values (a, c', c of this round).
.L16:
sraiw s1,t0,31
srliw s1,s1,27
addw t4,s1,t0
andi t4,t4,31
sraiw s0,t6,31
subw t4,t4,s1
slliw s2,t0,5
srliw s0,s0,2
addw t4,t4,s2
addw s1,s0,t6
addw t4,t4,t5
addw t4,t4,a2
lw t5,0(a5)
and a2,s1,a4
slliw t6,t6,30
subw a2,a2,s0
addw s0,a2,t6
addw t4,t4,s5
addiw t2,t2,1
addw s1,t4,t5
mv t6,s0
# t5 = a + c', i.e. next round's b + c.
addw t5,t0,s0
# Counter reached 80: this chunk is done.
beq t2,a3,.L27
# Next round index <= 19.
ble t2,t3,.L22
# Next round index >= 40.
bgt t2,s9,.L18
# Next round index in 20..39.
ld s5,32(sp)
subw t5,t5,s4
# Rotate the working values: e = d, d = c, c = c', b = a, a = t; advance the
# words pointer.
.L17:
mv t6,t0
mv a2,s7
addi a5,a5,4
mv s7,s4
mv t0,s1
mv s4,s0
j .L16
# Next round index >= 40: 40..59 uses the sp+24 constant with f = 0,
# 60..79 uses the sp+40 constant with f = b + c - d.
.L18:
li a2,59
ld s5,24(sp)
li t5,0
ble t2,a2,.L17
addw t6,t0,t6
ld s5,40(sp)
subw t5,t6,s4
j .L17
# End of chunk: add the final a..e into the five accumulators, advance the
# chunk start by 64 words (256 bytes) and repeat while it is below the padded
# length held at sp+64.
.L27:
ld a5,64(sp)
addiw s8,s8,64
addw s10,s10,s1
addw a1,a1,t0
addw a0,a0,s0
addw a6,a6,s4
addw a7,a7,s7
addi s3,s3,256
bgt a5,s8,.L13
# Store the five accumulators to output[0..4], restore the saved registers
# and return.
ld s0,72(sp)
ld ra,504(sp)
ld s1,488(sp)
sw s10,0(s0)
sw a1,4(s0)
sw a0,8(s0)
sw a6,12(s0)
sw a7,16(s0)
ld s0,496(sp)
ld s2,480(sp)
ld s3,472(sp)
ld s4,464(sp)
ld s5,456(sp)
ld s6,448(sp)
ld s7,440(sp)
ld s8,432(sp)
ld s9,424(sp)
ld s10,416(sp)
ld s11,408(sp)
addi sp,sp,512
jr ra
# Next round index in 1..19: k = 1518500249, f = 0.
.L22:
li s5,1518501888
addi s5,s5,-1639
li t5,0
j .L17
.size pseudo_sha1, .-pseudo_sha1
# main: reads two integers with getint (the generator seed, then a round
# count), and for each round fills `buffer` with 32000 generated values,
# pads it to 32064 words and folds it into five 32-bit values using the same
# padding / expansion / round arithmetic that pseudo_sha1 performs (expanded
# inline here with the length fixed at 32000).  Each round's five values h[j]
# are merged into a running result as out[j] = -(out[j] + h[j]).  The five
# running results are finally printed with putarray(5, ...).  Returns 0.
#
# Stack frame (624 bytes):
#   sp+0 .. sp+16, sp+40  short-lived spills (values live across memset, words
#                         base, accumulators parked during expansion)
#   sp+24                 round constant -899497722
#   sp+32                 pointer to the current 64-word chunk of buffer
#   sp+40, sp+48          schedule words carried between expansion passes
#   sp+56, sp+64          chunk start index and padded length (32064)
#   sp+72, sp+156         running generator word
#   sp+80, sp+96          &words[0] and &words[16]
#   sp+88                 constant -1009589776
#   sp+104                remaining round count
#   sp+112 .. sp+144      the five running results, parked during a round
#   sp+168 .. sp+187      five-word result array handed to putarray
#   sp+192 .. sp+511      80-word schedule array (called "words" below)
#   sp+520 .. sp+623      saved s11..s0 and ra
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
main:
addi sp,sp,-624
sd ra,616(sp)
sd s0,608(sp)
sd s4,576(sp)
sd s1,600(sp)
sd s2,592(sp)
sd s3,584(sp)
sd s5,568(sp)
sd s6,560(sp)
sd s7,552(sp)
sd s8,544(sp)
sd s9,536(sp)
sd s10,528(sp)
sd s11,520(sp)
# First getint() result becomes the generator word at .LANCHOR0.
lla s0,.LANCHOR0
call getint@plt
sw a0,0(s0)
# Second getint() result is the round count (s4).
call getint@plt
mv s4,a0
# _sysy_starttime(163).  NOTE(review): 163 is presumably a source line number - confirm.
li a0,163
call _sysy_starttime@plt
# Zero the five-word result array at sp+168.
sd zero,168(sp)
sd zero,176(sp)
sw zero,184(sp)
# Nothing to do when the round count is <= 0.
ble s4,zero,.L29
# One-time setup: running generator word -> sp+72, &words[0] -> sp+80,
# constants: sp+88 = -1009589776, s3 = 1518500249, s2 = -1894007588,
# sp+24 = -899497722, s10 = 1859775361; s9 = &words[16] -> sp+96.
lw a5,0(s0)
mv s11,s4
li s7,0
sd a5,72(sp)
addi a5,sp,192
sd a5,80(sp)
li a5,-1009590272
addi a5,a5,496
sd a5,88(sp)
li a5,1518501888
addi s3,a5,-1639
li a5,-1894006784
addi s2,a5,-804
li a5,-899497984
addi a5,a5,262
li s4,0
li t5,0
li t0,0
addi s9,sp,256
sd a5,24(sp)
li a5,1859776512
# Running results start at 0 and live in t2, t1, t0, t4, s6 between rounds;
# s4 holds the remaining round count.
mv s6,s4
addi s10,a5,-1151
mv t2,t0
mv t1,t5
mv t4,s7
mv s4,s11
sd s9,96(sp)
# ---- Per-round loop ----
.L30:
ld s1,72(sp)
lla a5,buffer
sd a5,32(sp)
mv a3,a5
# Fill buffer[0..31999].  Each pass advances the generator word in s1 with the
# same three steps as get_random (inlined) and stores s1 % 256 (C remainder).
.L31:
slliw a4,s1,13
addw a4,a4,s1
sraiw a5,a4,31
srliw a5,a5,15
addw a5,a5,a4
sraiw a5,a5,17
addw a5,a5,a4
slliw s0,a5,5
addw s1,s0,a5
sraiw a4,s1,31
srliw a4,a4,24
addw a5,s1,a4
andi a5,a5,255
subw a5,a5,a4
sw a5,0(a3)
addi a3,a3,4
lla a5,buffer+128000
mv s0,s1
bne a5,a3,.L31
# Padding for a 32000-word message:
#   buffer[32000] = 128
#   buffer[32001..32059] = 0      (memset of 236 bytes)
#   buffer[32060..32061] = 0      (one 64-bit store of zero)
#   buffer[32062..32063]          (one 64-bit store of the value 125)
# The running results in t2/t1/t0/t4 are spilled first because t registers do
# not survive the memset calls.
li a5,128
sd s1,72(sp)
li a2,236
li a1,0
lla s1,buffer+126976
lla a0,buffer+128004
sd t2,40(sp)
sd t1,16(sp)
sd t0,8(sp)
sd t4,0(sp)
sw a5,1024(s1)
call memset@plt
ld s5,80(sp)
sd zero,buffer+128240,a5
li a5,125
# memset(words, 0, 320).
li a2,320
li a1,0
mv a0,s5
sd a5,1272(s1)
call memset@plt
# Per-round constants.  The five accumulators start at
#   a1 = 1732584193, ra = -271733879, a0 = -1732584194,
#   a6 = 271733878,  a7 = -1009589776
# (ra is used as an ordinary register here; the return address is in the frame).
# a4 = 1073741823 (mask), sp+64 = 32064 (padded length), sp+56 = 0 (chunk start),
# a3 = 80, s9 = 19, s8 = 39, s11 = 59 (round-count limits).
li a1,-271732736
li a2,1732583424
ld t2,40(sp)
ld t1,16(sp)
ld t0,8(sp)
ld t4,0(sp)
addi ra,a1,-1143
addi a1,a2,769
li a2,0
li a5,32768
sd a2,56(sp)
ld a7,88(sp)
ld a2,96(sp)
li a6,271732736
li a0,-1732583424
li a4,1073741824
addi a5,a5,-704
sd s4,104(sp)
addi a6,a6,1142
addi a0,a0,-770
addi a4,a4,-1
li a3,80
li s9,19
li s8,39
li s11,59
sd a5,64(sp)
# Park the running results, the round count and the generator word for the
# duration of the chunk loop.
sd t2,112(sp)
sd t1,120(sp)
sd t0,128(sp)
sd t4,136(sp)
sd s6,144(sp)
sw s0,156(sp)
mv s4,s5
# ---- Per-chunk loop (64 buffer words per chunk) ----
.L32:
ld t3,32(sp)
sd s4,0(sp)
mv t4,s4
# Pack four buffer words into one schedule word, 16 times:
#   words[j] = ((in[4j] << 8) + in[4j+1] << 8) + in[4j+2] << 8) + in[4j+3]
.L33:
lw t1,0(t3)
lw t5,4(t3)
lw t6,8(t3)
slliw t1,t1,8
addw t1,t1,t5
slliw t1,t1,8
lw t5,12(t3)
addw t1,t1,t6
slliw t1,t1,8
addw t1,t1,t5
sw t1,0(t4)
addi t4,t4,4
addi t3,t3,16
bne a2,t4,.L33
# Preload words[2..7] and words[13..15] for the expansion loop.
lw a5,212(sp)
lw t1,244(sp)
lw t2,248(sp)
sd a5,40(sp)
lw a5,216(sp)
lw t0,252(sp)
lw t6,200(sp)
sd a5,48(sp)
lw t4,204(sp)
lw t3,208(sp)
lw a5,220(sp)
# t5 = &words[i - 16], s1 = i (starts at 16).
mv t5,s4
li s1,16
# Park two accumulators; a1 and ra are used as temporaries below.
sd a1,8(sp)
sd ra,16(sp)
j .L34
# Loop-carried shuffle of the words loaded in the previous pass.
.L41:
mv a5,ra
sd s0,48(sp)
sd s5,40(sp)
# Expansion loop, three words per pass (i, i+1, i+2 for i = 16, 19, ..., 76):
#   x        = words[i-14] - words[i-3] - words[i-8] - words[i-16]
#   words[i] = (x << 1) + x % 2
.L34:
lw s5,32(t5)
lw s0,36(t5)
lw ra,40(t5)
lw s7,4(t5)
lw s6,8(t5)
lw a1,0(t5)
addw t1,s5,t1
addw t2,t2,s0
addw t0,t0,ra
subw t1,t6,t1
subw t2,t4,t2
subw t0,t3,t0
subw a1,t1,a1
subw t3,t2,s7
subw t1,t0,s6
srliw t2,t1,31
srliw s7,a1,31
srliw s6,t3,31
addw t0,a1,s7
addw t4,t1,t2
addw t6,t3,s6
andi t0,t0,1
andi t6,t6,1
andi t4,t4,1
subw t4,t4,t2
subw s7,t0,s7
subw t6,t6,s6
slliw t3,t3,1
slliw t0,t1,1
slliw a1,a1,1
addw t2,t6,t3
addw t0,t4,t0
addw t1,s7,a1
sw t1,64(t5)
sw t2,68(t5)
sw t0,72(t5)
mv t3,a5
addiw s1,s1,3
li a5,79
ld t6,40(sp)
ld t4,48(sp)
addi t5,t5,12
bne s1,a5,.L41
# Last schedule word, same formula with i = 79:
# reads words[71], words[76], words[65], words[63]; writes words[79] (sp+508).
lw t5,476(sp)
lw t3,496(sp)
lw t1,452(sp)
lw t4,444(sp)
addw t3,t3,t5
subw t1,t1,t3
subw t1,t1,t4
srliw t4,t1,31
addw t3,t1,t4
ld a1,8(sp)
ld ra,16(sp)
andi t3,t3,1
subw t3,t3,t4
slliw t1,t1,1
ld a5,0(sp)
addw t3,t3,t1
sw t3,508(sp)
# Working copies for the round loop (called a..e below):
#   t6 = a, t5 = b, s5 = c, s7 = d, t1 = e
#   t0 = round counter, s6 = round constant k, t4 = f, a5 -> words[round]
mv s7,a6
mv t1,a7
mv t6,a1
mv s5,a0
mv t5,ra
li t0,0
mv s6,s3
li t4,0
# ---- Round loop (80 rounds) ----
# Each round computes
#   t  (s0) = ((a << 5) + a % 32) + f + e + k + words[round]
#   c' (t2) = (b << 30) + b % 1073741824
# then increments the counter and chooses k and f for the NEXT round:
#   rounds  0..19: k = 1518500249,  f = 0
#   rounds 20..39: k = 1859775361,  f = b + c - d
#   rounds 40..59: k = -1894007588, f = 0
#   rounds 60..79: k = -899497722,  f = b + c - d
# where b, c, d are the next round's values (a, c', c of this round).
.L35:
sraiw s0,t6,31
srliw s0,s0,27
addw t3,s0,t6
andi t3,t3,31
sraiw t2,t5,31
subw t3,t3,s0
slliw s1,t6,5
srliw t2,t2,2
addw t3,t3,s1
addw t3,t3,t4
addw t4,t2,t5
lw s0,0(a5)
and t4,t4,a4
slliw t5,t5,30
subw t4,t4,t2
addw t3,t3,t1
addw t2,t4,t5
addw t3,t3,s6
addiw t0,t0,1
mv t5,t2
addw s0,t3,s0
# t4 = a + c', i.e. next round's b + c.
addw t4,t6,t2
addi a5,a5,4
# Counter reached 80: this chunk is done.
beq t0,a3,.L47
# Next round index <= 19.
ble t0,s9,.L42
# Next round index >= 40.
bgt t0,s8,.L37
# Next round index in 20..39.
subw t4,t4,s5
mv s6,s10
# Rotate the working values: e = d, b = a, d = c, a = t, c = c'.
.L36:
mv t1,s7
mv t5,t6
mv s7,s5
mv t6,s0
mv s5,t2
j .L35
# Next round index >= 40: 40..59 uses s2 with f = 0,
# 60..79 uses the sp+24 constant with f = b + c - d.
.L37:
mv s6,s2
li t4,0
ble t0,s11,.L36
addw t5,t6,t5
ld s6,24(sp)
subw t4,t5,s5
j .L36
# End of chunk: add the final a..e into the accumulators (a1, ra, a0, a6, a7),
# advance the chunk pointer by 256 bytes and the chunk start by 64 words, and
# repeat until the chunk start equals the padded length at sp+64.
.L47:
addw a1,a1,s0
ld s0,32(sp)
ld a5,56(sp)
addw ra,t6,ra
addi s0,s0,256
sd s0,32(sp)
ld s0,64(sp)
addiw a5,a5,64
sd a5,56(sp)
addw a0,a0,t2
addw a6,s5,a6
addw a7,a7,s7
bne a5,s0,.L32
# End of round: reload the running results and round count, then merge:
#   out[j] = -(out[j] + accumulator[j])      for j = 0..4
# s0 gets the generator word back from sp+156.
ld t2,112(sp)
ld t1,120(sp)
ld t0,128(sp)
ld t4,136(sp)
ld s6,144(sp)
ld s4,104(sp)
addw t3,a1,t2
addw t6,ra,t1
addw t5,a0,t0
addw s5,a6,t4
addw s7,a7,s6
addiw s4,s4,-1
lw s0,156(sp)
negw t2,t3
negw t1,t6
negw t0,t5
negw t4,s5
negw s6,s7
bne s4,zero,.L30
# All rounds done: write the five results to the array at sp+168 and store
# the generator word back to .LANCHOR0.
lla a5,.LANCHOR0
sw t2,168(sp)
sw t1,172(sp)
sw t0,176(sp)
sw t4,180(sp)
sw s6,184(sp)
sw s0,0(a5)
# _sysy_stoptime(186), then putarray(5, &result array).
# NOTE(review): 186 is presumably a source line number - confirm.
.L29:
li a0,186
call _sysy_stoptime@plt
li a0,5
addi a1,sp,168
call putarray@plt
# Restore saved registers and return 0.
ld ra,616(sp)
ld s0,608(sp)
ld s1,600(sp)
ld s2,592(sp)
ld s3,584(sp)
ld s4,576(sp)
ld s5,568(sp)
ld s6,560(sp)
ld s7,552(sp)
ld s8,544(sp)
ld s9,536(sp)
ld s10,528(sp)
ld s11,520(sp)
li a0,0
addi sp,sp,624
jr ra
# Next round index in 1..19: k = 1518500249, f = 0.
.L42:
li s6,1518501888
addi s6,s6,-1639
li t4,0
j .L36
.size main, .-main
# Data objects.  Every symbol listed here is exported with global binding.
.globl buffer
.globl state
.globl _sysy_idx
.globl _sysy_us
.globl _sysy_s
.globl _sysy_m
.globl _sysy_h
.globl _sysy_l2
.globl _sysy_l1
.globl _sysy_end
.globl _sysy_start
# Initialised data: the 4-byte generator word `state`, initial value 19260817.
# .LANCHOR0 is set to the same address; get_random and main reach the word
# through that local symbol.
.data
.align 2
.set .LANCHOR0,. + 0
.type state, @object
.size state, 4
state:
.word 19260817
# Zero-initialised data.
.bss
.align 3
# buffer: 131072 bytes (32768 32-bit words), 8-byte aligned.  main fills the
# first 32000 words and pads the message to 32064 words.
.type buffer, @object
.size buffer, 131072
buffer:
.zero 131072
# The _sysy_* objects below are not referenced by any code in this file.
# NOTE(review): presumably they belong to the timing runtime behind
# _sysy_starttime / _sysy_stoptime - confirm against that library.
.type _sysy_idx, @object
.size _sysy_idx, 4
_sysy_idx:
.zero 4
# Four bytes of padding after the 4-byte _sysy_idx.
.zero 4
.type _sysy_us, @object
.size _sysy_us, 4096
_sysy_us:
.zero 4096
.type _sysy_s, @object
.size _sysy_s, 4096
_sysy_s:
.zero 4096
.type _sysy_m, @object
.size _sysy_m, 4096
_sysy_m:
.zero 4096
.type _sysy_h, @object
.size _sysy_h, 4096
_sysy_h:
.zero 4096
.type _sysy_l2, @object
.size _sysy_l2, 4096
_sysy_l2:
.zero 4096
.type _sysy_l1, @object
.size _sysy_l1, 4096
_sysy_l1:
.zero 4096
.type _sysy_end, @object
.size _sysy_end, 16
_sysy_end:
.zero 16
.type _sysy_start, @object
.size _sysy_start, 16
_sysy_start:
.zero 16
# Compiler identification string.
.ident "GCC: (Debian 12.2.0-13) 12.2.0"
# Empty .note.GNU-stack section (the usual marker that no executable stack is required).
.section .note.GNU-stack,"",@progbits