# sysy-data/performance_c/asm/crypto-2.s
#
# 601 lines
# 9.0 KiB
# ArmAsm
# Raw Permalink Normal View History
#
# 2024-06-14 13:34:46 +08:00
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
.file "crypto-2.sy"
# ---------------------------------------------------------------------------
# get_random
# Advances the global 32-bit word `state` and returns the new value.
#   In:    (no argument registers are read)
#   Out:   a0 = new value of `state`
#   Uses:  a4, a5 as scratch
# Update performed, with every step wrapping at 32 bits:
#   s += s << 13;   s += s / 131072 (signed, toward zero);   s += s << 5
# ---------------------------------------------------------------------------
    .globl  get_random
    .p2align 1
    .type   get_random,@function
get_random:
    lui     a5, %hi(state)
    lw      a0, %lo(state)(a5)      # a0 = current state
    slliw   a4, a0, 13
    addw    a0, a0, a4              # s += s << 13
    sraiw   a4, a0, 31              # a4 = -1 when s is negative, else 0
    srliw   a4, a4, 15              # bias: 131071 for negative s, else 0
    addw    a4, a4, a0
    sraiw   a4, a4, 17              # a4 = s / 131072, rounded toward zero
    addw    a0, a0, a4              # s += s / 131072
    slliw   a4, a0, 5
    addw    a0, a0, a4              # s += s << 5
    sw      a0, %lo(state)(a5)      # write the new state back
    ret
.Lfunc_end0:
    .size   get_random, .Lfunc_end0-get_random
# ---------------------------------------------------------------------------
# rotl1
#   In:    a0 = x (32-bit signed, sign-extended)
#   Out:   a0 = (x << 1) + (x % 2), wrapping at 32 bits; % is the signed
#          remainder that takes the sign of x
#   Uses:  a4, a5 as scratch
# Note: this is the arithmetic the code performs; e.g. x = 1 yields 3, so it
# is not a bitwise rotate.
# ---------------------------------------------------------------------------
    .globl  rotl1
    .p2align 1
    .type   rotl1,@function
rotl1:
    srliw   a5, a0, 31              # a5 = 1 when x is negative, else 0
    add     a5, a5, a0
    andi    a5, a5, -2              # x rounded toward zero to a multiple of 2
    slliw   a4, a0, 1               # a4 = x << 1
    subw    a0, a0, a5              # a0 = x % 2
    addw    a0, a0, a4
    ret
.Lfunc_end1:
    .size   rotl1, .Lfunc_end1-rotl1
# ---------------------------------------------------------------------------
# rotl5
#   In:    a0 = x (32-bit signed, sign-extended)
#   Out:   a0 = (x << 5) + (x % 32), wrapping at 32 bits; % is the signed
#          remainder that takes the sign of x
#   Uses:  a4, a5 as scratch
# ---------------------------------------------------------------------------
    .globl  rotl5
    .p2align 1
    .type   rotl5,@function
rotl5:
    sraiw   a5, a0, 31              # a5 = -1 when x is negative, else 0
    srliw   a5, a5, 27              # bias: 31 for negative x, else 0
    add     a5, a5, a0
    andi    a5, a5, -32             # x rounded toward zero to a multiple of 32
    slliw   a4, a0, 5               # a4 = x << 5
    subw    a0, a0, a5              # a0 = x % 32
    addw    a0, a0, a4
    ret
.Lfunc_end2:
    .size   rotl5, .Lfunc_end2-rotl5
# ---------------------------------------------------------------------------
# rotl30
#   In:    a0 = x (32-bit signed, sign-extended)
#   Out:   a0 = (x << 30) + (x % 2^30), wrapping at 32 bits; % is the signed
#          remainder that takes the sign of x
#   Uses:  a3, a4, a5 as scratch
# ---------------------------------------------------------------------------
    .globl  rotl30
    .p2align 1
    .type   rotl30,@function
rotl30:
    sraiw   a5, a0, 31              # a5 = -1 when x is negative, else 0
    srliw   a5, a5, 2               # bias: 2^30 - 1 for negative x, else 0
    add     a5, a5, a0
    lui     a3, 0xC0000             # a3 = -2^30 (mask clearing the low 30 bits)
    and     a5, a5, a3              # x rounded toward zero to a multiple of 2^30
    slliw   a4, a0, 30              # a4 = x << 30
    subw    a0, a0, a5              # a0 = x % 2^30
    addw    a0, a0, a4
    ret
.Lfunc_end3:
    .size   rotl30, .Lfunc_end3-rotl30
# ---------------------------------------------------------------------------
# _and
#   In:    a0, a1 = 32-bit signed operands
#   Out:   a0 = a0 + a1, wrapping at 32 bits
# Note: the instruction is an add, not a bitwise AND.
# ---------------------------------------------------------------------------
    .globl  _and
    .p2align 1
    .type   _and,@function
_and:
    addw    a0, a1, a0              # 32-bit sum, sign-extended into a0
    ret
.Lfunc_end4:
    .size   _and, .Lfunc_end4-_and
# ---------------------------------------------------------------------------
# _not
#   In:    a0 = x
#   Out:   a0 = bitwise complement of x (equivalently -1 - x)
# ---------------------------------------------------------------------------
    .globl  _not
    .p2align 1
    .type   _not,@function
_not:
    xori    a0, a0, -1              # flip every bit; same encoding as `not`
    ret
.Lfunc_end5:
    .size   _not, .Lfunc_end5-_not
# ---------------------------------------------------------------------------
# _xor
#   In:    a0, a1 = 32-bit signed operands
#   Out:   a0 = -(a0 + a1), wrapping at 32 bits
# Note: the instructions are add-then-negate, not a bitwise XOR.
# ---------------------------------------------------------------------------
    .globl  _xor
    .p2align 1
    .type   _xor,@function
_xor:
    addw    a0, a1, a0              # 32-bit sum
    subw    a0, zero, a0            # negate; same encoding as `negw`
    ret
.Lfunc_end6:
    .size   _xor, .Lfunc_end6-_xor
# ---------------------------------------------------------------------------
# _or
#   In:    a0, a1 (neither is read)
#   Out:   a0 = 0, whatever the inputs are
# ---------------------------------------------------------------------------
    .globl  _or
    .p2align 1
    .type   _or,@function
_or:
    mv      a0, zero                # constant result 0
    ret
.Lfunc_end7:
    .size   _or, .Lfunc_end7-_or
# ---------------------------------------------------------------------------
# pseudo_sha1
# Pads the word array at a0 in place, then folds it, 64 input words at a
# time, into five 32-bit values that are stored through the pointer in a2.
#   In:   a0 = base of an array of 32-bit words (padding is appended to it)
#         a1 = number of words already in the array (32-bit signed)
#         a2 = destination for five 32-bit result words
#   Out:  five words written at a2 + 0, 4, 8, 12, 16
# The per-round functions below are built from 32-bit add/subtract
# arithmetic (the same arithmetic as rotl1/rotl5/rotl30 in this file).
# Frame (448 bytes):
#   0(sp)        saved a2 (output pointer)
#   8(sp)        bound used by the chunk loop
#   16(sp)       one spilled round constant
#   24..343(sp)  80-word scratch array
#   344..440(sp) saved s11..s0 and ra
# ---------------------------------------------------------------------------
.globl pseudo_sha1
.p2align 1
.type pseudo_sha1,@function
pseudo_sha1:
# Prologue: reserve the frame and save ra and s0-s11.
addi sp, sp, -448
sd ra, 440(sp)
sd s0, 432(sp)
sd s1, 424(sp)
sd s2, 416(sp)
sd s3, 408(sp)
sd s4, 400(sp)
sd s5, 392(sp)
sd s6, 384(sp)
sd s7, 376(sp)
sd s8, 368(sp)
sd s9, 360(sp)
sd s10, 352(sp)
sd s11, 344(sp)
# Keep the output pointer in the frame and the array base in s3.
sd a2, 0(sp)
mv s3, a0
# Store the marker word 128 at array[a1].
slli a0, a1, 2
add a0, a0, s3
li a2, 128
sw a2, 0(a0)
# a3 = (a1 + 1) % 64 (signed remainder); a2 = a1 + 1 is the next free index.
addi a0, a1, 1
sraiw a2, a0, 31
srliw a2, a2, 26
add a2, a2, a0
andi a2, a2, -64
subw a3, a0, a2
li a0, 60
addiw a2, a1, 1
# If the next index is already 60 modulo 64, no zero fill is needed.
bne a3, a0, .LBB8_2
mv a0, a1
j .LBB8_5
.LBB8_2:
# a3 = address of array[a2].
slli a3, a2, 2
add a3, a3, s3
.LBB8_3:
# Zero-fill one word per iteration until the index a2 is 60 modulo 64.
sw zero, 0(a3)
addi a2, a2, 1
sraiw a4, a2, 31
srliw a4, a4, 26
add a4, a4, a2
andi a4, a4, -64
subw a4, a2, a4
addi a3, a3, 4
bne a4, a0, .LBB8_3
# a0 = a2 - 1 on both paths, so array[a2] is the same slot as array[a0 + 1].
addi a0, a2, -1
.LBB8_5:
# Append four words derived from the original count a1.
# array[a2] = a1 / 2^24 (signed division rounding toward zero).
srli a3, a1, 31
srliw a4, a3, 8
addw a4, a4, a1
sraiw a4, a4, 24
sext.w a2, a2
slli a2, a2, 2
add a2, a2, s3
sw a4, 0(a2)
# Compute (a1 / 2^16) % 256 in a2.
srliw a2, a3, 16
add a2, a2, a1
lui a4, 65535
slli a4, a4, 4
and a2, a2, a4
srli a4, a2, 16
sraiw a2, a2, 16
srli a2, a2, 23
andi a2, a2, 255
add a2, a2, a4
andi a2, a2, -256
subw a2, a4, a2
slli a2, a2, 48
srai a2, a2, 48
# s0 = a0 (sign-extended); it is saved at 8(sp) below as the chunk-loop bound.
sext.w s0, a0
# array[a0 + 2] = the value computed above.
addiw a4, a0, 2
slli a4, a4, 2
add a4, a4, s3
sw a2, 0(a4)
# array[a0 + 3] = (a1 / 2^8) % 256.
srliw a2, a3, 24
add a2, a2, a1
sraiw a3, a2, 8
srliw a4, a3, 24
add a4, a4, a3
andi a4, a4, -256
subw a3, a3, a4
addiw a4, a0, 3
slli a4, a4, 2
add a4, a4, s3
sw a3, 0(a4)
# array[a0 + 4] = a1 % 256.
andi a2, a2, -256
subw a1, a1, a2
addiw a0, a0, 4
slli a0, a0, 2
add a0, a0, s3
sw a1, 0(a0)
# Clear the 320-byte scratch array at sp+24; s1 keeps its address.
addi a0, sp, 24
li a2, 320
addi s1, sp, 24
li a1, 0
call memset@plt
# Build the upper parts of the five initial values; low parts are added below.
lui a0, 422994
lui a2, 66341
lui a1, 401047
slli a1, a1, 1
lui a3, 312791
slli a3, a3, 1
lui a4, 240
addiw a4, a4, -805
li a5, -4
slli a4, a4, 12
# Save the loop bound; skip the chunk loop when s0 < -4 (a0 + 5 is not positive).
sd s0, 8(sp)
blt s0, a5, .LBB8_21
# a6 = word offset of the current chunk.
li a6, 0
# t4 = array base + 8 bytes; the packing loop loads at offsets -8, -4, 0, 4.
addi t4, s3, 8
# Five running values (low 32 bits):
#   t1 = 0x67452301, t3 = 0xEFCDAB89, t2 = 0x98BADCFE,
#   a7 = 0x10325476, t0 = 0xC3D2E1F0.
addiw a7, a2, 1142
addiw t1, a0, 769
addi t2, a3, -770
addi t0, a1, 496
addi t3, a4, -1143
# Loop-invariant values:
#   s7 = 64, s8 = 256   byte-offset bounds of the two scratch loops
#   s9 = 20, t5 = 39, s11 = 80   round-count thresholds
#   s10 = -2^30         mask used for the % 2^30 step
#   round constants (low 32 bits): ra = 0x5A827999, s6 = 0x6ED9EB81,
#   16(sp) = 0x8F1BBCDC, s5 = 0xCA62C106.
# ra is usable as a general register here because it was saved in the
# prologue; the memset call above has already returned.
li s7, 64
li s8, 256
lui a0, 370728
addiw ra, a0, -1639
li s9, 20
lui s10, 786432
li s11, 80
li t5, 39
lui a0, 454047
addiw s6, a0, -1151
lui a0, 146543
slli a0, a0, 2
addi a0, a0, -804
sd a0, 16(sp)
lui a0, 207243
slli a0, a0, 2
addi s5, a0, 262
j .LBB8_8
.LBB8_7:
# End of a chunk: add the five working values (a4, s4, a2, s3, t6) into the
# running values, advance a6 by 64 words and t4 by 256 bytes, and leave the
# loop once (old a6 | 59) >= the bound saved at 8(sp).
addw t1, a4, t1
addw t3, s4, t3
addw t2, a2, t2
addw a7, s3, a7
addw t0, t6, t0
ori a0, a6, 59
addi a6, a6, 64
addi t4, t4, 256
ld a1, 8(sp)
bge a0, a1, .LBB8_22
.LBB8_8:
# Start of a chunk: a0 = byte offset into the scratch array, a1 = read pointer.
li a0, 0
mv a1, t4
.LBB8_9:
# Pack four consecutive input words w0..w3 into one scratch word:
#   (w0 << 24) + (w1 << 16) + (w2 << 8) + w3
# Runs 16 times, filling scratch[0..15].
lw a2, -8(a1)
lw a3, -4(a1)
slliw a2, a2, 24
lw a4, 0(a1)
slliw a3, a3, 16
lw a5, 4(a1)
addw a2, a2, a3
slliw a3, a4, 8
addw a2, a2, a3
addw a2, a2, a5
add a3, s1, a0
sw a2, 0(a3)
addi a0, a0, 4
addi a1, a1, 16
bne a0, s7, .LBB8_9
li a0, 0
.LBB8_11:
# Fill scratch[16..79]. With p = address of scratch[i]:
#   v = p[2] - (p[13] + p[8] + p[0]);   p[16] = (v << 1) + v % 2
# (the same arithmetic as rotl1). Runs 64 times.
add a1, s1, a0
lw a2, 52(a1)
lw a3, 32(a1)
lw a4, 0(a1)
lw a5, 8(a1)
addw a2, a2, a3
addw a2, a2, a4
sub a2, a5, a2
slliw a3, a2, 1
srliw a4, a2, 31
add a4, a4, a2
andi a4, a4, -2
subw a2, a2, a4
addw a2, a2, a3
addi a0, a0, 4
sw a2, 64(a1)
bne a0, s8, .LBB8_11
# Round-loop setup: a5 = round counter, s0 = pointer to the current scratch
# word; working values are copied from the running values:
#   a0 <- t0, a1 <- a7, a2 <- t2, a3 <- t3, a4 <- t1.
li a5, 0
addi s0, sp, 24
mv a0, t0
mv a1, a7
mv a2, t2
mv a3, t3
mv a4, t1
j .LBB8_15
.LBB8_13:
# Rounds 0..19: s2 = 0, round constant taken from ra.
li s2, 0
mv a2, ra
.LBB8_14:
# Common round tail.
#   new a4 = scratch word + a0 + ((s4 << 5) + s4 % 32) + s2 + a2
#   new a2 = (a3 << 30) + a3 % 2^30
#   then a0 <- t6, a1 <- s3, a3 <- s4; leave after 80 rounds.
slliw a1, s4, 5
sraiw a4, s4, 31
srliw a4, a4, 27
add a4, a4, s4
andi a4, a4, -32
subw a4, s4, a4
addw a1, a1, a4
lw a4, 0(s0)
addw a0, a0, a1
addw a0, a0, s2
addw a0, a0, a2
addw a4, a4, a0
slliw a0, a3, 30
sraiw a1, a3, 31
srliw a1, a1, 2
add a1, a1, a3
and a1, a1, s10
subw a1, a3, a1
addw a2, a0, a1
addi a5, a5, 1
addi s0, s0, 4
mv a0, t6
mv a1, s3
mv a3, s4
beq a5, s11, .LBB8_7
.LBB8_15:
# Round dispatch on a5. Snapshot s4 = a4, s3 = a2, t6 = a1 first.
# From here on s3 no longer holds the array base; t4 tracks the read position.
mv s4, a4
mv s3, a2
mv t6, a1
# a5 < 20 (unsigned): rounds 0..19.
bltu a5, s9, .LBB8_13
# a5 > 39 (unsigned): rounds 40 and up.
bltu t5, a5, .LBB8_18
# Rounds 20..39: s2 = s3 + a3 - t6, round constant from s6.
addw a1, s3, a3
subw s2, a1, t6
mv a2, s6
j .LBB8_14
.LBB8_18:
# a5 >= 60 (unsigned): rounds 60..79; otherwise rounds 40..59 follow.
li a1, 60
bgeu a5, a1, .LBB8_20
# Rounds 40..59: s2 = 0, round constant reloaded from 16(sp).
li s2, 0
ld a2, 16(sp)
j .LBB8_14
.LBB8_20:
# Rounds 60..79: s2 = s3 + a3 - t6, round constant from s5.
addw a1, s3, a3
subw s2, a1, t6
mv a2, s5
j .LBB8_14
.LBB8_21:
# No-chunk path: finish building the five initial values and store them as is.
addiw t1, a0, 769
addiw a7, a2, 1142
addi t0, a1, 496
addi t2, a3, -770
addi t3, a4, -1143
.LBB8_22:
# Store t1, t3, t2, a7, t0 through the saved output pointer, in that order.
ld a0, 0(sp)
sw t1, 0(a0)
sw t3, 4(a0)
sw t2, 8(a0)
sw a7, 12(a0)
sw t0, 16(a0)
# Epilogue: restore ra and s0-s11, release the frame.
ld ra, 440(sp)
ld s0, 432(sp)
ld s1, 424(sp)
ld s2, 416(sp)
ld s3, 408(sp)
ld s4, 400(sp)
ld s5, 392(sp)
ld s6, 384(sp)
ld s7, 376(sp)
ld s8, 368(sp)
ld s9, 360(sp)
ld s10, 352(sp)
ld s11, 344(sp)
addi sp, sp, 448
ret
.Lfunc_end8:
.size pseudo_sha1, .Lfunc_end8-pseudo_sha1
# ---------------------------------------------------------------------------
# main
# Reads two integers with getint: the first is stored to `state`, the second
# is the iteration count. Each iteration fills `buffer` with 32000 words
# generated by the same update as get_random (inlined), runs pseudo_sha1 over
# it, and folds the five output words into a five-word result array. The
# result array is printed with putarray and main returns 0.
# Frame (176 bytes):
#   8(sp), 16(sp)  spills for two of the accumulator values
#   24..43(sp)     five-word result array passed to putarray
#   48..67(sp)     five-word output buffer passed to pseudo_sha1
#   72..168(sp)    saved s11..s0 and ra
# ---------------------------------------------------------------------------
.globl main
.p2align 1
.type main,@function
main:
# Prologue: reserve the frame and save ra and s0-s11.
addi sp, sp, -176
sd ra, 168(sp)
sd s0, 160(sp)
sd s1, 152(sp)
sd s2, 144(sp)
sd s3, 136(sp)
sd s4, 128(sp)
sd s5, 120(sp)
sd s6, 112(sp)
sd s7, 104(sp)
sd s8, 96(sp)
sd s9, 88(sp)
sd s10, 80(sp)
sd s11, 72(sp)
# Zero the 20-byte pseudo_sha1 output buffer at sp+48.
sw zero, 64(sp)
sd zero, 56(sp)
sd zero, 48(sp)
# First getint result becomes the initial `state`.
call getint
lui s4, %hi(state)
sw a0, %lo(state)(s4)
# Second getint result is the iteration count, kept in s0.
call getint
mv s0, a0
# Start the timer; 162 is presumably a source line number -- confirm against
# the runtime that provides _sysy_starttime.
li a0, 162
call _sysy_starttime
# Zero the 20-byte result array at sp+24.
sw zero, 40(sp)
sd zero, 32(sp)
sd zero, 24(sp)
# Nothing to do when the iteration count is <= 0.
blez s0, .LBB9_5
# Clear the ten accumulator registers. They form five pairs, one per result
# word: (s11, s2), (s8, s3), (s7, s10), (a3, s5), (a2, s9).
li a2, 0
li s9, 0
li a3, 0
li s5, 0
li s7, 0
li s10, 0
li s8, 0
li s3, 0
li s11, 0
li s2, 0
# a0 = address of buffer; s6 = 32768 - 768 = 32000 (words generated per pass).
lui a0, %hi(buffer)
addi a0, a0, %lo(buffer)
lui a1, 8
addiw s6, a1, -768
.LBB9_2:
# Outer loop body. a3 and a2 are caller-saved, so spill them across the call.
sd a3, 8(sp)
sd a2, 16(sp)
# a2 = state, s1 = words left to generate, a5 = write pointer into buffer.
lw a2, %lo(state)(s4)
mv s1, s6
mv a5, a0
lui a6, %hi(state)
.LBB9_3:
# One inlined get_random step per word:
#   s += s << 13;  s += s / 131072 (signed, toward zero);  s += s << 5
# a2 carries the new state; the word stored to buffer is (new state) % 256
# using the signed remainder.
slliw a3, a2, 13
addw a2, a2, a3
sraiw a3, a2, 31
srliw a3, a3, 15
addw a3, a3, a2
sraiw a3, a3, 17
addw a3, a3, a2
slliw a4, a3, 5
addw a2, a4, a3
add a3, a3, a4
sraiw a4, a3, 31
srliw a4, a4, 24
add a4, a4, a3
andi a4, a4, -256
subw a3, a3, a4
sw a3, 0(a5)
addi s1, s1, -1
addi a5, a5, 4
bnez s1, .LBB9_3
# Write the final state back, then call pseudo_sha1(buffer, 32000, sp+48).
# s1 preserves the buffer address across the call; s4 is re-materialised.
sw a2, %lo(state)(a6)
addi a2, sp, 48
mv a1, s6
lui s4, %hi(state)
mv s1, a0
call pseudo_sha1
mv a0, s1
# Fold the five output words out[0..4] (at sp+48..) into the result array.
# For each pair (p, q) listed above:
#   new = p - out[i]  is stored to result[i]
#   p   = q + out[i]
#   q   = new         (done by the mv instructions near the loop bottom)
# For the last two pairs, p is reloaded from its spill slot (8(sp), 16(sp)).
lw a3, 48(sp)
lw a1, 52(sp)
subw a6, s11, a3
addw s11, s2, a3
lw a5, 56(sp)
subw t0, s8, a1
addw s8, s3, a1
lw a1, 60(sp)
subw a4, s7, a5
addw s7, s10, a5
sw a6, 24(sp)
ld a2, 8(sp)
subw s1, a2, a1
addw a3, s5, a1
lw a1, 64(sp)
sw t0, 28(sp)
sw a4, 32(sp)
sw s1, 36(sp)
ld a2, 16(sp)
subw a5, a2, a1
addw a2, s9, a1
# a7 = iteration count before the decrement; loop again while a7 > 1.
sext.w a7, s0
addiw s0, s0, -1
sw a5, 40(sp)
mv s9, a5
mv s5, s1
mv s10, a4
mv s3, t0
mv s2, a6
li a4, 1
blt a4, a7, .LBB9_2
.LBB9_5:
# Stop the timer (argument 185, presumably a source line number -- confirm),
# then print the five result words: putarray(5, sp+24).
li a0, 185
call _sysy_stoptime
li a0, 5
addi a1, sp, 24
call putarray
# Return 0 after restoring ra and s0-s11.
li a0, 0
ld ra, 168(sp)
ld s0, 160(sp)
ld s1, 152(sp)
ld s2, 144(sp)
ld s3, 136(sp)
ld s4, 128(sp)
ld s5, 120(sp)
ld s6, 112(sp)
ld s7, 104(sp)
ld s8, 96(sp)
ld s9, 88(sp)
ld s10, 80(sp)
ld s11, 72(sp)
addi sp, sp, 176
ret
.Lfunc_end9:
.size main, .Lfunc_end9-main
# ---------------------------------------------------------------------------
# Data objects
# ---------------------------------------------------------------------------
# state: 4-byte word in .sdata, initial value 19260817. get_random and main
# load and store it; main overwrites it with the first getint result.
.type state,@object
.section .sdata,"aw",@progbits
.globl state
.p2align 2
state:
.word 19260817
.size state, 4
# buffer: 131072 zero-initialised bytes (32768 32-bit words) in .bss. main
# writes 32000 words into it and passes it to pseudo_sha1, which appends
# padding words after them.
.type buffer,@object
.bss
.globl buffer
.p2align 2
buffer:
.zero 131072
.size buffer, 131072
# The _sysy_* objects below are not referenced by any instruction in this
# file; presumably they belong to the timing runtime behind
# _sysy_starttime/_sysy_stoptime -- confirm against that runtime.
# _sysy_start: 16 zero bytes, 8-byte aligned.
.type _sysy_start,@object
.globl _sysy_start
.p2align 3
_sysy_start:
.zero 16
.size _sysy_start, 16
# _sysy_end: 16 zero bytes, 8-byte aligned.
.type _sysy_end,@object
.globl _sysy_end
.p2align 3
_sysy_end:
.zero 16
.size _sysy_end, 16
# Six 4096-byte zero-initialised arrays, each 4-byte aligned.
.type _sysy_l1,@object
.globl _sysy_l1
.p2align 2
_sysy_l1:
.zero 4096
.size _sysy_l1, 4096
.type _sysy_l2,@object
.globl _sysy_l2
.p2align 2
_sysy_l2:
.zero 4096
.size _sysy_l2, 4096
.type _sysy_h,@object
.globl _sysy_h
.p2align 2
_sysy_h:
.zero 4096
.size _sysy_h, 4096
.type _sysy_m,@object
.globl _sysy_m
.p2align 2
_sysy_m:
.zero 4096
.size _sysy_m, 4096
.type _sysy_s,@object
.globl _sysy_s
.p2align 2
_sysy_s:
.zero 4096
.size _sysy_s, 4096
.type _sysy_us,@object
.globl _sysy_us
.p2align 2
_sysy_us:
.zero 4096
.size _sysy_us, 4096
# _sysy_idx: 4-byte word placed in .sbss; the explicit initialiser is zero.
.type _sysy_idx,@object
.section .sbss,"aw",@nobits
.globl _sysy_idx
.p2align 2
_sysy_idx:
.word 0
.size _sysy_idx, 4
# Producer identification string.
.ident "Debian clang version 14.0.6"
# Empty .note.GNU-stack section: the conventional marker that this object
# does not need an executable stack.
.section ".note.GNU-stack","",@progbits
# LLVM address-significance table; buffer is listed as address-significant.
.addrsig
.addrsig_sym buffer