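# File: sysy-data/final_performance_c/asm/04_spmv3.s
#
# Listing size as reported by the source viewer: 413 lines, 5.8 KiB.
#
# The viewer tagged this listing "ArmAsm"; the content is RISC-V (RV64) assembly.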

# Assembler preamble.
#   .text      - the instructions that follow are placed in the code section.
#   .attribute - ELF build attributes. Tag 5 carries the target ISA string
#                (RV64 with the I, M, A, F, D and C extensions). Tag 4 is
#                presumably the stack-alignment attribute (16 bytes) -- confirm
#                against the RISC-V ELF attribute numbering.
#   .file      - name of the source file this listing was generated from.
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
.file "04_spmv3.sy"
# spmv -- zero an output vector, then accumulate two passes of indexed
# additions into it.
#
# Register arguments on entry (the names are descriptive only, chosen for
# these comments; they do not appear in the listing):
#   a0 = n      number of outer iterations; nothing happens when n <= 0
#   a1 = ptr    32-bit words; ptr[i] .. ptr[i+1]-1 is the entry range for i
#   a2 = idx    32-bit words; idx[j] selects the element of out to update
#   a3 = val    32-bit words; val[j] is the value added for entry j
#   a4 = scale  32-bit words; one element is read per outer index i
#   a5 = out    32-bit words; the first n are cleared, then accumulated into
#
# Equivalent pseudo-C (all arithmetic is 32-bit and wraps):
#   if (n <= 0) return;
#   memset(out, 0, 4 * n);
#   for (i = 0; i < n; i++) {
#     for (j = ptr[i]; j < ptr[i+1]; j++) out[idx[j]] += val[j];
#     for (j = ptr[i]; j < ptr[i+1]; j++) out[idx[j]] += (scale[i] - 1) * val[j];
#   }
#
# No value is returned. ra and s0-s7 are saved and restored through an 80-byte
# stack frame. The ptr/idx layout presumably follows a compressed-sparse-row
# scheme (inferred from the function name only) -- confirm with the caller.
.globl spmv
.p2align 1
.type spmv,@function
spmv:
# Prologue: allocate the frame and save ra plus every s-register used below.
addi sp, sp, -80
sd ra, 72(sp)
sd s0, 64(sp)
sd s1, 56(sp)
sd s2, 48(sp)
sd s3, 40(sp)
sd s4, 32(sp)
sd s5, 24(sp)
sd s6, 16(sp)
sd s7, 8(sp)
# Nothing to do for n <= 0: go straight to the epilogue.
blez a0, .LBB0_10
# Keep the pointer arguments in s-registers so they survive the memset call:
# s7 = out, s2 = scale, s3 = val, s4 = idx, s6 = ptr.
mv s7, a5
mv s2, a4
mv s3, a3
mv s4, a2
mv s6, a1
# s5 = n zero-extended from its low 32 bits (the loop limit);
# a2 = 4 * that value, the byte count handed to memset.
slli a0, a0, 32
srli s5, a0, 32
srli a2, a0, 30
# memset(out, 0, 4 * n)
mv a0, a5
li a1, 0
call memset@plt
# a1 = ptr[0] becomes the running entry index j; t0 = 0 is the outer index i.
lw a1, 0(s6)
li t0, 0
j .LBB0_3
.LBB0_2:
# Outer-loop latch. a4 holds the most recently loaded ptr[i+1]; it becomes the
# starting j for the next outer index. Leave once t0 (already i+1) equals n.
mv a1, a4
beq t0, s5, .LBB0_10
.LBB0_3:
# a7 = 4*i (byte offset of element i); t0 = i+1; a2 = &ptr[i+1]; a4 = ptr[i+1].
slli a7, t0, 2
addi t0, t0, 1
slli a2, t0, 2
add a2, a2, s6
lw a4, 0(a2)
# Empty range (j >= ptr[i+1]): skip the first pass.
bge a1, a4, .LBB0_7
# First-pass setup: a6 = &ptr[i], s1 = &idx[j], a5 = &val[j].
add a6, s6, a7
slli a4, a1, 2
add s1, s4, a4
add a5, s3, a4
.LBB0_5:
# First pass body: out[idx[j]] += val[j].
lw a4, 0(s1)
slli a4, a4, 2
add a4, a4, s7
lw a3, 0(a4)
lw a0, 0(a5)
addw a0, a0, a3
sw a0, 0(a4)
# The bound ptr[i+1] is read again from memory after each store to out
# (presumably because the store could alias ptr -- compiler-generated).
lw a4, 0(a2)
addi a1, a1, 1
addi s1, s1, 4
addi a5, a5, 4
blt a1, a4, .LBB0_5
# Restart j at ptr[i] (reloaded from memory) for the second pass.
lw a1, 0(a6)
.LBB0_7:
# Empty range: skip the second pass and go to the latch.
bge a1, a4, .LBB0_2
# Second-pass setup: a6 = &scale[i], a5 = &idx[j], s1 = &val[j].
add a6, s2, a7
slli a0, a1, 2
add a5, s4, a0
add s1, s3, a0
.LBB0_9:
# Second pass body: out[idx[j]] += (scale[i] - 1) * val[j].
# scale[i] and the bound ptr[i+1] are both re-read on every iteration.
lw a0, 0(a5)
slli a0, a0, 2
add a0, a0, s7
lw a4, 0(a6)
lw s0, 0(s1)
lw a3, 0(a0)
addiw a4, a4, -1
mulw a4, a4, s0
addw a3, a3, a4
sw a3, 0(a0)
lw a4, 0(a2)
addi a1, a1, 1
addi a5, a5, 4
addi s1, s1, 4
blt a1, a4, .LBB0_9
j .LBB0_2
.LBB0_10:
# Epilogue: restore the saved registers, release the frame, return.
ld ra, 72(sp)
ld s0, 64(sp)
ld s1, 56(sp)
ld s2, 48(sp)
ld s3, 40(sp)
ld s4, 32(sp)
ld s5, 24(sp)
ld s6, 16(sp)
ld s7, 8(sp)
addi sp, sp, 80
ret
.Lfunc_end0:
.size spmv, .Lfunc_end0-spmv
# main -- read four arrays, run 100 rounds of two alternating accumulate
# passes over them between the start/stop timer calls, print b, return 0.
#
# Outline (names are the global symbols of this file):
#   count = getarray(x);  n = count - 1   (32-bit)
#   getarray(y); getarray(v); getarray(a)  (their return values are not used)
#   _sysy_starttime(40)
#   if (count >= 2) repeat 100 times:
#       step A: the computation documented on spmv with ptr = x, idx = y,
#               val = v, scale = a, out = b
#       step B: the same with scale = b, out = a
#   _sysy_stoptime(48)
#   putarray(n, b)
#   return 0
#
# spmv itself is never called here; both steps are written out inline. They
# differ from spmv's code in two ways: each inner loop counts down a trip count
# computed once as ptr[i+1] - j, and scale[i] - 1 is loaded once per second
# pass rather than on every iteration. x[0] is likewise loaded once, before
# the 100-round loop.
#
# Long-lived registers:
#   s6 = &x   s3 = &y   s4 = &v   s11 = &a   s1 = &b (after the timer starts)
#   s2 = n    s7 = n zero-extended from 32 bits   s5 = 4 * s7 (memset length)
#   s8 = x[0] (it holds %hi(x) before that)   s9 = 99   s10 = round counter
# ra and s0-s11 are saved and restored through a 112-byte stack frame.
.globl main
.p2align 1
.type main,@function
main:
# Prologue: allocate the frame and save ra and s0-s11.
addi sp, sp, -112
sd ra, 104(sp)
sd s0, 96(sp)
sd s1, 88(sp)
sd s2, 80(sp)
sd s3, 72(sp)
sd s4, 64(sp)
sd s5, 56(sp)
sd s6, 48(sp)
sd s7, 40(sp)
sd s8, 32(sp)
sd s9, 24(sp)
sd s10, 16(sp)
sd s11, 8(sp)
# count = getarray(x); s1 = count, s2 = n = count - 1.
lui s8, %hi(x)
addi s6, s8, %lo(x)
mv a0, s6
call getarray
mv s1, a0
addiw s2, a0, -1
# getarray(y), getarray(v), getarray(a): a0 is overwritten right after each
# call, so the returned values are discarded.
lui a0, %hi(y)
addi s3, a0, %lo(y)
mv a0, s3
call getarray
lui a0, %hi(v)
addi s4, a0, %lo(v)
mv a0, s4
call getarray
lui a0, %hi(a)
addi s11, a0, %lo(a)
mv a0, s11
call getarray
# _sysy_starttime(40). The argument is presumably a source line number --
# confirm against the runtime library.
li a0, 40
call _sysy_starttime
# count < 2 means n < 1: skip the whole computation.
li a0, 2
blt s1, a0, .LBB1_21
# Loop-invariant setup: a0 = 0 (first round number), s7 = zero-extended n,
# s5 = 4*n bytes, s8 = x[0], s1 = &b, s9 = 99 (last round number).
li a0, 0
slli a1, s2, 32
srli s7, a1, 32
srli s5, a1, 30
lw s8, %lo(x)(s8)
lui a1, %hi(b)
addi s1, a1, %lo(b)
li s9, 99
j .LBB1_3
.LBB1_2:
# Round latch: a0 = next round number; leave after round 99 (100 rounds total).
addiw a0, s10, 1
bgeu s10, s9, .LBB1_21
.LBB1_3:
# ---- Step A: out = b, scale = a ----
# s10 = current round number; memset(b, 0, 4*n); t0 = i = 0; a3 = j = x[0].
mv s10, a0
mv a0, s1
li a1, 0
mv a2, s5
call memset@plt
li t0, 0
mv a3, s8
j .LBB1_5
.LBB1_4:
# Step A outer latch: j = x[i+1] (held in t1); step A is done when t0 == n.
mv a3, t1
beq t0, s7, .LBB1_12
.LBB1_5:
# a7 = 4*i; t0 = i+1; a1 = &x[i+1]; t1 = x[i+1]. Skip the first pass if empty.
slli a7, t0, 2
addi t0, t0, 1
slli a1, t0, 2
add a1, a1, s6
lw t1, 0(a1)
bge a3, t1, .LBB1_9
# a6 = &x[i]; a5 = trip count x[i+1] - j; a3 = &v[j]; a4 = &y[j].
add a6, a7, s6
sub a5, t1, a3
slli a4, a3, 2
add a3, a4, s4
add a4, a4, s3
.LBB1_7:
# First pass body: b[y[j]] += v[j]; a5 counts the remaining entries.
lw a2, 0(a4)
slli a2, a2, 2
add a2, a2, s1
lw a0, 0(a2)
lw a1, 0(a3)
addw a0, a0, a1
sw a0, 0(a2)
addi a5, a5, -1
addi a3, a3, 4
addi a4, a4, 4
bnez a5, .LBB1_7
# Restart j at x[i] (reloaded from memory) for the second pass.
lw a3, 0(a6)
.LBB1_9:
# Skip the second pass if the range is empty.
bge a3, t1, .LBB1_4
# a2 = a[i] - 1 (loaded once); a4 = trip count; a3 = &v[j]; a5 = &y[j].
add a0, a7, s11
lw a0, 0(a0)
addiw a2, a0, -1
sub a4, t1, a3
slli a0, a3, 2
add a3, a0, s4
add a5, a0, s3
.LBB1_11:
# Second pass body: b[y[j]] += (a[i] - 1) * v[j].
lw a0, 0(a5)
slli a0, a0, 2
add a0, a0, s1
lw a1, 0(a3)
lw s0, 0(a0)
mulw a1, a2, a1
addw a1, a1, s0
sw a1, 0(a0)
addi a4, a4, -1
addi a3, a3, 4
addi a5, a5, 4
bnez a4, .LBB1_11
j .LBB1_4
.LBB1_12:
# ---- Step B: out = a, scale = b ----
# memset(a, 0, 4*n); t0 = i = 0; a3 = j = x[0].
mv a0, s11
li a1, 0
mv a2, s5
call memset@plt
li t0, 0
mv a3, s8
j .LBB1_14
.LBB1_13:
# Step B outer latch: j = x[i+1]; when t0 == n the round is finished.
mv a3, t1
beq t0, s7, .LBB1_2
.LBB1_14:
# a7 = 4*i; t0 = i+1; a1 = &x[i+1]; t1 = x[i+1]. Skip the first pass if empty.
slli a7, t0, 2
addi t0, t0, 1
slli a1, t0, 2
add a1, a1, s6
lw t1, 0(a1)
bge a3, t1, .LBB1_18
# a6 = &x[i]; a5 = trip count x[i+1] - j; a3 = &v[j]; a4 = &y[j].
add a6, a7, s6
sub a5, t1, a3
slli a4, a3, 2
add a3, a4, s4
add a4, a4, s3
.LBB1_16:
# First pass body: a[y[j]] += v[j].
lw s0, 0(a4)
slli s0, s0, 2
add s0, s0, s11
lw a2, 0(s0)
lw a0, 0(a3)
addw a0, a0, a2
sw a0, 0(s0)
addi a5, a5, -1
addi a3, a3, 4
addi a4, a4, 4
bnez a5, .LBB1_16
# Restart j at x[i] (reloaded from memory) for the second pass.
lw a3, 0(a6)
.LBB1_18:
# Skip the second pass if the range is empty.
bge a3, t1, .LBB1_13
# a2 = b[i] - 1 (loaded once); a4 = trip count; a3 = &v[j]; a5 = &y[j].
add a0, a7, s1
lw a0, 0(a0)
addiw a2, a0, -1
sub a4, t1, a3
slli a0, a3, 2
add a3, a0, s4
add a5, a0, s3
.LBB1_20:
# Second pass body: a[y[j]] += (b[i] - 1) * v[j].
lw a0, 0(a5)
slli a0, a0, 2
add a0, a0, s11
lw s0, 0(a3)
lw a1, 0(a0)
mulw s0, a2, s0
addw a1, a1, s0
sw a1, 0(a0)
addi a4, a4, -1
addi a3, a3, 4
addi a5, a5, 4
bnez a4, .LBB1_20
j .LBB1_13
.LBB1_21:
# _sysy_stoptime(48), then putarray(n, b). The stoptime argument is presumably
# a source line number -- confirm against the runtime library.
li a0, 48
call _sysy_stoptime
lui a0, %hi(b)
addi a1, a0, %lo(b)
mv a0, s2
call putarray
# Return value 0; restore the saved registers and release the frame.
li a0, 0
ld ra, 104(sp)
ld s0, 96(sp)
ld s1, 88(sp)
ld s2, 80(sp)
ld s3, 72(sp)
ld s4, 64(sp)
ld s5, 56(sp)
ld s6, 48(sp)
ld s7, 40(sp)
ld s8, 32(sp)
ld s9, 24(sp)
ld s10, 16(sp)
ld s11, 8(sp)
addi sp, sp, 112
ret
.Lfunc_end1:
.size main, .Lfunc_end1-main
# Zero-initialised global arrays used by main. All are 4-byte aligned and are
# accessed by main with 32-bit loads and stores.
#
# x: 400040 bytes (100010 words). Filled by getarray; main reads x[i] and
#    x[i+1] as the start and end of the entry range for outer index i.
.type x,@object
.bss
.globl x
.p2align 2
x:
.zero 400040
.size x, 400040
# y: 12000000 bytes (3000000 words). Filled by getarray; y[j] is the index of
#    the output element updated for entry j.
.type y,@object
.globl y
.p2align 2
y:
.zero 12000000
.size y, 12000000
# v: 12000000 bytes (3000000 words). Filled by getarray; v[j] is the value
#    accumulated for entry j.
.type v,@object
.globl v
.p2align 2
v:
.zero 12000000
.size v, 12000000
# a: 400040 bytes (100010 words). Filled by getarray; main alternately reads
#    it as the per-index factor and overwrites it as the output.
.type a,@object
.globl a
.p2align 2
a:
.zero 400040
.size a, 400040
# b: 400040 bytes (100010 words). The other half of the a/b alternation; it is
#    the array passed to putarray at the end of main.
.type b,@object
.globl b
.p2align 2
b:
.zero 400040
.size b, 400040
# Global objects with the _sysy_ prefix. No instruction in this listing reads
# or writes them; presumably they are the state of the timing runtime behind
# _sysy_starttime / _sysy_stoptime -- confirm against the runtime library.
#
# _sysy_start, _sysy_end: 16 bytes each, 8-byte aligned.
.type _sysy_start,@object
.globl _sysy_start
.p2align 3
_sysy_start:
.zero 16
.size _sysy_start, 16
.type _sysy_end,@object
.globl _sysy_end
.p2align 3
_sysy_end:
.zero 16
.size _sysy_end, 16
# _sysy_l1, _sysy_l2, _sysy_h, _sysy_m, _sysy_s, _sysy_us: six buffers of 4096
# bytes each, 4-byte aligned.
.type _sysy_l1,@object
.globl _sysy_l1
.p2align 2
_sysy_l1:
.zero 4096
.size _sysy_l1, 4096
.type _sysy_l2,@object
.globl _sysy_l2
.p2align 2
_sysy_l2:
.zero 4096
.size _sysy_l2, 4096
.type _sysy_h,@object
.globl _sysy_h
.p2align 2
_sysy_h:
.zero 4096
.size _sysy_h, 4096
.type _sysy_m,@object
.globl _sysy_m
.p2align 2
_sysy_m:
.zero 4096
.size _sysy_m, 4096
.type _sysy_s,@object
.globl _sysy_s
.p2align 2
_sysy_s:
.zero 4096
.size _sysy_s, 4096
.type _sysy_us,@object
.globl _sysy_us
.p2align 2
_sysy_us:
.zero 4096
.size _sysy_us, 4096
# _sysy_idx: a single 4-byte word with initial value 0, placed in .sbss
# rather than .bss.
.type _sysy_idx,@object
.section .sbss,"aw",@nobits
.globl _sysy_idx
.p2align 2
_sysy_idx:
.word 0
.size _sysy_idx, 4
# c: 400040 zero-initialised bytes in .bss, 4-byte aligned (the same size as
#    x, a and b). No instruction in this listing references it.
.type c,@object
.bss
.globl c
.p2align 2
c:
.zero 400040
.size c, 400040
# Trailer emitted by the compiler.
#   .ident           - records the producing compiler's version string.
#   .note.GNU-stack  - an empty section with no flags (in particular, not
#                      marked executable).
#   .addrsig*        - address-significance table; the symbols listed are the
#                      five arrays whose addresses main materialises.
.ident "Debian clang version 14.0.6"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym x
.addrsig_sym y
.addrsig_sym v
.addrsig_sym a
.addrsig_sym b