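# Listing metadata: 413 lines, 5.8 KiB.
# The code viewer labelled this file "ArmAsm"; the directives and mnemonics
# below are RISC-V (rv64) assembly in GNU assembler syntax.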
# Translation-unit preamble: select the code section, record the ELF
# attributes and name the source file this assembly was generated from.
        .text
# Attribute 4 is Tag_RISCV_stack_align in the RISC-V ELF psABI: 16-byte stack.
        .attribute 4, 16
# Attribute 5 is Tag_RISCV_arch: the ISA string the code was built for.
        .attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
        .file   "04_spmv3.sy"

#-----------------------------------------------------------------------
# spmv -- two accumulation passes over per-row index ranges
#
# Presumed C shape (argument names are descriptive, not from the source):
#   void spmv(int n, int rp[], int ix[], int val[], int fac[], int out[])
#
# In:   a0 = n     row count; nothing is done when a0 <= 0
#       a1 = rp    32-bit words; row i covers j in [rp[i], rp[i+1])
#       a2 = ix    32-bit words; ix[j] is an index into out
#       a3 = val   32-bit words
#       a4 = fac   32-bit words; one factor per row
#       a5 = out   32-bit words; first n words are zeroed, then accumulated
# Out:  no result is set up in a0 before returning
# Calls: memset
# Stack: 80-byte frame holding ra and s0-s7; 0(sp) is unused
#
# Equivalent pseudo-C (all arithmetic is 32-bit wrapping: addw/mulw):
#   memset(out, 0, 4 * (unsigned)n);
#   for (i = 0; i < n; i++) {
#       for (j = rp[i]; j < rp[i+1]; j++) out[ix[j]] += val[j];
#       for (j = rp[i]; j < rp[i+1]; j++) out[ix[j]] += val[j] * (fac[i] - 1);
#   }
#
# Register roles after setup:
#   s6 = rp, s4 = ix, s3 = val, s2 = fac, s7 = out, s5 = zero-extended n
#   t0 = i + 1, a7 = 4 * i, a2 = &rp[i+1], a4 = rp[i+1], a1 = j
#-----------------------------------------------------------------------
        .globl  spmv
        .p2align 1
        .type   spmv,@function
spmv:
        addi    sp, sp, -80
        sd      ra, 72(sp)
        sd      s0, 64(sp)
        sd      s1, 56(sp)
        sd      s2, 48(sp)
        sd      s3, 40(sp)
        sd      s4, 32(sp)
        sd      s5, 24(sp)
        sd      s6, 16(sp)
        sd      s7, 8(sp)
        blez    a0, .Lspmv_ret          # n <= 0: nothing to do
        mv      s7, a5                  # s7 = out
        mv      s2, a4                  # s2 = fac
        mv      s3, a3                  # s3 = val
        mv      s4, a2                  # s4 = ix
        mv      s6, a1                  # s6 = rp
        slli    a0, a0, 32
        srli    s5, a0, 32              # s5 = n zero-extended from 32 bits
        srli    a2, a0, 30              # a2 = 4 * n  (memset byte count)
        mv      a0, a5
        li      a1, 0
        call    memset@plt              # memset(out, 0, 4 * n)
        lw      a1, 0(s6)               # j = rp[0]
        li      t0, 0                   # i = 0
        j       .Lspmv_row

.Lspmv_row_done:
        mv      a1, a4                  # next row starts at the last-read rp[i+1]
        beq     t0, s5, .Lspmv_ret      # all n rows processed

.Lspmv_row:
        slli    a7, t0, 2               # a7 = 4 * i
        addi    t0, t0, 1               # t0 = i + 1
        slli    a2, t0, 2
        add     a2, a2, s6              # a2 = &rp[i+1]
        lw      a4, 0(a2)               # a4 = rp[i+1]
        bge     a1, a4, .Lspmv_scaled_check     # empty range: skip first pass
        add     a6, s6, a7              # a6 = &rp[i]
        slli    a4, a1, 2
        add     s1, s4, a4              # s1 = &ix[j]
        add     a5, s3, a4              # a5 = &val[j]

# First pass: out[ix[j]] += val[j]
.Lspmv_add_vals:
        lw      a4, 0(s1)
        slli    a4, a4, 2
        add     a4, a4, s7              # a4 = &out[ix[j]]
        lw      a3, 0(a4)
        lw      a0, 0(a5)
        addw    a0, a0, a3
        sw      a0, 0(a4)
        lw      a4, 0(a2)               # re-read rp[i+1] after the store
        addi    a1, a1, 1
        addi    s1, s1, 4
        addi    a5, a5, 4
        blt     a1, a4, .Lspmv_add_vals
        lw      a1, 0(a6)               # j = rp[i] again for the second pass

.Lspmv_scaled_check:
        bge     a1, a4, .Lspmv_row_done # empty range: skip second pass
        add     a6, s2, a7              # a6 = &fac[i]
        slli    a0, a1, 2
        add     a5, s4, a0              # a5 = &ix[j]
        add     s1, s3, a0              # s1 = &val[j]

# Second pass: out[ix[j]] += val[j] * (fac[i] - 1)
.Lspmv_add_scaled:
        lw      a0, 0(a5)
        slli    a0, a0, 2
        add     a0, a0, s7              # a0 = &out[ix[j]]
        lw      a4, 0(a6)               # fac[i], re-read on every iteration
        lw      s0, 0(s1)               # val[j]
        lw      a3, 0(a0)
        addiw   a4, a4, -1
        mulw    a4, a4, s0
        addw    a3, a3, a4
        sw      a3, 0(a0)
        lw      a4, 0(a2)               # re-read rp[i+1] after the store
        addi    a1, a1, 1
        addi    a5, a5, 4
        addi    s1, s1, 4
        blt     a1, a4, .Lspmv_add_scaled
        j       .Lspmv_row_done

.Lspmv_ret:
        ld      ra, 72(sp)
        ld      s0, 64(sp)
        ld      s1, 56(sp)
        ld      s2, 48(sp)
        ld      s3, 40(sp)
        ld      s4, 32(sp)
        ld      s5, 24(sp)
        ld      s6, 16(sp)
        ld      s7, 8(sp)
        addi    sp, sp, 80
        ret
.Lfunc_end0:
        .size   spmv, .Lfunc_end0-spmv

#-----------------------------------------------------------------------
# main -- read the input arrays, run 100 timed iterations of two
#         accumulation passes, print b
#
# In:   none read
# Out:  a0 = 0
# Calls: getarray, _sysy_starttime, _sysy_stoptime, putarray, memset
# Stack: 112-byte frame holding ra and s0-s11; 0(sp) is unused
#
# Equivalent pseudo-C (32-bit wrapping arithmetic: addw/mulw):
#   cnt = getarray(x);  n = cnt - 1;
#   getarray(y);  getarray(v);  getarray(a);    // return values discarded
#   _sysy_starttime(40);
#   if (cnt >= 2)
#       for (it = 0; it < 100; it++) {
#           pass(out = b, fac = a);
#           pass(out = a, fac = b);
#       }
#   _sysy_stoptime(48);
#   putarray(n, b);
#   return 0;
#
# where pass(out, fac) is:
#   memset(out, 0, 4 * n);
#   for (i = 0; i < n; i++) {
#       for (j = x[i]; j < x[i+1]; j++) out[y[j]] += v[j];
#       for (j = x[i]; j < x[i+1]; j++) out[y[j]] += v[j] * (fac[i] - 1);
#   }
# Each pass has the same shape as spmv in this file, presumably inlined by
# the compiler. Unlike spmv, fac[i] - 1 and the trip count x[i+1] - j are
# computed once per row, and x[0] is cached in s8.
#
# Register roles:
#   s6 = &x, s3 = &y, s4 = &v, s11 = &a, s1 = cnt, then &b
#   s2 = n, s7 = zero-extended n, s5 = 4 * n (memset byte count)
#   s8 = %hi(x), then x[0];  s9 = 99;  s10 = it
#   t0 = i + 1, a7 = 4 * i, t1 = x[i+1]
#-----------------------------------------------------------------------
        .globl  main
        .p2align 1
        .type   main,@function
main:
        addi    sp, sp, -112
        sd      ra, 104(sp)
        sd      s0, 96(sp)
        sd      s1, 88(sp)
        sd      s2, 80(sp)
        sd      s3, 72(sp)
        sd      s4, 64(sp)
        sd      s5, 56(sp)
        sd      s6, 48(sp)
        sd      s7, 40(sp)
        sd      s8, 32(sp)
        sd      s9, 24(sp)
        sd      s10, 16(sp)
        sd      s11, 8(sp)

        lui     s8, %hi(x)
        addi    s6, s8, %lo(x)          # s6 = &x
        mv      a0, s6
        call    getarray                # cnt = getarray(x)
        mv      s1, a0
        addiw   s2, a0, -1              # n = cnt - 1
        lui     a0, %hi(y)
        addi    s3, a0, %lo(y)          # s3 = &y
        mv      a0, s3
        call    getarray
        lui     a0, %hi(v)
        addi    s4, a0, %lo(v)          # s4 = &v
        mv      a0, s4
        call    getarray
        lui     a0, %hi(a)
        addi    s11, a0, %lo(a)         # s11 = &a
        mv      a0, s11
        call    getarray
        li      a0, 40                  # argument 40: meaning not visible here
        call    _sysy_starttime

        li      a0, 2
        blt     s1, a0, .Lmain_finish   # cnt < 2 (n < 1): skip the timed loop
        li      a0, 0                   # it = 0
        slli    a1, s2, 32
        srli    s7, a1, 32              # s7 = n zero-extended from 32 bits
        srli    s5, a1, 30              # s5 = 4 * n
        lw      s8, %lo(x)(s8)          # s8 = x[0]
        lui     a1, %hi(b)
        addi    s1, a1, %lo(b)          # s1 = &b
        li      s9, 99                  # last iteration index
        j       .Lmain_iter

.Lmain_iter_done:
        addiw   a0, s10, 1              # it + 1
        bgeu    s10, s9, .Lmain_finish  # stop once it == 99 has run

# ---- pass 1: out = b, fac = a ----------------------------------------
.Lmain_iter:
        mv      s10, a0                 # s10 = it
        mv      a0, s1
        li      a1, 0
        mv      a2, s5
        call    memset@plt              # memset(b, 0, 4 * n)
        li      t0, 0                   # i = 0
        mv      a3, s8                  # j = x[0]
        j       .Lmain_p1_row

.Lmain_p1_row_done:
        mv      a3, t1                  # next row starts at x[i+1]
        beq     t0, s7, .Lmain_p2       # all rows done: start pass 2

.Lmain_p1_row:
        slli    a7, t0, 2               # a7 = 4 * i
        addi    t0, t0, 1               # t0 = i + 1
        slli    a1, t0, 2
        add     a1, a1, s6
        lw      t1, 0(a1)               # t1 = x[i+1]
        bge     a3, t1, .Lmain_p1_scaled_check  # empty range
        add     a6, a7, s6              # a6 = &x[i]
        sub     a5, t1, a3              # a5 = trip count x[i+1] - j
        slli    a4, a3, 2
        add     a3, a4, s4              # a3 = &v[j]
        add     a4, a4, s3              # a4 = &y[j]

# b[y[j]] += v[j]
.Lmain_p1_add_vals:
        lw      a2, 0(a4)
        slli    a2, a2, 2
        add     a2, a2, s1              # a2 = &b[y[j]]
        lw      a0, 0(a2)
        lw      a1, 0(a3)
        addw    a0, a0, a1
        sw      a0, 0(a2)
        addi    a5, a5, -1
        addi    a3, a3, 4
        addi    a4, a4, 4
        bnez    a5, .Lmain_p1_add_vals
        lw      a3, 0(a6)               # j = x[i] again for the scaled loop

.Lmain_p1_scaled_check:
        bge     a3, t1, .Lmain_p1_row_done      # empty range
        add     a0, a7, s11
        lw      a0, 0(a0)               # a[i]
        addiw   a2, a0, -1              # a2 = a[i] - 1, fixed for the row
        sub     a4, t1, a3              # a4 = trip count
        slli    a0, a3, 2
        add     a3, a0, s4              # a3 = &v[j]
        add     a5, a0, s3              # a5 = &y[j]

# b[y[j]] += v[j] * (a[i] - 1)
.Lmain_p1_add_scaled:
        lw      a0, 0(a5)
        slli    a0, a0, 2
        add     a0, a0, s1              # a0 = &b[y[j]]
        lw      a1, 0(a3)
        lw      s0, 0(a0)
        mulw    a1, a2, a1
        addw    a1, a1, s0
        sw      a1, 0(a0)
        addi    a4, a4, -1
        addi    a3, a3, 4
        addi    a5, a5, 4
        bnez    a4, .Lmain_p1_add_scaled
        j       .Lmain_p1_row_done

# ---- pass 2: out = a, fac = b ----------------------------------------
.Lmain_p2:
        mv      a0, s11
        li      a1, 0
        mv      a2, s5
        call    memset@plt              # memset(a, 0, 4 * n)
        li      t0, 0                   # i = 0
        mv      a3, s8                  # j = x[0]
        j       .Lmain_p2_row

.Lmain_p2_row_done:
        mv      a3, t1                  # next row starts at x[i+1]
        beq     t0, s7, .Lmain_iter_done        # all rows done: next iteration

.Lmain_p2_row:
        slli    a7, t0, 2               # a7 = 4 * i
        addi    t0, t0, 1               # t0 = i + 1
        slli    a1, t0, 2
        add     a1, a1, s6
        lw      t1, 0(a1)               # t1 = x[i+1]
        bge     a3, t1, .Lmain_p2_scaled_check  # empty range
        add     a6, a7, s6              # a6 = &x[i]
        sub     a5, t1, a3              # a5 = trip count
        slli    a4, a3, 2
        add     a3, a4, s4              # a3 = &v[j]
        add     a4, a4, s3              # a4 = &y[j]

# a[y[j]] += v[j]
.Lmain_p2_add_vals:
        lw      s0, 0(a4)
        slli    s0, s0, 2
        add     s0, s0, s11             # s0 = &a[y[j]]
        lw      a2, 0(s0)
        lw      a0, 0(a3)
        addw    a0, a0, a2
        sw      a0, 0(s0)
        addi    a5, a5, -1
        addi    a3, a3, 4
        addi    a4, a4, 4
        bnez    a5, .Lmain_p2_add_vals
        lw      a3, 0(a6)               # j = x[i] again for the scaled loop

.Lmain_p2_scaled_check:
        bge     a3, t1, .Lmain_p2_row_done      # empty range
        add     a0, a7, s1
        lw      a0, 0(a0)               # b[i]
        addiw   a2, a0, -1              # a2 = b[i] - 1, fixed for the row
        sub     a4, t1, a3              # a4 = trip count
        slli    a0, a3, 2
        add     a3, a0, s4              # a3 = &v[j]
        add     a5, a0, s3              # a5 = &y[j]

# a[y[j]] += v[j] * (b[i] - 1)
.Lmain_p2_add_scaled:
        lw      a0, 0(a5)
        slli    a0, a0, 2
        add     a0, a0, s11             # a0 = &a[y[j]]
        lw      s0, 0(a3)
        lw      a1, 0(a0)
        mulw    s0, a2, s0
        addw    a1, a1, s0
        sw      a1, 0(a0)
        addi    a4, a4, -1
        addi    a3, a3, 4
        addi    a5, a5, 4
        bnez    a4, .Lmain_p2_add_scaled
        j       .Lmain_p2_row_done

# ---- stop the timer, print b, return 0 -------------------------------
.Lmain_finish:
        li      a0, 48                  # argument 48: meaning not visible here
        call    _sysy_stoptime
        lui     a0, %hi(b)
        addi    a1, a0, %lo(b)
        mv      a0, s2
        call    putarray                # putarray(n, b)
        li      a0, 0                   # return 0
        ld      ra, 104(sp)
        ld      s0, 96(sp)
        ld      s1, 88(sp)
        ld      s2, 80(sp)
        ld      s3, 72(sp)
        ld      s4, 64(sp)
        ld      s5, 56(sp)
        ld      s6, 48(sp)
        ld      s7, 40(sp)
        ld      s8, 32(sp)
        ld      s9, 24(sp)
        ld      s10, 16(sp)
        ld      s11, 8(sp)
        addi    sp, sp, 112
        ret
.Lfunc_end1:
        .size   main, .Lfunc_end1-main

# Zero-initialised global arrays in .bss, each 4-byte aligned. main in this
# file accesses x, y, v, a and b as 32-bit words.

# x: 400040 bytes (100010 words)
        .type   x,@object
        .bss
        .globl  x
        .p2align 2
x:
        .zero   400040
        .size   x, 400040

# y: 12000000 bytes (3000000 words)
        .type   y,@object
        .globl  y
        .p2align 2
y:
        .zero   12000000
        .size   y, 12000000

# v: 12000000 bytes (3000000 words)
        .type   v,@object
        .globl  v
        .p2align 2
v:
        .zero   12000000
        .size   v, 12000000

# a: 400040 bytes (100010 words)
        .type   a,@object
        .globl  a
        .p2align 2
a:
        .zero   400040
        .size   a, 400040

# b: 400040 bytes (100010 words)
        .type   b,@object
        .globl  b
        .p2align 2
b:
        .zero   400040
        .size   b, 400040

# Global objects with the _sysy_ prefix. No code visible in this file reads
# or writes them; presumably they belong to the timing runtime behind
# _sysy_starttime / _sysy_stoptime (TODO confirm against the runtime source).

# _sysy_start, _sysy_end: 16 bytes each, 8-byte aligned
        .type   _sysy_start,@object
        .globl  _sysy_start
        .p2align 3
_sysy_start:
        .zero   16
        .size   _sysy_start, 16

        .type   _sysy_end,@object
        .globl  _sysy_end
        .p2align 3
_sysy_end:
        .zero   16
        .size   _sysy_end, 16

# Six 4096-byte tables, 4-byte aligned
        .type   _sysy_l1,@object
        .globl  _sysy_l1
        .p2align 2
_sysy_l1:
        .zero   4096
        .size   _sysy_l1, 4096

        .type   _sysy_l2,@object
        .globl  _sysy_l2
        .p2align 2
_sysy_l2:
        .zero   4096
        .size   _sysy_l2, 4096

        .type   _sysy_h,@object
        .globl  _sysy_h
        .p2align 2
_sysy_h:
        .zero   4096
        .size   _sysy_h, 4096

        .type   _sysy_m,@object
        .globl  _sysy_m
        .p2align 2
_sysy_m:
        .zero   4096
        .size   _sysy_m, 4096

        .type   _sysy_s,@object
        .globl  _sysy_s
        .p2align 2
_sysy_s:
        .zero   4096
        .size   _sysy_s, 4096

        .type   _sysy_us,@object
        .globl  _sysy_us
        .p2align 2
_sysy_us:
        .zero   4096
        .size   _sysy_us, 4096

# _sysy_idx: one 32-bit word with initial value 0, placed in .sbss
        .type   _sysy_idx,@object
        .section .sbss,"aw",@nobits
        .globl  _sysy_idx
        .p2align 2
_sysy_idx:
        .word   0
        .size   _sysy_idx, 4

# c: 400040 zero-initialised bytes in .bss, 4-byte aligned. Not referenced
# by any code visible in this file.
        .type   c,@object
        .bss
        .globl  c
        .p2align 2
c:
        .zero   400040
        .size   c, 400040

# Producer identification string.
        .ident  "Debian clang version 14.0.6"
# Empty .note.GNU-stack section: by GNU toolchain convention this marks the
# object as not needing an executable stack.
        .section ".note.GNU-stack","",@progbits
# LLVM address-significance table and the symbols listed in it.
        .addrsig
        .addrsig_sym x
        .addrsig_sym y
        .addrsig_sym v
        .addrsig_sym a
        .addrsig_sym b