# sysy-data/final_performance_c/asm/04_spmv3.s
#
# 297 lines
# 4.2 KiB
# ArmAsm (label from the file listing; the content below is RISC-V rv64 GNU assembler output)

# Assembler preamble: names the originating source file, selects
# position-independent code generation and records the target attributes.
.file "04_spmv3.sy"
.option pic
# Target ISA string: RV64 base integer ISA plus the M, A, F, D, C, Zicsr and
# Zifencei extensions (the suffixes are the extension version numbers).
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
# Build attribute: 0 marks the object as not relying on unaligned accesses.
.attribute unaligned_access, 0
# Build attribute: the stack is kept 16-byte aligned.
.attribute stack_align, 16
# Everything up to the next section directive is emitted into .text.
.text
# 2-byte alignment (power-of-two argument), sufficient for compressed code.
.align 1
.type spmv.constprop.0, @function
#-----------------------------------------------------------------------
# spmv.constprop.0 -- file-local routine (no .globl for it) with the same
# loop structure as spmv, but with the tables x, y and v addressed directly
# as global symbols instead of being passed in.
# The names "in"/"out" are labels chosen for these comments only.
#
# In:    a0 = n    signed 32-bit count; returns immediately when n <= 0
#        a1 = in   base of a 32-bit word array; in[i] is read in loop 2
#        a2 = out  base of a 32-bit word array; zeroed for n words, then
#                  updated in place
# Out:   nothing is placed in a0 for the caller
# Calls: memset
# Stack: 32-byte frame holding ra, s0, s1
#
# Pseudo-C of the instruction stream:
#   memset(out, 0, 4 * (unsigned)n);
#   for (i = 0; i < n; i++) {
#       for (j = x[i]; j < x[i+1]; j++) out[y[j]] += v[j];
#       for (j = x[i]; j < x[i+1]; j++) out[y[j]] += (in[i] - 1) * v[j];
#   }
#-----------------------------------------------------------------------
spmv.constprop.0:
# n <= 0: leave through .L13 before any frame is created.
ble a0,zero,.L13
# Prologue, interleaved with the argument setup for memset(out, 0, bytes).
# slli by 32 followed by srli by 30 leaves a2 = 4 * (low 32 bits of n,
# zero-extended), i.e. the byte length of n words.
# s0 keeps n and s1 keeps in across the call.
addi sp,sp,-32
mv a7,a2
slli a5,a0,32
sd s0,16(sp)
sd s1,8(sp)
mv s0,a0
mv s1,a1
srli a2,a5,30
mv a0,a7
li a1,0
sd ra,24(sp)
call memset@plt
# a0 now holds memset's return value (its destination argument by the C
# library contract); it becomes the out base in a7. a7 is caller-saved in
# the RISC-V calling convention, so the copy made before the call is not
# reused.
# Loop registers:
#   a7 = out base             a2 = cursor into in (starts at in)
#   t1 = in + 4*n (end mark)  a6 = address of x[i+1] (starts at x+4)
#   t4 = y base               t3 = v base
slli t1,s0,2
mv a7,a0
mv a2,s1
lla a6,x+4
add t1,s1,t1
lla t4,y
lla t3,v
# Outer loop: one pass per word of in (index i).
.L4:
# a1 = j = x[i], a5 = x[i+1]; a0 -> y[j], a3 -> v[j].
lw a1,-4(a6)
lw a5,0(a6)
slli a3,a1,2
add a0,t4,a3
add a3,t3,a3
# Empty range (x[i+1] <= x[i]): skip both inner loops.
ble a5,a1,.L7
# Inner loop 1: out[y[j]] += v[j] while j < x[i+1].
.L6:
lw a5,0(a0)
lw a4,0(a3)
addiw a1,a1,1
slli a5,a5,2
add a5,a7,a5
lw t5,0(a5)
addi a0,a0,4
addi a3,a3,4
addw a4,a4,t5
sw a4,0(a5)
# The bound x[i+1] is loaded again after every store instead of being kept
# in a register (presumably because the store through out could alias x;
# this cannot be confirmed from this file alone).
lw a5,0(a6)
blt a1,a5,.L6
# Restart j at x[i] for the second pass; a5 still holds the x[i+1] value
# loaded just above. The y/v cursors are rebuilt from the new j.
lw a1,-4(a6)
ble a5,a1,.L7
slli a3,a1,2
add a0,t4,a3
add a3,t3,a3
# Inner loop 2: out[y[j]] += (in[i] - 1) * v[j] while j < x[i+1].
# in[i] is read through a2 on every iteration.
.L8:
lw a5,0(a2)
lw t5,0(a3)
lw a4,0(a0)
addiw a5,a5,-1
mulw a5,a5,t5
slli a4,a4,2
add a4,a7,a4
lw t5,0(a4)
addiw a1,a1,1
addi a0,a0,4
addi a3,a3,4
addw a5,a5,t5
sw a5,0(a4)
lw a5,0(a6)
bgt a5,a1,.L8
# Advance to the next i: step the in cursor and the x[i+1] pointer by one
# word each; stop when the in cursor reaches in + 4*n.
.L7:
addi a2,a2,4
addi a6,a6,4
bne t1,a2,.L4
# Epilogue: restore saved registers and release the 32-byte frame.
ld ra,24(sp)
ld s0,16(sp)
ld s1,8(sp)
addi sp,sp,32
jr ra
# Early exit for n <= 0 (no frame to tear down).
.L13:
ret
.size spmv.constprop.0, .-spmv.constprop.0
# 2-byte alignment (power-of-two argument), sufficient for compressed code.
.align 1
.globl spmv
.type spmv, @function
#-----------------------------------------------------------------------
# spmv -- exported routine; every array is passed in as an argument.
# The argument names are labels chosen for these comments; the original
# source names are not visible in this file.
#
# In:    a0 = n    signed 32-bit count; returns immediately when n <= 0
#        a1 = ptr  word array; ptr[i] and ptr[i+1] bound the j range
#        a2 = idx  word array; idx[j] selects the word of out to update
#        a3 = val  word array; val[j] is the value added / multiplied
#        a4 = in   word array; in[i] is read in loop 2
#        a5 = out  word array; zeroed for n words, then updated in place
# Out:   nothing is placed in a0 for the caller
# Calls: memset
# Stack: 48-byte frame holding ra, s0-s4
#
# Pseudo-C of the instruction stream:
#   memset(out, 0, 4 * (unsigned)n);
#   for (i = 0; i < n; i++) {
#       for (j = ptr[i]; j < ptr[i+1]; j++) out[idx[j]] += val[j];
#       for (j = ptr[i]; j < ptr[i+1]; j++)
#           out[idx[j]] += (in[i] - 1) * val[j];
#   }
#-----------------------------------------------------------------------
spmv:
# n <= 0: leave through .L30 before any frame is created.
ble a0,zero,.L30
# Prologue, interleaved with parking the arguments in callee-saved
# registers (s2 = n, s3 = ptr, s0 = idx, s1 = val, s4 = in) and with the
# argument setup for memset(out, 0, bytes).
# slli by 32 followed by srli by 30 leaves a2 = 4 * (low 32 bits of n,
# zero-extended), i.e. the byte length of n words.
addi sp,sp,-48
sd s4,0(sp)
mv s4,a4
slli a4,a0,32
sd s0,32(sp)
sd s2,16(sp)
sd s3,8(sp)
mv s2,a0
mv s3,a1
mv a0,a5
mv s0,a2
li a1,0
srli a2,a4,30
sd s1,24(sp)
sd ra,40(sp)
mv s1,a3
call memset@plt
# a0 now holds memset's return value (its destination argument by the C
# library contract); it becomes the out base in a5.
# Loop registers:
#   a5 = out base             a3 = cursor into in (starts at in)
#   a2 = in + 4*n (end mark)  t1 = address of ptr[i+1] (starts at ptr+4)
slli a2,s2,2
mv a5,a0
mv a3,s4
addi t1,s3,4
add a2,s4,a2
# Outer loop: one pass per word of in (index i).
.L20:
# a6 = j = ptr[i], a4 = ptr[i+1]; a7 -> idx[j], a0 -> val[j].
lw a6,-4(t1)
lw a4,0(t1)
slli a0,a6,2
add a7,s0,a0
add a0,s1,a0
# Empty range (ptr[i+1] <= ptr[i]): skip both inner loops.
ble a4,a6,.L23
# Inner loop 1: out[idx[j]] += val[j] while j < ptr[i+1].
.L22:
lw a4,0(a7)
lw a1,0(a0)
addiw a6,a6,1
slli a4,a4,2
add a4,a5,a4
lw t3,0(a4)
addi a7,a7,4
addi a0,a0,4
addw a1,a1,t3
sw a1,0(a4)
# The bound ptr[i+1] is loaded again after every store instead of being
# kept in a register (presumably because out could alias ptr; this cannot
# be confirmed from this file alone).
lw a4,0(t1)
bgt a4,a6,.L22
# Restart j at ptr[i] for the second pass; a4 still holds the ptr[i+1]
# value loaded just above. The idx/val cursors are rebuilt from the new j.
lw a6,-4(t1)
ble a4,a6,.L23
slli a0,a6,2
add a7,s0,a0
add a0,s1,a0
# Inner loop 2: out[idx[j]] += (in[i] - 1) * val[j] while j < ptr[i+1].
# in[i] is read through a3 on every iteration.
.L24:
lw a4,0(a3)
lw t3,0(a0)
lw a1,0(a7)
addiw a4,a4,-1
mulw a4,a4,t3
slli a1,a1,2
add a1,a5,a1
lw t3,0(a1)
addiw a6,a6,1
addi a7,a7,4
addi a0,a0,4
addw a4,a4,t3
sw a4,0(a1)
lw a4,0(t1)
bgt a4,a6,.L24
# Advance to the next i: step the in cursor and the ptr[i+1] pointer by
# one word each; stop when the in cursor reaches in + 4*n.
.L23:
addi a3,a3,4
addi t1,t1,4
bne a2,a3,.L20
# Epilogue: restore saved registers and release the 48-byte frame.
ld ra,40(sp)
ld s0,32(sp)
ld s1,24(sp)
ld s2,16(sp)
ld s3,8(sp)
ld s4,0(sp)
addi sp,sp,48
jr ra
# Early exit for n <= 0 (no frame to tear down).
.L30:
ret
.size spmv, .-spmv
# main is placed in its own executable section, .text.startup.
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
#-----------------------------------------------------------------------
# main -- program entry point.
#
# Fills the global arrays x, y, v and a through getarray, runs
# spmv.constprop.0 200 times (100 loop passes, two calls per pass, with
# the roles of a and b swapped between the two calls) between the start
# and stop timing hooks, then hands b to putarray and returns 0.
#
# Register roles: s1 = n (see below), s0 = remaining loop passes,
#                 s2 = address of a, s3 = address of b.
# Stack: 48-byte frame holding ra, s0-s3.
#-----------------------------------------------------------------------
main:
# Prologue, interleaved with loading the first getarray argument.
addi sp,sp,-48
lla a0,x
sd ra,40(sp)
sd s0,32(sp)
sd s1,24(sp)
sd s2,16(sp)
sd s3,8(sp)
# getarray(x): only this call's return value is kept (in s1).
# NOTE(review): presumably the number of words read -- confirm against
# the runtime library.
call getarray@plt
mv s1,a0
# getarray(y), getarray(v), getarray(a): return values are discarded.
lla a0,y
call getarray@plt
lla a0,v
call getarray@plt
lla a0,a
call getarray@plt
# s1 = (value returned by getarray(x)) - 1; used as n for every later call.
# The timing hook receives 40 in a0.
# NOTE(review): 40 looks like a source line number -- confirm.
li a0,40
addiw s1,s1,-1
call _sysy_starttime@plt
# Loop setup: 100 passes, s3 = &b, s2 = &a.
li s0,100
lla s3,b
lla s2,a
.L34:
# spmv.constprop.0(n, in = a, out = b)
mv a2,s3
mv a1,s2
mv a0,s1
call spmv.constprop.0
# The pass counter is decremented between the two calls; s0 is
# callee-saved, so it is still valid at the bne below.
addiw s0,s0,-1
# spmv.constprop.0(n, in = b, out = a)
mv a2,s2
mv a1,s3
mv a0,s1
call spmv.constprop.0
bne s0,zero,.L34
# The stop hook receives 48 in a0.
# NOTE(review): 48 looks like a source line number -- confirm.
li a0,48
call _sysy_stoptime@plt
# putarray(n, b)
mv a0,s1
lla a1,b
call putarray@plt
# Epilogue: restore saved registers, set the return value to 0 and
# release the frame.
ld ra,40(sp)
ld s0,32(sp)
ld s1,24(sp)
ld s2,16(sp)
ld s3,8(sp)
li a0,0
addi sp,sp,48
jr ra
.size main, .-main
# Exported data symbols. All of them are defined in .bss below, so they
# occupy no space in the object file and start out as zero bytes.
.globl c
.globl b
.globl a
.globl v
.globl y
.globl x
.globl _sysy_idx
.globl _sysy_us
.globl _sysy_s
.globl _sysy_m
.globl _sysy_h
.globl _sysy_l2
.globl _sysy_l1
.globl _sysy_end
.globl _sysy_start
.bss
# 8-byte alignment (the argument is a power of two: 2^3).
.align 3
# c: 400040 bytes (100010 four-byte words). Not referenced by any
# instruction visible in this file.
.type c, @object
.size c, 400040
c:
.zero 400040
# b: 400040 bytes (100010 words). Passed to spmv.constprop.0 and putarray
# from main.
.type b, @object
.size b, 400040
b:
.zero 400040
# a: 400040 bytes (100010 words). Filled by getarray and passed to
# spmv.constprop.0 from main.
.type a, @object
.size a, 400040
a:
.zero 400040
# v: 12000000 bytes (3000000 words). Filled by getarray; read as v[j] in
# spmv.constprop.0.
.type v, @object
.size v, 12000000
v:
.zero 12000000
# y: 12000000 bytes (3000000 words). Filled by getarray; read as y[j] in
# spmv.constprop.0.
.type y, @object
.size y, 12000000
y:
.zero 12000000
# x: 400040 bytes (100010 words). Filled by getarray; read as x[i] and
# x[i+1] in spmv.constprop.0.
.type x, @object
.size x, 400040
x:
.zero 400040
# The _sysy_* objects below are not touched by any instruction visible in
# this file.
# NOTE(review): presumably bookkeeping storage for the
# _sysy_starttime/_sysy_stoptime hooks -- confirm against the runtime.
# _sysy_idx is a 4-byte object; the second .zero 4 lies outside its declared
# size and pads the following object to an 8-byte boundary.
.type _sysy_idx, @object
.size _sysy_idx, 4
_sysy_idx:
.zero 4
.zero 4
# Six 4096-byte objects.
.type _sysy_us, @object
.size _sysy_us, 4096
_sysy_us:
.zero 4096
.type _sysy_s, @object
.size _sysy_s, 4096
_sysy_s:
.zero 4096
.type _sysy_m, @object
.size _sysy_m, 4096
_sysy_m:
.zero 4096
.type _sysy_h, @object
.size _sysy_h, 4096
_sysy_h:
.zero 4096
.type _sysy_l2, @object
.size _sysy_l2, 4096
_sysy_l2:
.zero 4096
.type _sysy_l1, @object
.size _sysy_l1, 4096
_sysy_l1:
.zero 4096
# Two 16-byte objects.
.type _sysy_end, @object
.size _sysy_end, 16
_sysy_end:
.zero 16
.type _sysy_start, @object
.size _sysy_start, 16
_sysy_start:
.zero 16
# Compiler identification string, followed by an empty .note.GNU-stack
# section (the conventional marker that no executable stack is required).
.ident "GCC: (Debian 12.2.0-13) 12.2.0"
.section .note.GNU-stack,"",@progbits