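# sysy-data/performance_c/asm/stencil1.s
# Snapshot dated 2024-06-14 13:10:27 +08:00.
# RISC-V (rv64) assembly emitted by GCC for stencil1.sy. The web viewer this
# was copied from labelled it "ArmAsm"; the directives below are RISC-V.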
.file "stencil1.sy"
.option pic
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
#-----------------------------------------------------------------------
# cutout -- saturate an integer to the range [0, 255]
#   In:    a0 = value
#   Out:   a0 = 255 when the incoming register compares greater than 255
#               (full-width signed compare); otherwise the low 32 bits,
#               sign-extended, with negative results replaced by 0
#   Clobb: a5
#-----------------------------------------------------------------------
        .text
        .align  1
        .globl  cutout
        .type   cutout, @function
cutout:
        li      a5,255                  # upper bound of the range
        bgt     a0,a5,.Lcutout_high     # above the range -> saturate high
        sext.w  a0,a0                   # continue with the 32-bit value
        bgez    a0,.Lcutout_done        # already inside [0, 255]
        li      a0,0                    # below the range -> saturate low
.Lcutout_done:
        ret
.Lcutout_high:
        mv      a0,a5                   # a0 = 255
        ret
        .size   cutout, .-cutout
#-----------------------------------------------------------------------
# main
#   Calls getarray with the address of image_in, then fills image_out.
#   Both arrays are addressed as rows of 4096 bytes holding 32-bit words
#   (1024 words per row, 1024 rows):
#     * rows 1..1022, columns 1..1022: 8*center minus the eight surrounding
#       words of image_in, with the result clamped to [0, 255];
#     * column 0, column 1023, row 0 and row 1023: copied from image_in.
#   Afterwards calls putarray(1048576, image_out) and returns the value
#   that getarray returned.
#   The computation is bracketed by _sysy_starttime(25) and
#   _sysy_stoptime(61); the arguments are presumably source line numbers
#   of stencil1.sy -- TODO confirm.
#   Stack: 112-byte frame holding ra and s0-s11.
#-----------------------------------------------------------------------
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
main:
# Prologue: allocate the frame and save ra plus every s-register used below.
addi sp,sp,-112
# First argument for getarray (loaded early, before the saves).
lla a0,image_in
sd ra,104(sp)
sd s0,96(sp)
sd s1,88(sp)
sd s11,8(sp)
sd s2,80(sp)
sd s3,72(sp)
sd s4,64(sp)
sd s5,56(sp)
sd s6,48(sp)
sd s7,40(sp)
sd s8,32(sp)
sd s9,24(sp)
sd s10,16(sp)
call getarray@plt
# Keep getarray's result in s11; it becomes main's return value.
mv s11,a0
li a0,25
call _sysy_starttime@plt
# Loop-invariant constants and row pointers for the stencil pass.
#   t3 = 4096      byte stride between consecutive rows
#   s0 = -4084     offset from a0 to word 2 of the center row
#   t0 = -4080     offset from t5 to word 1 of the output row
#   s1 = 4088      offset from a0 to word 1021 of the row below
#   a6 = 255       upper clamp bound
#   ra = 1021      end value for the column counter s8 (ra is free to reuse
#                  here because the real return address is saved on the stack)
#   t2 = 1046528   end value for t6 (1022 rows * 1024)
#   t4 -> image_in  row 0, word 1021 (row above the center row)
#   t5 -> image_out row 1, word 1021 (output row)
#   a0 -> image_in  row 1, word 1023 (last word of the center row)
#   t1 -> image_in  row 0, word 2
#   a7 -> image_in  (base, used later by the column-copy loop)
li t0,-4096
li t3,4096
li t2,1048576
addi s0,t0,12
lla t4,image_in+4084
lla t5,image_out+8180
lla a7,image_in
lla a0,image_in+8188
lla t1,image_in+8
li t6,0
addi t0,t0,16
li a6,255
li ra,1021
addi s1,t3,-8
addi t2,t2,-2048
# Outer loop: one iteration per center row (rows 1..1022).
.L6:
# Preload words 0 and 1 of the three rows so the inner loop only has to
# load the newly exposed column for each output word:
#   s7,s6 = row below  [0],[1]
#   s5,s4 = center row [0],[1]
#   s3,s2 = row above  [0],[1]
lw s7,4(a0)
lw s6,8(a0)
lw s5,12(t4)
lw s4,16(t4)
lw s3,-8(t1)
lw s2,-4(t1)
# Row progress counter: advances by 1024 per row, compared with t2 at the end.
addiw t6,t6,1024
# Running pointers for the inner loop:
#   a1 -> center row word 2, a2 -> row below word 2,
#   a3 -> output row word 1, a4 -> row above word 2.
add a1,a0,s0
addi a2,a0,12
add a3,t5,t0
mv a4,t1
# Column counter s8 starts at 1 and steps by 2 (two output words per pass).
li s8,1
# Inner loop: computes output columns c = s8 and c+1 per iteration, for
# c = 1, 3, ..., 1019. On entry s3/s5/s7 hold column c-1 and s2/s4/s6 hold
# column c of the row above / center row / row below.
.L15:
# First output word: a5 = 8*center[c] - (eight neighbours), accumulated while
# the column c+1 values are loaded into s3/s5/s7.
slliw a5,s4,3
subw a5,a5,s3
lw s3,0(a4)
subw a5,a5,s2
addiw s8,s8,2
subw a5,a5,s3
subw a5,a5,s5
lw s5,0(a1)
subw a5,a5,s5
subw a5,a5,s7
lw s7,0(a2)
subw a5,a5,s6
# Second output word starts here: s9 = 8*center[c+1] - above[c] - above[c+1].
slliw s9,s5,3
subw a5,a5,s7
subw s9,s9,s2
sext.w s10,a5
subw s9,s9,s3
# Upper clamp: results above 255 become 255 (in both a5 and its copy s10).
ble s10,a6,.L11
li a5,255
li s10,255
.L11:
# Load column c+2 into s2/s4/s6 while finishing both sums.
lw s2,4(a4)
# Lower clamp: s10 becomes all ones when the value is non-negative and zero
# when it is negative, so the AND below turns negative results into 0.
not s10,s10
srai s10,s10,63
subw s9,s9,s2
subw s9,s9,s4
lw s4,4(a1)
and a5,a5,s10
# Store the clamped result for column c.
sw a5,0(a3)
subw a5,s9,s4
subw a5,a5,s6
lw s6,4(a2)
subw a5,a5,s7
addi a4,a4,8
subw a5,a5,s6
sext.w s9,a5
# Same clamp sequence for the column c+1 result.
ble s9,a6,.L13
li a5,255
li s9,255
.L13:
not s9,s9
srai s9,s9,63
and a5,a5,s9
# Store the clamped result for column c+1.
sw a5,4(a3)
addi a1,a1,8
addi a2,a2,8
addi a3,a3,8
bne s8,ra,.L15
# Remainder loop setup: columns 1021 and 1022 of the same row, one word per
# iteration, with all nine inputs loaded from memory each time.
#   a4 -> center row word 1021, a2 -> row below word 1021,
#   a1 -> output row word 1021, a3 -> row above word 1021.
addi a4,a0,-8
add a2,a0,s1
mv a1,t5
mv a3,t4
.L9:
# a5 = 8*center - three words above - left - right - three words below.
lw a5,0(a4)
lw s5,-4(a3)
lw s4,0(a3)
lw s3,4(a3)
slliw a5,a5,3
lw s2,-4(a4)
subw a5,a5,s5
subw a5,a5,s4
lw s5,4(a4)
subw a5,a5,s3
lw s4,-4(a2)
subw a5,a5,s2
lw s3,0(a2)
lw s2,4(a2)
subw a5,a5,s5
subw a5,a5,s4
subw a5,a5,s3
subw a5,a5,s2
sext.w s2,a5
addi a4,a4,4
# Clamp to [0, 255] with the same branch-plus-mask sequence as above.
ble s2,a6,.L7
li a5,255
li s2,255
.L7:
not s2,s2
srai s2,s2,63
and a5,a5,s2
sw a5,0(a1)
addi a3,a3,4
addi a2,a2,4
addi a1,a1,4
# Stop once a4 reaches the last word of the center row (word 1023).
bne a4,a0,.L9
# Advance every row pointer by one row; a4 equals a0 here.
add t4,t4,t3
add t5,t5,t3
add a0,a4,t3
add t1,t1,t3
bne t6,t2,.L6
# Column copy: for every row, copy word 0 and word 1023 (byte offset 4092)
# from image_in to image_out. a7 walks image_in, a5 walks image_out, and a0
# marks the end of image_in.
li a4,4096
lla a5,image_out
lla a0,image_in+4194304
addi a3,a4,-4
.L18:
lw a2,0(a7)
add a1,a7,a3
lw a1,0(a1)
sw a2,0(a5)
add a2,a5,a3
sw a1,0(a2)
add a7,a7,a4
add a5,a5,a4
bne a0,a7,.L18
# Row 0 copy: the first 4096 bytes of image_in go to image_out, 32 bytes per
# iteration using 64-bit loads and stores.
lla a5,image_in
lla a4,image_out
lla a3,image_in+4096
.L19:
ld a7,0(a5)
ld a6,8(a5)
ld a1,16(a5)
ld a2,24(a5)
sd a7,0(a4)
sd a6,8(a4)
sd a1,16(a4)
sd a2,24(a4)
addi a5,a5,32
addi a4,a4,32
bne a5,a3,.L19
# Row 1023 copy: the last 4096 bytes (starting at byte offset 4190208) are
# copied the same way; a0 still holds the end address of image_in.
lla a5,image_in+4190208
lla a4,image_out+4190208
.L20:
ld a6,0(a5)
ld a1,8(a5)
ld a2,16(a5)
ld a3,24(a5)
sd a6,0(a4)
sd a1,8(a4)
sd a2,16(a4)
sd a3,24(a4)
addi a5,a5,32
addi a4,a4,32
bne a5,a0,.L20
li a0,61
call _sysy_stoptime@plt
# Hand the result to putarray: a0 = 1048576, a1 = address of image_out.
li a0,1048576
lla a1,image_out
call putarray@plt
# Epilogue: restore saved registers, return getarray's result from s11.
ld ra,104(sp)
ld s0,96(sp)
ld s1,88(sp)
ld s2,80(sp)
ld s3,72(sp)
ld s4,64(sp)
ld s5,56(sp)
ld s6,48(sp)
ld s7,40(sp)
ld s8,32(sp)
ld s9,24(sp)
ld s10,16(sp)
mv a0,s11
ld s11,8(sp)
addi sp,sp,112
jr ra
.size main, .-main
#-----------------------------------------------------------------------
# Zero-initialised global objects, laid out back to back in .bss.
#
# bss_global NAME, BYTES
#   Exports NAME as an object symbol of size BYTES and reserves BYTES
#   zero bytes for it at the current location.
#-----------------------------------------------------------------------
        .macro  bss_global name, bytes
        .globl  \name
        .type   \name, @object
        .size   \name, \bytes
\name:
        .zero   \bytes
        .endm

        .bss
        .align  3

        # Image buffers: 4194304 bytes each (main addresses them as rows of
        # 4096 bytes holding 32-bit words).
        bss_global image_out, 4194304
        bss_global image_in, 4194304

        bss_global _sysy_idx, 4
        # Four bytes of padding outside _sysy_idx's declared size, so the next
        # object starts at a multiple of 8 bytes from the start of this block.
        .zero   4

        bss_global _sysy_us, 4096
        bss_global _sysy_s, 4096
        bss_global _sysy_m, 4096
        bss_global _sysy_h, 4096
        bss_global _sysy_l2, 4096
        bss_global _sysy_l1, 4096

        bss_global _sysy_end, 16
        bss_global _sysy_start, 16
.ident "GCC: (Debian 12.2.0-13) 12.2.0"
.section .note.GNU-stack,"",@progbits