# stencil1.sy -- RISC-V (rv64imafdc) assembly emitted by GCC 12.2.0; 290 lines, 4.3 KiB (the page's "ArmAsm" label is a mis-detection).
# ----------------------------------------------------------------------
# File-level assembler settings (GNU as, RISC-V).
# ----------------------------------------------------------------------
    .file   "stencil1.sy"           # name of the translated source file
    .option pic                     # assemble as position-independent code

    # Target ISA string recorded in the object's attributes.
    .attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
    .attribute unaligned_access, 0  # unaligned accesses are not assumed
    .attribute stack_align, 16      # stack is kept 16-byte aligned

    .text                           # code that follows goes into .text
|
|
# ----------------------------------------------------------------------
# cutout -- clamp a signed value to the range [0, 255].
#   (presumably `int cutout(int)`; the source file is not visible here)
#
#   In:     a0 = value
#   Out:    a0 = 255 if value > 255, 0 if value < 0, otherwise value
#   Clobb:  a4 (always), a5 (only on the <= 255 path)
#
# The upper-bound compare looks at the full 64-bit register before the
# sext.w, i.e. it relies on the caller passing a sign-extended 32-bit
# value in a0.
# ----------------------------------------------------------------------
    .align  1
    .globl  cutout
    .type   cutout, @function
cutout:
    li      a4, 255
    bgt     a0, a4, .Lcutout_saturate   # value > 255: take the cap

    # value <= 255: zero it when negative, branch-free.
    sext.w  a0, a0
    not     a5, a0
    srai    a5, a5, 63                  # a5 = -1 if a0 >= 0, else 0
    and     a0, a0, a5                  # negative inputs collapse to 0
    ret

.Lcutout_saturate:
    li      a0, 255
    ret
    .size   cutout, .-cutout
|
|
# ----------------------------------------------------------------------
# main -- 3x3 stencil from image_in into image_out, then border copy.
#
# Both images are treated as 1024 rows x 1024 columns of 32-bit words
# (row stride 4096 bytes).  For every row r and column c in 1..1022:
#
#     v = 8*in[r][c] - (sum of the eight neighbours of in[r][c])
#     out[r][c] = 255 if v > 255, 0 if v < 0, else v
#
# (arithmetic is 32-bit wrapping: slliw/subw).  Afterwards column 0,
# column 1023, row 0 and row 1023 are copied unchanged from image_in
# to image_out.
#
# Calls:  getarray(image_in)           -> result kept in s11
#         _sysy_starttime(25)          (argument presumably a source
#         _sysy_stoptime(61)            line number -- TODO confirm)
#         putarray(1048576, image_out)
# Out:    a0 = value returned by getarray
# Stack:  112-byte frame; ra at 104(sp), s0..s11 at 96(sp)..8(sp)
#
# Registers live across the stencil loops:
#   t1  -> in[r-1][2]       t4 -> in[r-1][1021]
#   a0  -> in[r][1023]      t5 -> out[r][1021]
#   a7  -> image_in (kept for the border copy)
#   t3  = 4096 (row stride) s0 = -4084   s1 = 4088   t0 = -4080
#   t6  = element counter, +1024 per row; t2 = 1046528 (= 1022*1024)
#   a6  = 255 (upper clamp) ra = 1021 (pair-loop end value; ra itself
#         is saved in the prologue and restored in the epilogue)
# ----------------------------------------------------------------------
    .section .text.startup,"ax",@progbits
    .align  1
    .globl  main
    .type   main, @function
main:
    # --- prologue: allocate frame, save ra and s0-s11 -----------------
    addi    sp, sp, -112
    sd      ra, 104(sp)
    sd      s0, 96(sp)
    sd      s1, 88(sp)
    sd      s2, 80(sp)
    sd      s3, 72(sp)
    sd      s4, 64(sp)
    sd      s5, 56(sp)
    sd      s6, 48(sp)
    sd      s7, 40(sp)
    sd      s8, 32(sp)
    sd      s9, 24(sp)
    sd      s10, 16(sp)
    sd      s11, 8(sp)

    # --- read the input image; keep getarray's result for the return --
    lla     a0, image_in
    call    getarray@plt
    mv      s11, a0

    li      a0, 25
    call    _sysy_starttime@plt

    # --- loop-invariant constants -------------------------------------
    li      t0, -4096
    addi    s0, t0, 12                  # s0 = -4084
    addi    t0, t0, 16                  # t0 = -4080
    li      t3, 4096                    # row stride in bytes
    addi    s1, t3, -8                  # s1 = 4088
    li      t2, 1048576
    addi    t2, t2, -2048               # t2 = 1046528, end value of t6
    li      t6, 0
    li      a6, 255
    li      ra, 1021

    # --- row cursors for the first processed row (r = 1) --------------
    lla     a7, image_in
    lla     t1, image_in+8              # in[0][2]
    lla     t4, image_in+4084           # in[0][1021]
    lla     a0, image_in+8188           # in[1][1023]
    lla     t5, image_out+8180          # out[1][1021]

# ======================= one iteration per row r ======================
.Lmain_row:
    # Preload columns 0 and 1 of the three input rows.
    lw      s3, -8(t1)                  # in[r-1][0]
    lw      s2, -4(t1)                  # in[r-1][1]
    lw      s5, 12(t4)                  # in[r][0]
    lw      s4, 16(t4)                  # in[r][1]
    lw      s7, 4(a0)                   # in[r+1][0]
    lw      s6, 8(a0)                   # in[r+1][1]

    mv      a4, t1                      # a4 -> in[r-1][2]
    add     a1, a0, s0                  # a1 -> in[r][2]
    addi    a2, a0, 12                  # a2 -> in[r+1][2]
    add     a3, t5, t0                  # a3 -> out[r][1]
    li      s8, 1                       # pair-loop counter
    addiw   t6, t6, 1024

# ---- two output pixels (columns c and c+1) per iteration -------------
# On entry: s3,s2 = in[r-1][c-1],[c]   s5,s4 = in[r][c-1],[c]
#           s7,s6 = in[r+1][c-1],[c]   a4/a1/a2 -> column c+1 of rows
#           r-1/r/r+1,  a3 -> out[r][c].
# s8 runs 1,3,...,1021, so columns 1..1020 are produced here.
.Lmain_pair:
    slliw   a5, s4, 3                   # a5 = 8*in[r][c]
    subw    a5, a5, s3                  # - in[r-1][c-1]
    lw      s3, 0(a4)                   # s3 = in[r-1][c+1]
    subw    a5, a5, s2                  # - in[r-1][c]
    addiw   s8, s8, 2
    subw    a5, a5, s3                  # - in[r-1][c+1]
    subw    a5, a5, s5                  # - in[r][c-1]
    lw      s5, 0(a1)                   # s5 = in[r][c+1]
    subw    a5, a5, s5                  # - in[r][c+1]
    subw    a5, a5, s7                  # - in[r+1][c-1]
    lw      s7, 0(a2)                   # s7 = in[r+1][c+1]
    subw    a5, a5, s6                  # - in[r+1][c]
    slliw   s9, s5, 3                   # s9 = 8*in[r][c+1] (second pixel)
    subw    a5, a5, s7                  # - in[r+1][c+1]
    subw    s9, s9, s2                  # - in[r-1][c]
    sext.w  s10, a5
    subw    s9, s9, s3                  # - in[r-1][c+1]
    ble     s10, a6, .Lmain_pair_low0
    li      a5, 255                     # first pixel above 255: cap
    li      s10, 255
.Lmain_pair_low0:
    lw      s2, 4(a4)                   # s2 = in[r-1][c+2]
    not     s10, s10
    srai    s10, s10, 63                # -1 if value >= 0, else 0
    subw    s9, s9, s2                  # - in[r-1][c+2]
    subw    s9, s9, s4                  # - in[r][c]
    lw      s4, 4(a1)                   # s4 = in[r][c+2]
    and     a5, a5, s10                 # negative -> 0
    sw      a5, 0(a3)                   # out[r][c]
    subw    a5, s9, s4                  # - in[r][c+2]
    subw    a5, a5, s6                  # - in[r+1][c]
    lw      s6, 4(a2)                   # s6 = in[r+1][c+2]
    subw    a5, a5, s7                  # - in[r+1][c+1]
    addi    a4, a4, 8
    subw    a5, a5, s6                  # - in[r+1][c+2]
    sext.w  s9, a5
    ble     s9, a6, .Lmain_pair_low1
    li      a5, 255                     # second pixel above 255: cap
    li      s9, 255
.Lmain_pair_low1:
    not     s9, s9
    srai    s9, s9, 63
    and     a5, a5, s9                  # negative -> 0
    sw      a5, 4(a3)                   # out[r][c+1]
    addi    a1, a1, 8
    addi    a2, a2, 8
    addi    a3, a3, 8
    bne     s8, ra, .Lmain_pair

    # ---- remaining columns 1021 and 1022, one pixel per iteration ----
    addi    a4, a0, -8                  # a4 -> in[r][1021]   (centre)
    add     a2, a0, s1                  # a2 -> in[r+1][1021] (below)
    mv      a1, t5                      # a1 -> out[r][1021]
    mv      a3, t4                      # a3 -> in[r-1][1021] (above)
.Lmain_tail:
    lw      a5, 0(a4)                   # centre
    lw      s5, -4(a3)                  # row above: left
    lw      s4, 0(a3)                   #            middle
    lw      s3, 4(a3)                   #            right
    slliw   a5, a5, 3                   # 8*centre
    lw      s2, -4(a4)                  # same row: left
    subw    a5, a5, s5
    subw    a5, a5, s4
    lw      s5, 4(a4)                   # same row: right
    subw    a5, a5, s3
    lw      s4, -4(a2)                  # row below: left
    subw    a5, a5, s2
    lw      s3, 0(a2)                   #            middle
    lw      s2, 4(a2)                   #            right
    subw    a5, a5, s5
    subw    a5, a5, s4
    subw    a5, a5, s3
    subw    a5, a5, s2
    sext.w  s2, a5
    addi    a4, a4, 4
    ble     s2, a6, .Lmain_tail_low
    li      a5, 255                     # above 255: cap
    li      s2, 255
.Lmain_tail_low:
    not     s2, s2
    srai    s2, s2, 63
    and     a5, a5, s2                  # negative -> 0
    sw      a5, 0(a1)
    addi    a3, a3, 4
    addi    a2, a2, 4
    addi    a1, a1, 4
    bne     a4, a0, .Lmain_tail         # stops once a4 reaches column 1023

    # ---- advance all row cursors by one row --------------------------
    add     t4, t4, t3
    add     t5, t5, t3
    add     a0, a4, t3                  # a4 == a0 here
    add     t1, t1, t3
    bne     t6, t2, .Lmain_row

    # --- copy first and last word of every row ------------------------
    li      a4, 4096                    # row stride
    lla     a5, image_out
    lla     a0, image_in+4194304        # one past the end of image_in
    addi    a3, a4, -4                  # 4092 = byte offset of column 1023
.Lmain_edge_cols:
    lw      a2, 0(a7)                   # in[r][0]
    add     a1, a7, a3
    lw      a1, 0(a1)                   # in[r][1023]
    sw      a2, 0(a5)
    add     a2, a5, a3
    sw      a1, 0(a2)
    add     a7, a7, a4
    add     a5, a5, a4
    bne     a0, a7, .Lmain_edge_cols

    # --- copy the first 4096 bytes (row 0), 32 bytes per iteration ----
    lla     a5, image_in
    lla     a4, image_out
    lla     a3, image_in+4096
.Lmain_first_row:
    ld      a7, 0(a5)
    ld      a6, 8(a5)
    ld      a1, 16(a5)
    ld      a2, 24(a5)
    sd      a7, 0(a4)
    sd      a6, 8(a4)
    sd      a1, 16(a4)
    sd      a2, 24(a4)
    addi    a5, a5, 32
    addi    a4, a4, 32
    bne     a5, a3, .Lmain_first_row

    # --- copy the last 4096 bytes (row 1023), 32 bytes per iteration --
    lla     a5, image_in+4190208
    lla     a4, image_out+4190208
.Lmain_last_row:
    ld      a6, 0(a5)
    ld      a1, 8(a5)
    ld      a2, 16(a5)
    ld      a3, 24(a5)
    sd      a6, 0(a4)
    sd      a1, 8(a4)
    sd      a2, 16(a4)
    sd      a3, 24(a4)
    addi    a5, a5, 32
    addi    a4, a4, 32
    bne     a5, a0, .Lmain_last_row     # a0 still = end of image_in

    li      a0, 61
    call    _sysy_stoptime@plt

    # --- emit the result image ----------------------------------------
    li      a0, 1048576                 # element count
    lla     a1, image_out
    call    putarray@plt

    # --- epilogue: return getarray's result, restore, free frame ------
    mv      a0, s11
    ld      ra, 104(sp)
    ld      s0, 96(sp)
    ld      s1, 88(sp)
    ld      s2, 80(sp)
    ld      s3, 72(sp)
    ld      s4, 64(sp)
    ld      s5, 56(sp)
    ld      s6, 48(sp)
    ld      s7, 40(sp)
    ld      s8, 32(sp)
    ld      s9, 24(sp)
    ld      s10, 16(sp)
    ld      s11, 8(sp)
    addi    sp, sp, 112
    ret
    .size   main, .-main
|
|
# ----------------------------------------------------------------------
# Zero-initialised global objects (.bss), start aligned to 8 bytes.
# Order and sizes are significant: each object directly follows the
# previous one.
# ----------------------------------------------------------------------
    .bss
    .align  3

    # Output image: 4194304 bytes, written by main.
    .globl  image_out
    .type   image_out, @object
    .size   image_out, 4194304
image_out:
    .zero   4194304

    # Input image: 4194304 bytes, passed to getarray by main.
    .globl  image_in
    .type   image_in, @object
    .size   image_in, 4194304
image_in:
    .zero   4194304

    # The _sysy_* objects below are defined here but not referenced by
    # any code in this file (presumably timing bookkeeping for the
    # _sysy_starttime/_sysy_stoptime runtime -- TODO confirm).
    .globl  _sysy_idx
    .type   _sysy_idx, @object
    .size   _sysy_idx, 4
_sysy_idx:
    .zero   4
    .zero   4                       # 4 pad bytes after the 4-byte object

    .globl  _sysy_us
    .type   _sysy_us, @object
    .size   _sysy_us, 4096
_sysy_us:
    .zero   4096

    .globl  _sysy_s
    .type   _sysy_s, @object
    .size   _sysy_s, 4096
_sysy_s:
    .zero   4096

    .globl  _sysy_m
    .type   _sysy_m, @object
    .size   _sysy_m, 4096
_sysy_m:
    .zero   4096

    .globl  _sysy_h
    .type   _sysy_h, @object
    .size   _sysy_h, 4096
_sysy_h:
    .zero   4096

    .globl  _sysy_l2
    .type   _sysy_l2, @object
    .size   _sysy_l2, 4096
_sysy_l2:
    .zero   4096

    .globl  _sysy_l1
    .type   _sysy_l1, @object
    .size   _sysy_l1, 4096
_sysy_l1:
    .zero   4096

    .globl  _sysy_end
    .type   _sysy_end, @object
    .size   _sysy_end, 16
_sysy_end:
    .zero   16

    .globl  _sysy_start
    .type   _sysy_start, @object
    .size   _sysy_start, 16
_sysy_start:
    .zero   16
|
|
# ----------------------------------------------------------------------
# Object-file trailer.
# ----------------------------------------------------------------------
    .ident  "GCC: (Debian 12.2.0-13) 12.2.0"    # producer identification string
    .section .note.GNU-stack,"",@progbits       # empty note section, no flags
|