# Listing: 279 lines, 4.1 KiB. The viewer tags this file "ArmAsm", but the `.attribute arch` line below declares a RISC-V rv64 target.
|
# File-level assembler setup emitted ahead of the first function.
.file "stencil0.sy"    # source file name recorded in the object
|
||
|
.option pic    # assemble position-independent code (symbols are reached via lla / @plt below)
|
||
|
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"    # target ISA string: rv64 with i, m, a, f, d, c, zicsr, zifencei
|
||
|
.attribute unaligned_access, 0    # object is tagged as not using unaligned memory accesses
|
||
|
.attribute stack_align, 16    # object is tagged with a 16-byte stack alignment
|
||
|
.text    # the following code goes into .text
|
||
|
#-----------------------------------------------------------------------
# int cutout(int val)
#
# Saturates a 32-bit signed value into the range [0, 255].
#
# In:     a0 = val
# Out:    a0 = 255 if val > 255, 0 if val < 0, otherwise val
# Clobb:  a5 (caller-saved scratch)
# Stack:  none (leaf function)
#
# The upper-bound test is done on the full 64-bit register first and the
# value is only narrowed to 32 bits afterwards, so the result matches the
# mask-based formulation for every possible register content.
#-----------------------------------------------------------------------
        .equ    CUTOUT_MAX, 255

        .align  1
        .globl  cutout
        .type   cutout, @function
cutout:
        li      a5, CUTOUT_MAX
        bgt     a0, a5, .Lcutout_saturate   # above the ceiling -> pin to max
        sext.w  a0, a0                      # narrow to a signed 32-bit value
        bltz    a0, .Lcutout_floor          # below zero -> pin to 0
        ret                                 # already inside [0, 255]

.Lcutout_floor:
        li      a0, 0
        ret

.Lcutout_saturate:
        li      a0, CUTOUT_MAX
        ret
        .size   cutout, .-cutout
|
||
|
#-----------------------------------------------------------------------
# main
#
# 1. Calls getarray(image_in) and keeps its return value in s8.
# 2. Calls _sysy_starttime(25).
# 3. For 1022 consecutive rows (2048-byte stride = 512 32-bit words per
#    row, starting at the second row) and words 1..510 of each row,
#    computes
#        8*center - (sum of the 8 surrounding words)
#    from image_in, clamps the result to [0, 255] and stores it at the
#    same position in image_out.
# 4. Copies word 0 and word 511 of every row, plus the whole first and
#    last rows, from image_in to image_out unchanged.
# 5. Calls _sysy_stoptime(61), then putarray(524288, image_out).
# 6. Returns the value getarray returned.
#
# "prev / cur / next" in the comments below mean the rows at -2048 / 0 /
# +2048 bytes relative to the row being produced; "[k]" is the k-th
# 32-bit word of that row.
#
# Stack: 80-byte frame holding ra and s0-s8.
# Note:  ra is reused as an ordinary scratch register (loop bound) once
#        it has been saved; it is reloaded from the frame before return.
#-----------------------------------------------------------------------
.section .text.startup,"ax",@progbits
|
||
|
.align 1
|
||
|
.globl main
|
||
|
.type main, @function
|
||
|
main:
|
||
|
addi sp,sp,-80    # allocate the 80-byte frame
|
||
|
lla a0,image_in    # a0 = &image_in (argument for getarray)
|
||
|
sd ra,72(sp)    # save return address and s0-s8 (all are overwritten below)
|
||
|
sd s8,0(sp)
|
||
|
sd s0,64(sp)
|
||
|
sd s1,56(sp)
|
||
|
sd s2,48(sp)
|
||
|
sd s3,40(sp)
|
||
|
sd s4,32(sp)
|
||
|
sd s5,24(sp)
|
||
|
sd s6,16(sp)
|
||
|
sd s7,8(sp)
|
||
|
call getarray@plt
|
||
|
mv s8,a0    # s8 = getarray result; becomes main's return value
|
||
|
li a0,25    # argument 25 - presumably the source line number; TODO confirm against the runtime
|
||
|
call _sysy_starttime@plt
|
||
|
# ---- loop-invariant constants and initial row pointers ----
li t2,-4096    # t2 becomes -4084 after the addi below
|
||
|
li t4,4096    # t4 becomes 2048 (row stride in bytes) after the addi below
|
||
|
li t0,524288    # t0 becomes 523264 (final value of t6) after the addi below
|
||
|
lla t3,image_in+2036    # t3 -> prev[509]
|
||
|
lla t5,image_out+4084    # t5 -> output cur[509] (2048 + 2036)
|
||
|
lla t1,image_in    # t1 = &image_in, consumed later by the edge-column copy at .L18
|
||
|
lla a6,image_in+4092    # a6 -> cur[511], the last word of the current row (2048 + 2044)
|
||
|
li t6,0    # t6 = row progress counter, advanced by 512 per row
|
||
|
addi t2,t2,12    # t2 = -4084: offset from cur[511] back to prev[2]
|
||
|
li a7,255    # a7 = upper clamp bound
|
||
|
li ra,509    # ra reused as scratch: exit value of the paired-column counter s5
|
||
|
addi t4,t4,-2048    # t4 = 2048
|
||
|
addi t0,t0,-1024    # t0 = 523264 = 1022 * 512, so the row loop runs 1022 times
|
||
|
# ---- row loop: one pass per output row ----
.L6:
|
||
|
lw s4,4(a6)    # s4 = next[0]
|
||
|
lw s3,8(a6)    # s3 = next[1]
|
||
|
lw s2,-2044(a6)    # s2 = cur[0]
|
||
|
lw s1,-2040(a6)    # s1 = cur[1]
|
||
|
lw s0,-2036(t3)    # s0 = prev[0]
|
||
|
lw a0,-2032(t3)    # a0 = prev[1]
|
||
|
addiw t6,t6,512    # advance row progress counter
|
||
|
add a1,a6,t2    # a1 -> prev[2]
|
||
|
addi a2,a6,-2036    # a2 -> cur[2]
|
||
|
addi a3,a6,12    # a3 -> next[2]
|
||
|
addi a4,t5,-2032    # a4 -> output cur[1]
|
||
|
li s5,1    # s5 = column of the first output of the upcoming pair
|
||
|
# ---- paired-column loop: produces outputs for columns c and c+1 per pass ----
# On entry (c = s5): s0/a0 = prev[c-1]/prev[c], s2/s1 = cur[c-1]/cur[c],
# s4/s3 = next[c-1]/next[c]; a1/a2/a3 -> prev/cur/next[c+1]; a4 -> output cur[c].
# a5 accumulates the result for column c, s6 the result for column c+1.
.L15:
|
||
|
slliw a5,s1,3    # a5 = 8 * cur[c]
|
||
|
subw a5,a5,s0    # - prev[c-1]
|
||
|
lw s0,0(a1)    # s0 = prev[c+1]
|
||
|
subw a5,a5,a0    # - prev[c]
|
||
|
addiw s5,s5,2    # column counter += 2
|
||
|
subw a5,a5,s0    # - prev[c+1]
|
||
|
subw a5,a5,s2    # - cur[c-1]
|
||
|
lw s2,0(a2)    # s2 = cur[c+1]
|
||
|
subw a5,a5,s2    # - cur[c+1]
|
||
|
subw a5,a5,s4    # - next[c-1]
|
||
|
lw s4,0(a3)    # s4 = next[c+1]
|
||
|
subw a5,a5,s3    # - next[c]
|
||
|
slliw s6,s2,3    # s6 = 8 * cur[c+1] (start of the second result)
|
||
|
subw a5,a5,s4    # - next[c+1]; a5 now holds the unclamped result for column c
|
||
|
subw s6,s6,a0    # s6 -= prev[c]
|
||
|
sext.w s7,a5    # s7 = sign-extended copy used for the clamp tests
|
||
|
subw s6,s6,s0    # s6 -= prev[c+1]
|
||
|
ble s7,a7,.L11    # result <= 255: skip the upper clamp
|
||
|
li a5,255    # result > 255: saturate to 255
|
||
|
li s7,255
|
||
|
.L11:
|
||
|
lw a0,4(a1)    # a0 = prev[c+2]
|
||
|
not s7,s7    # sign bit of ~s7 is set exactly when s7 >= 0
|
||
|
srai s7,s7,63    # s7 = all ones if result >= 0, else 0
|
||
|
subw s6,s6,a0    # s6 -= prev[c+2]
|
||
|
subw s6,s6,s1    # s6 -= cur[c]
|
||
|
lw s1,4(a2)    # s1 = cur[c+2]
|
||
|
and a5,a5,s7    # negative result becomes 0
|
||
|
sw a5,0(a4)    # store clamped result for column c
|
||
|
subw a5,s6,s1    # a5 = s6 - cur[c+2]
|
||
|
subw a5,a5,s3    # - next[c]
|
||
|
lw s3,4(a3)    # s3 = next[c+2]
|
||
|
subw a5,a5,s4    # - next[c+1]
|
||
|
addi a1,a1,8    # advance prev-row pointer by two words
|
||
|
subw a5,a5,s3    # - next[c+2]; a5 now holds the unclamped result for column c+1
|
||
|
sext.w s6,a5    # s6 = sign-extended copy used for the clamp tests
|
||
|
ble s6,a7,.L13    # result <= 255: skip the upper clamp
|
||
|
li a5,255    # result > 255: saturate to 255
|
||
|
li s6,255
|
||
|
.L13:
|
||
|
not s6,s6
|
||
|
srai s6,s6,63    # s6 = all ones if result >= 0, else 0
|
||
|
and a5,a5,s6    # negative result becomes 0
|
||
|
sw a5,4(a4)    # store clamped result for column c+1
|
||
|
addi a2,a2,8    # advance cur-row pointer by two words
|
||
|
addi a3,a3,8    # advance next-row pointer by two words
|
||
|
addi a4,a4,8    # advance output pointer by two words
|
||
|
bne s5,ra,.L15    # repeat until s5 == 509 (254 passes, columns 1..508)
|
||
|
# ---- tail loop setup: remaining columns 509 and 510, one per pass ----
addi a3,a6,-8    # a3 -> cur[509]
|
||
|
addi a1,a6,2040    # a1 -> next[509]
|
||
|
mv a0,t5    # a0 -> output cur[509]
|
||
|
mv a2,t3    # a2 -> prev[509]
|
||
|
# All nine inputs are reloaded from memory on every pass of this loop.
.L9:
|
||
|
lw a5,0(a3)    # a5 = cur[c]
|
||
|
lw s2,-4(a2)    # s2 = prev[c-1]
|
||
|
lw s1,0(a2)    # s1 = prev[c]
|
||
|
lw s0,4(a2)    # s0 = prev[c+1]
|
||
|
slliw a5,a5,3    # a5 = 8 * cur[c]
|
||
|
lw a4,-4(a3)    # a4 = cur[c-1]
|
||
|
subw a5,a5,s2    # - prev[c-1]
|
||
|
subw a5,a5,s1    # - prev[c]
|
||
|
lw s2,4(a3)    # s2 = cur[c+1]
|
||
|
subw a5,a5,s0    # - prev[c+1]
|
||
|
lw s1,-4(a1)    # s1 = next[c-1]
|
||
|
subw a5,a5,a4    # - cur[c-1]
|
||
|
lw s0,0(a1)    # s0 = next[c]
|
||
|
lw a4,4(a1)    # a4 = next[c+1]
|
||
|
subw a5,a5,s2    # - cur[c+1]
|
||
|
subw a5,a5,s1    # - next[c-1]
|
||
|
subw a5,a5,s0    # - next[c]
|
||
|
subw a5,a5,a4    # - next[c+1]
|
||
|
sext.w a4,a5    # a4 = sign-extended copy used for the clamp tests
|
||
|
addi a3,a3,4    # advance cur-row pointer (also the loop-exit test value)
|
||
|
ble a4,a7,.L7    # result <= 255: skip the upper clamp
|
||
|
li a5,255    # result > 255: saturate to 255
|
||
|
li a4,255
|
||
|
.L7:
|
||
|
not a4,a4
|
||
|
srai a4,a4,63    # a4 = all ones if result >= 0, else 0
|
||
|
and a5,a5,a4    # negative result becomes 0
|
||
|
sw a5,0(a0)    # store clamped result
|
||
|
addi a2,a2,4    # advance prev-row pointer
|
||
|
addi a1,a1,4    # advance next-row pointer
|
||
|
addi a0,a0,4    # advance output pointer
|
||
|
bne a3,a6,.L9    # stop once the cur pointer reaches cur[511] (two passes)
|
||
|
add t3,t3,t4    # move prev-row anchor down one row
|
||
|
add t5,t5,t4    # move output anchor down one row
|
||
|
add a6,a3,t4    # a3 == a6 here, so a6 moves down one row as well
|
||
|
bne t6,t0,.L6    # next row until t6 == 523264
|
||
|
# ---- copy the first and last word of every row unchanged ----
li a4,4096
|
||
|
lla a5,image_out    # a5 = destination row pointer
|
||
|
lla a1,image_in+2097152    # a1 = one past the end of image_in (also the bound for .L20)
|
||
|
addi a4,a4,-2048    # a4 = 2048 (row stride in bytes)
|
||
|
.L18:
|
||
|
lw a2,0(t1)    # a2 = word 0 of the source row
|
||
|
lw a3,2044(t1)    # a3 = word 511 of the source row
|
||
|
add t1,t1,a4    # next source row
|
||
|
sw a2,0(a5)
|
||
|
sw a3,2044(a5)
|
||
|
add a5,a5,a4    # next destination row
|
||
|
bne a1,t1,.L18    # until the source pointer reaches the end of image_in
|
||
|
# ---- copy the first 2048 bytes (first row), 32 bytes per pass ----
lla a5,image_in
|
||
|
lla a4,image_out
|
||
|
lla a3,image_in+2048    # end of the first row
|
||
|
.L19:
|
||
|
ld a7,0(a5)
|
||
|
ld a6,8(a5)
|
||
|
ld a0,16(a5)
|
||
|
ld a2,24(a5)
|
||
|
sd a7,0(a4)
|
||
|
sd a6,8(a4)
|
||
|
sd a0,16(a4)
|
||
|
sd a2,24(a4)
|
||
|
addi a5,a5,32
|
||
|
addi a4,a4,32
|
||
|
bne a5,a3,.L19
|
||
|
# ---- copy the last 2048 bytes (last row), 32 bytes per pass ----
lla a5,image_in+2095104    # 2097152 - 2048: start of the last row
|
||
|
lla a4,image_out+2095104
|
||
|
.L20:
|
||
|
ld a6,0(a5)
|
||
|
ld a0,8(a5)
|
||
|
ld a2,16(a5)
|
||
|
ld a3,24(a5)
|
||
|
sd a6,0(a4)
|
||
|
sd a0,8(a4)
|
||
|
sd a2,16(a4)
|
||
|
sd a3,24(a4)
|
||
|
addi a5,a5,32
|
||
|
addi a4,a4,32
|
||
|
bne a5,a1,.L20    # a1 still holds image_in+2097152 from before .L18
|
||
|
li a0,61    # argument 61 - presumably the source line number; TODO confirm against the runtime
|
||
|
call _sysy_stoptime@plt
|
||
|
li a0,524288    # first argument: 524288 (= 2097152 / 4)
|
||
|
lla a1,image_out    # second argument: &image_out
|
||
|
call putarray@plt
|
||
|
ld ra,72(sp)    # restore return address and saved registers
|
||
|
ld s0,64(sp)
|
||
|
ld s1,56(sp)
|
||
|
ld s2,48(sp)
|
||
|
ld s3,40(sp)
|
||
|
ld s4,32(sp)
|
||
|
ld s5,24(sp)
|
||
|
ld s6,16(sp)
|
||
|
ld s7,8(sp)
|
||
|
mv a0,s8    # return value = getarray result (moved before s8 is restored)
|
||
|
ld s8,0(sp)
|
||
|
addi sp,sp,80    # release the frame
|
||
|
jr ra
|
||
|
.size main, .-main
|
||
|
# Exported data objects. All of them are zero-filled and placed in .bss.
# Only image_in and image_out are referenced by code in this file; the
# _sysy_* objects are presumably bookkeeping for the _sysy_starttime /
# _sysy_stoptime runtime - TODO confirm against the runtime library.
.globl image_out
|
||
|
.globl image_in
|
||
|
.globl _sysy_idx
|
||
|
.globl _sysy_us
|
||
|
.globl _sysy_s
|
||
|
.globl _sysy_m
|
||
|
.globl _sysy_h
|
||
|
.globl _sysy_l2
|
||
|
.globl _sysy_l1
|
||
|
.globl _sysy_end
|
||
|
.globl _sysy_start
|
||
|
.bss
|
||
|
.align 3    # 8-byte alignment for the objects that follow
|
||
|
.type image_out, @object
|
||
|
.size image_out, 2097152
|
||
|
# image_out: 2097152 bytes; main stores its results here with 32-bit sw
# (and 64-bit sd for the copied first/last rows).
image_out:
|
||
|
.zero 2097152
|
||
|
.type image_in, @object
|
||
|
.size image_in, 2097152
|
||
|
# image_in: 2097152 bytes; passed to getarray and read by main.
image_in:
|
||
|
.zero 2097152
|
||
|
.type _sysy_idx, @object
|
||
|
.size _sysy_idx, 4
|
||
|
# _sysy_idx: 4-byte object.
_sysy_idx:
|
||
|
.zero 4
|
||
|
.zero 4    # 4 bytes of padding outside _sysy_idx's declared size
|
||
|
.type _sysy_us, @object
|
||
|
.size _sysy_us, 4096
|
||
|
# _sysy_us, _sysy_s, _sysy_m, _sysy_h, _sysy_l2, _sysy_l1: 4096 bytes each.
_sysy_us:
|
||
|
.zero 4096
|
||
|
.type _sysy_s, @object
|
||
|
.size _sysy_s, 4096
|
||
|
_sysy_s:
|
||
|
.zero 4096
|
||
|
.type _sysy_m, @object
|
||
|
.size _sysy_m, 4096
|
||
|
_sysy_m:
|
||
|
.zero 4096
|
||
|
.type _sysy_h, @object
|
||
|
.size _sysy_h, 4096
|
||
|
_sysy_h:
|
||
|
.zero 4096
|
||
|
.type _sysy_l2, @object
|
||
|
.size _sysy_l2, 4096
|
||
|
_sysy_l2:
|
||
|
.zero 4096
|
||
|
.type _sysy_l1, @object
|
||
|
.size _sysy_l1, 4096
|
||
|
_sysy_l1:
|
||
|
.zero 4096
|
||
|
.type _sysy_end, @object
|
||
|
.size _sysy_end, 16
|
||
|
# _sysy_end, _sysy_start: 16 bytes each.
_sysy_end:
|
||
|
.zero 16
|
||
|
.type _sysy_start, @object
|
||
|
.size _sysy_start, 16
|
||
|
_sysy_start:
|
||
|
.zero 16
|
||
|
.ident "GCC: (Debian 12.2.0-13) 12.2.0"    # producer identification string
|
||
|
.section .note.GNU-stack,"",@progbits    # empty, non-executable note section: this object does not request an executable stack
|