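# Code-viewer metadata (not part of the program): 227 lines, 3.4 KiB.
# The viewer labelled this file "ArmAsm", but the content is RISC-V (RV64) assembly.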
# Assembler preamble.
# Code that follows goes into .text. The two ELF attributes record the
# build target: tag 4 (stack alignment) = 16 bytes, tag 5 (architecture
# string) = RV64 with the I, M, A, F, D and C extensions.
        .text
        .attribute      4, 16
        .attribute      5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
        .file   "stencil1.sy"
#-----------------------------------------------------------------------
# cutout -- clamp a signed value to the range [0, 255]
# C-equivalent (inferred from register usage): int cutout(int val)
# In:    a0 = val
# Out:   a0 = 255 when val is 255 or more,
#             0   when val is 0 or less,
#             val otherwise
# Clobb: a1
# Stack: none (leaf function)
#-----------------------------------------------------------------------
        .globl  cutout
        .p2align 1
        .type   cutout,@function
cutout:
        li      a1, 255
        bge     a0, a1, .LBB0_3         # val at or above 255: saturate high
        blez    a0, .LBB0_4             # val at or below 0:   saturate low
        ret                             # strictly inside the range: unchanged
.LBB0_3:
        li      a0, 255
        ret                             # 255 is always positive, so return directly
.LBB0_4:
        li      a0, 0
        ret
.Lfunc_end0:
        .size   cutout, .Lfunc_end0-cutout
#-----------------------------------------------------------------------
# main
# Fills image_in via getarray, stores a clamped 3x3 stencil result for
# every interior word into image_out, copies the border words through
# unchanged, prints image_out with putarray, and returns the value that
# getarray produced.
#
# Both buffers are addressed as 1024 lines of 4096 bytes (1024 4-byte
# words per line).
#
# Out:   a0 = value returned by getarray
# Saved: ra, s0-s4 in a 48-byte frame
# Calls: getarray, _sysy_starttime, memcpy, _sysy_stoptime, putarray
#
# Stencil-loop registers:
#   t0      outer counter, 1..1022 (loop exits when it reaches a6 = 1023)
#   s3, a7  outer base in image_in / image_out, advanced 4 bytes per pass
#   s0      inner countdown starting at 1022
#   s1, a0  top-left word of the current 3x3 window in image_in / image_out
#   t2, t1  4096 and 8192: byte offsets of the next line and the one after
#   t3      4100: byte offset of the window centre (one line + one word)
#-----------------------------------------------------------------------
        .globl  main
        .p2align 1
        .type   main,@function
main:
        addi    sp, sp, -48             # 48-byte frame keeps sp 16-byte aligned
        sd      ra, 40(sp)
        sd      s0, 32(sp)
        sd      s1, 24(sp)
        sd      s2, 16(sp)
        sd      s3, 8(sp)
        sd      s4, 0(sp)

        # --- input and timer start -----------------------------------
        lui     a0, %hi(image_in)
        addi    s3, a0, %lo(image_in)   # s3 = &image_in (survives the calls)
        mv      a0, s3
        call    getarray                # getarray(image_in)
        mv      s2, a0                  # keep the result; it is main's return value
        li      a0, 24
        call    _sysy_starttime         # _sysy_starttime(24)

        # --- stencil over the interior -------------------------------
        lui     a0, %hi(image_out)
        addi    a7, a0, %lo(image_out)  # a7 = &image_out
        li      t0, 1
        lui     t1, 2                   # t1 = 8192  (two lines)
        lui     t2, 1                   # t2 = 4096  (one line)
        addiw   t3, t2, 4               # t3 = 4100  (one line + one word)
        li      a6, 1023                # outer loop bound
        j       .LBB1_2
.LBB1_1:                                # outer step: move one word sideways
        addi    t0, t0, 1
        addi    s3, s3, 4
        addi    a7, a7, 4
        beq     t0, a6, .LBB1_8         # all 1022 interior positions done
.LBB1_2:                                # outer body: restart the inner walk
        li      s0, 1022
        mv      a0, a7
        mv      s1, s3
        j       .LBB1_4
.LBB1_3:                                # store result, then move one line down
        add     a1, a0, t3
        sw      a5, 0(a1)               # image_out centre word = clamped value
        add     s1, s1, t2
        addi    s0, s0, -1
        add     a0, a0, t2
        beqz    s0, .LBB1_1
.LBB1_4:                                # compute 8*centre - sum of 8 neighbours
        add     a5, s1, t3              # a5 = address of the centre word
        lw      a1, 0(a5)               # centre
        add     a2, s1, t1              # a2 = address of the bottom-left word
        slliw   t4, a1, 3               # t4 = centre * 8
        lw      t5, 0(s1)               # top line: three words
        lw      a4, 4(s1)
        lw      a1, 8(s1)
        lw      a3, -4(a5)              # left of centre
        lw      a5, 4(a5)               # right of centre
        lw      t6, 0(a2)               # bottom line: three words
        lw      s4, 4(a2)
        lw      a2, 8(a2)
        addw    a4, t5, a4
        addw    a1, a1, a4
        addw    a1, a1, a3
        addw    a1, a1, a5
        addw    a1, a1, t6
        addw    a1, a1, s4
        addw    a1, a1, a2              # a1 = sum of the eight neighbours
        subw    a5, t4, a1              # a5 = 8*centre - neighbour sum
        li      a1, 255
        blt     a5, a1, .LBB1_6         # below 255: no high clamp needed
        li      a5, 255
.LBB1_6:
        bgtz    a5, .LBB1_3             # positive: store as is
        li      a5, 0                   # zero or negative: clamp to 0
        j       .LBB1_3

        # --- copy first and last word of each of the 1024 lines ------
.LBB1_8:
        lui     a0, %hi(image_out)
        addi    a0, a0, %lo(image_out)  # a0 = destination line start
        lui     a1, %hi(image_in)
        addi    a1, a1, %lo(image_in)   # a1 = source line start
        li      a2, 1024                # line countdown
        lui     a3, 1                   # a3 = 4096 (line stride)
        addiw   a4, a3, -4              # a4 = 4092 (offset of a line's last word)
.LBB1_9:
        lw      a5, 0(a1)               # first word of the source line
        add     s1, a1, a4
        lw      s1, 0(s1)               # last word of the source line
        sw      a5, 0(a0)
        add     a5, a0, a4
        sw      s1, 0(a5)
        add     a0, a0, a3
        addi    a2, a2, -1
        add     a1, a1, a3
        bnez    a2, .LBB1_9

        # --- copy the whole first and last line ----------------------
        lui     a0, %hi(image_out)
        addi    s1, a0, %lo(image_out)  # s1 = &image_out (survives the calls)
        lui     a0, %hi(image_in)
        addi    s0, a0, %lo(image_in)   # s0 = &image_in  (survives the calls)
        lui     a2, 1                   # 4096 bytes
        mv      a0, s1
        mv      a1, s0
        call    memcpy@plt              # memcpy(image_out, image_in, 4096)
        lui     a1, 1023                # a1 = 1023 * 4096, offset of the last line
        add     a0, s1, a1
        add     a1, a1, s0
        lui     a2, 1                   # 4096 bytes
        call    memcpy@plt              # same copy for the last line

        # --- timer stop, output, return ------------------------------
        li      a0, 60
        call    _sysy_stoptime          # _sysy_stoptime(60)
        lui     a0, 256                 # a0 = 1048576 (= 4194304 / 4 words)
        mv      a1, s1
        call    putarray                # putarray(1048576, image_out)
        mv      a0, s2                  # return the value getarray produced
        ld      ra, 40(sp)
        ld      s0, 32(sp)
        ld      s1, 24(sp)
        ld      s2, 16(sp)
        ld      s3, 8(sp)
        ld      s4, 0(sp)
        addi    sp, sp, 48
        ret
.Lfunc_end1:
        .size   main, .Lfunc_end1-main
# image_in -- global zero-filled buffer of 4194304 bytes, 4-byte aligned.
# main accesses it as 4-byte words with a 4096-byte line stride
# (4194304 = 1024 lines * 4096 bytes).
# The .bss directive here also selects the section for the objects that
# follow it in this file.
        .type   image_in,@object
        .bss
        .globl  image_in
        .p2align 2
image_in:
        .zero   4194304
        .size   image_in, 4194304
# image_out -- global zero-filled buffer of 4194304 bytes, 4-byte aligned.
# Same size and alignment as image_in; main writes its results here.
# Emitted into the section that is current at this point (.bss, selected
# earlier in the file).
        .type   image_out,@object
        .globl  image_out
        .p2align 2
image_out:
        .zero   4194304
        .size   image_out, 4194304
# _sysy_start / _sysy_end -- two global zero-filled objects of 16 bytes
# each, 8-byte aligned. No instruction in this file references them.
# NOTE(review): presumably bookkeeping for the _sysy_starttime /
# _sysy_stoptime runtime calls -- confirm against the runtime library.
        .type   _sysy_start,@object
        .globl  _sysy_start
        .p2align 3
_sysy_start:
        .zero   16
        .size   _sysy_start, 16

        .type   _sysy_end,@object
        .globl  _sysy_end
        .p2align 3
_sysy_end:
        .zero   16
        .size   _sysy_end, 16
# _sysy_l1, _sysy_l2, _sysy_h, _sysy_m, _sysy_s, _sysy_us -- six global
# zero-filled objects of 4096 bytes each, 4-byte aligned. No instruction
# in this file references them.
# NOTE(review): presumably per-timer tables owned by the runtime's timing
# routines -- confirm against the runtime library.
        .type   _sysy_l1,@object
        .globl  _sysy_l1
        .p2align 2
_sysy_l1:
        .zero   4096
        .size   _sysy_l1, 4096

        .type   _sysy_l2,@object
        .globl  _sysy_l2
        .p2align 2
_sysy_l2:
        .zero   4096
        .size   _sysy_l2, 4096

        .type   _sysy_h,@object
        .globl  _sysy_h
        .p2align 2
_sysy_h:
        .zero   4096
        .size   _sysy_h, 4096

        .type   _sysy_m,@object
        .globl  _sysy_m
        .p2align 2
_sysy_m:
        .zero   4096
        .size   _sysy_m, 4096

        .type   _sysy_s,@object
        .globl  _sysy_s
        .p2align 2
_sysy_s:
        .zero   4096
        .size   _sysy_s, 4096

        .type   _sysy_us,@object
        .globl  _sysy_us
        .p2align 2
_sysy_us:
        .zero   4096
        .size   _sysy_us, 4096
# _sysy_idx -- global 4-byte word initialised to 0, 4-byte aligned,
# placed in the small zero-initialised data section (.sbss, nobits).
# No instruction in this file references it.
        .type   _sysy_idx,@object
        .section        .sbss,"aw",@nobits
        .globl  _sysy_idx
        .p2align 2
_sysy_idx:
        .word   0
        .size   _sysy_idx, 4
# File trailer: producer identification, the empty .note.GNU-stack
# section (marks the object as not needing an executable stack), and the
# LLVM address-significance table listing image_in and image_out.
        .ident  "Debian clang version 14.0.6"
        .section        ".note.GNU-stack","",@progbits
        .addrsig
        .addrsig_sym image_in
        .addrsig_sym image_out