# sysy-data/final_performance_c/asm/large_loop_array_2.s
#
# Listing size: 317 lines, 5.1 KiB
# Listing language tag: ArmAsm (the directives below declare an RV64 RISC-V target)

# File-level directives: start in the code section and describe the object to the toolchain.
.text
# Build attribute tag 4 with value 16 - presumably the RISC-V stack-alignment tag (16 bytes); confirm against the psABI.
.attribute 4, 16
# Build attribute tag 5 carries the ISA string: RV64 with the I, M, A, F, D and C extensions.
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
# Name of the source file recorded for this translation unit.
.file "large_loop_array_2.sy"
#-----------------------------------------------------------------------
# loop -- single-precision dot product of two float arrays.
# C-level shape inferred from register use: float loop(float *p, float *q, int n)
# In:    a0 = first float array, a1 = second float array, a2 = element count
# Out:   fa0 = sum over k in [0, n) of p[k] * q[k], accumulated in order with
#        fused multiply-add; 0.0f when n <= 0
# Clobb: a0, a1, a2, ft0, ft1 (leaf function, no stack use)
#-----------------------------------------------------------------------
        .globl      loop
        .p2align    1
        .type       loop,@function
loop:
        fmv.w.x     fa0, zero               # acc = 0.0f; also the result when n <= 0
        blez        a2, .Lloop_ret          # no elements: return the zero accumulator
        slli        a2, a2, 32              # clear the upper 32 bits of the count so the
        srli        a2, a2, 32              # down-counter below runs exactly n times
.Lloop_next:
        flw         ft0, 0(a0)              # ft0 = *p
        flw         ft1, 0(a1)              # ft1 = *q
        addi        a0, a0, 4               # p++
        addi        a1, a1, 4               # q++
        addi        a2, a2, -1              # one fewer element remaining
        fmadd.s     fa0, ft0, ft1, fa0      # acc = ft0 * ft1 + acc (single rounding)
        bnez        a2, .Lloop_next
.Lloop_ret:
        ret
.Lloop_end:
        .size       loop, .Lloop_end-loop
# Floating-point constant pool loaded by main via %hi/%lo addressing.
# Each entry is a raw IEEE-754 bit pattern; the decoded value is given beside it.
.section .sdata,"aw",@progbits
.p2align 3
.LCPI1_0:
# double -11442437121638400.0; main adds this to the accumulated total, so the
# sum is zero when the total equals 11442437121638400.
.quad 0xc344536b00000000
.LCPI1_1:
# float 1.0
.word 0x3f800000
# Padding so the next entry starts on an 8-byte boundary.
.zero 4
.LCPI1_2:
# double 0.1 (nearest representable value)
.quad 0x3fb999999999999a
.LCPI1_3:
# double 0.2 (nearest representable value)
.quad 0x3fc999999999999a
.LCPI1_4:
# double 1e-6: upper tolerance bound used in the final comparison in main
.quad 0x3eb0c6f7a0b5ed8d
.LCPI1_5:
# double -1e-6: lower tolerance bound used in the final comparison in main
.quad 0xbeb0c6f7a0b5ed8d
#-----------------------------------------------------------------------
# main -- benchmark driver.
#
# Reads an element count n with getint, then runs COUNT outer iterations.
# Each iteration i picks two coefficients (a, b), fills the not yet written
# part of two on-stack float arrays (X[j] = a + j, Y[j] = b + j), and adds the
# fused-multiply-add dot product of X and Y over n elements to a float total.
# Afterwards (total - 11442437121638400) is checked against [-1e-6, 1e-6]:
# putint(10) and return 0 when inside the range, putint(1) and return 1 otherwise.
#
# Coefficients: when i is a multiple of 10, a = previous a + 0.1 and
# b = previous b + 0.2 (added in double, rounded back to float);
# otherwise a = 0.0 and b = 1.0.
#
# Frame: 32800 bytes in total (2032 + 30768).
#   Y = sp + 8      .. sp + 16392   (16384 bytes = 4096 floats)
#   X = sp + 16392  .. sp + 32776   (16384 bytes = 4096 floats)
#   fs0, s0, ra saved directly above X.
# NOTE(review): n is never checked against the 4096-element capacity; a larger
# value from getint makes the fill loop write past the arrays.
#
# Two copies of the outer loop exist: one for n > 0 and one for n <= 0
# (.Lmain_empty), where no dot product is accumulated.
#
# Register roles in the n > 0 copy:
#   s0 = n, t1 = n zero-extended, t0 = COUNT, a1 = i, a5 = j (fill progress)
#   ft0 = 0.0f, ft1 = 1.0f, ft2 = 0.1, ft4 = 0.2, ft3 = total
#   ft5/ft6 = a/b of this iteration, fa0/ft7 = a/b of the previous iteration
#   t2, a4 = constants of the multiple-of-10 test, a6 = X base, a7 = Y base
#-----------------------------------------------------------------------
        .text
        .globl      main
        .p2align    1
        .type       main,@function
main:
        addi        sp, sp, -2032
        sd          ra, 2024(sp)
        sd          s0, 2016(sp)
        fsd         fs0, 2008(sp)
        lui         a0, 8
        addiw       a0, a0, -2000           # a0 = 30768, the rest of the frame
        sub         sp, sp, a0
        call        getint
        mv          s0, a0                  # s0 = n
        li          a0, 22                  # presumably a source line number - confirm against the runtime
        call        _sysy_starttime
        lui         a0, %hi(COUNT)
        lw          t0, %lo(COUNT)(a0)      # t0 = COUNT
        blez        t0, .Lmain_no_iters     # COUNT <= 0: total stays 0
        blez        s0, .Lmain_empty        # n <= 0: use the copy without dot product
        li          a5, 0                   # j = 0
        li          a1, 0                   # i = 0
        slli        a0, s0, 32
        srli        t1, a0, 32              # t1 = n zero-extended, dot-product trip count
        fmv.w.x     ft0, zero               # ft0 = 0.0f
        lui         a0, %hi(.LCPI1_1)
        flw         ft1, %lo(.LCPI1_1)(a0)  # ft1 = 1.0f
        lui         a0, 838861
        addiw       t2, a0, -819            # t2 = 0xCCCCCCCD, inverse of 5 modulo 2^32
        lui         a0, 104858
        lui         a2, %hi(.LCPI1_2)
        fld         ft2, %lo(.LCPI1_2)(a2)  # ft2 = 0.1
        lui         a2, %hi(.LCPI1_3)
        fld         ft4, %lo(.LCPI1_3)(a2)  # ft4 = 0.2
        addiw       a4, a0, -1639           # a4 = 0x19999999 = floor((2^32 - 1) / 10)
        lui         a0, 4
        addiw       a0, a0, 8
        add         a6, sp, a0              # a6 = X base (sp + 16392)
        addi        a7, sp, 8               # a7 = Y base (sp + 8)
        fmv.s       ft7, ft1                # previous b = 1.0
        fmv.s       fa0, ft0                # previous a = 0.0
        fmv.s       ft3, ft0                # total = 0.0
.Lmain_outer:
        mulw        a0, a1, t2              # i * inverse(5), low 32 bits
        slliw       a2, a0, 31
        srliw       a0, a0, 1
        or          a0, a0, a2              # rotate right by one bit
        fmv.s       ft5, ft0                # default a = 0.0
        fmv.s       ft6, ft1                # default b = 1.0
        bgeu        a4, a0, .Lmain_bump     # taken when i is a multiple of 10
        sext.w      a0, a5                  # a0 = j
        blt         a0, s0, .Lmain_fill_setup
        j           .Lmain_dot_setup        # arrays already filled up to n
.Lmain_bump:
        fcvt.d.s    ft5, fa0
        fadd.d      ft5, ft5, ft2
        fcvt.s.d    ft5, ft5                # a = (float)(previous a + 0.1)
        fcvt.d.s    ft6, ft7
        fadd.d      ft6, ft6, ft4
        fcvt.s.d    ft6, ft6                # b = (float)(previous b + 0.2)
        sext.w      a0, a5                  # a0 = j
        bge         a0, s0, .Lmain_dot_setup
.Lmain_fill_setup:
        slli        a3, a0, 2               # byte offset of element j
        add         a2, a6, a3              # a2 = &X[j]
        add         a5, a7, a3              # a5 = &Y[j] (j itself lives in a0 during the fill)
.Lmain_fill:
        fcvt.s.w    ft7, a0                 # (float) j
        fadd.s      fa0, ft5, ft7
        fsw         fa0, 0(a2)              # X[j] = a + j
        fadd.s      ft7, ft6, ft7
        fsw         ft7, 0(a5)              # Y[j] = b + j
        addi        a0, a0, 1
        addi        a2, a2, 4
        addi        a5, a5, 4
        bne         s0, a0, .Lmain_fill     # until j == n
        mv          a5, s0                  # j = n: later iterations skip the fill
.Lmain_dot_setup:
        addi        a0, sp, 8               # a0 = Y base
        lui         a2, 4
        addiw       a2, a2, 8
        add         a2, a2, sp              # a2 = X base
        mv          a3, t1                  # elements remaining
        fmv.s       ft7, ft0                # dot = 0.0
.Lmain_dot:
        flw         fa0, 0(a2)
        flw         fa1, 0(a0)
        fmadd.s     ft7, fa0, fa1, ft7      # dot = X[k] * Y[k] + dot, same pattern as the body of loop
        addi        a3, a3, -1
        addi        a0, a0, 4
        addi        a2, a2, 4
        bnez        a3, .Lmain_dot
        addiw       a1, a1, 1               # i++
        fadd.s      ft3, ft3, ft7           # total += dot
        fmv.s       ft7, ft6                # carry b into the next iteration
        fmv.s       fa0, ft5                # carry a into the next iteration
        blt         a1, t0, .Lmain_outer    # while i < COUNT
        lui         a0, %hi(.LCPI1_0)
        fld         ft0, %lo(.LCPI1_0)(a0)
        fcvt.d.s    ft1, ft3
        fadd.d      fs0, ft1, ft0           # fs0 = (double) total - 11442437121638400
        j           .Lmain_report
.Lmain_no_iters:
        lui         a0, %hi(.LCPI1_0)
        fld         fs0, %lo(.LCPI1_0)(a0)  # total is 0, so the difference is the constant itself
.Lmain_report:
        li          a0, 39                  # presumably a source line number - confirm against the runtime
        call        _sysy_stoptime          # fs0 is saved and restored by this frame, so it is kept live across the call
        lui         a0, %hi(.LCPI1_4)
        fld         ft0, %lo(.LCPI1_4)(a0)  # ft0 = 1e-6
        lui         a0, %hi(.LCPI1_5)
        fld         ft1, %lo(.LCPI1_5)(a0)  # ft1 = -1e-6
        fle.d       a0, fs0, ft0
        xori        a0, a0, 1               # a0 = not (diff <= 1e-6)
        fle.d       a1, ft1, fs0
        xori        a1, a1, 1               # a1 = not (-1e-6 <= diff)
        or          s0, a0, a1              # s0 = 1 when diff is outside the tolerance, else 0
        li          a0, 1
        bnez        s0, .Lmain_print        # mismatch: print 1
        li          a0, 10                  # match: print 10
.Lmain_print:
        call        putint
        mv          a0, s0                  # return value: 0 on match, 1 on mismatch
        lui         a1, 8
        addiw       a1, a1, -2000
        add         sp, sp, a1
        ld          ra, 2024(sp)
        ld          s0, 2016(sp)
        fld         fs0, 2008(sp)
        addi        sp, sp, 2032
        ret

# Copy of the outer loop for n <= 0. No dot product is accumulated here, so
# fs0 is preloaded with the constant that equals (0 - expected total).
# Roles: a4 = j, a1 = i, a2/a3 = constants of the multiple-of-10 test,
# ft4/ft5 = a/b of this iteration, ft7/ft6 = a/b of the previous iteration,
# ft2 = 0.1, ft3 = 0.2.
# j starts at 0 and only changes inside the fill code, so with n <= 0 both
# bge checks below branch to .Lmain_empty_next and the fill code does not run.
.Lmain_empty:
        li          a4, 0                   # j = 0
        li          a1, 0                   # i = 0
        fmv.w.x     ft0, zero               # ft0 = 0.0f
        lui         a0, %hi(.LCPI1_1)
        flw         ft1, %lo(.LCPI1_1)(a0)  # ft1 = 1.0f
        lui         a0, 838861
        addiw       a2, a0, -819            # a2 = 0xCCCCCCCD
        lui         a0, 104858
        lui         a3, %hi(.LCPI1_2)
        fld         ft2, %lo(.LCPI1_2)(a3)  # ft2 = 0.1
        lui         a3, %hi(.LCPI1_3)
        fld         ft3, %lo(.LCPI1_3)(a3)  # ft3 = 0.2
        lui         a3, %hi(.LCPI1_0)
        fld         fs0, %lo(.LCPI1_0)(a3)  # final difference for a zero total
        addiw       a3, a0, -1639           # a3 = 0x19999999
        lui         a0, 4
        addiw       a0, a0, 8
        add         a6, sp, a0              # a6 = X base
        addi        a7, sp, 8               # a7 = Y base
        fmv.s       ft6, ft1                # previous b = 1.0
        fmv.s       ft7, ft0                # previous a = 0.0
        j           .Lmain_empty_outer
.Lmain_empty_next:
        addiw       a1, a1, 1               # i++
        fmv.s       ft6, ft5                # carry b
        fmv.s       ft7, ft4                # carry a
        bge         a1, t0, .Lmain_report   # done once i >= COUNT
.Lmain_empty_outer:
        mulw        a0, a1, a2
        slliw       a5, a0, 31
        srliw       a0, a0, 1
        or          a0, a0, a5              # rotate right by one bit
        fmv.s       ft4, ft0                # default a = 0.0
        fmv.s       ft5, ft1                # default b = 1.0
        bgeu        a3, a0, .Lmain_empty_bump   # taken when i is a multiple of 10
        sext.w      a0, a4                  # a0 = j
        bge         a0, s0, .Lmain_empty_next
        j           .Lmain_empty_fill_setup
.Lmain_empty_bump:
        fcvt.d.s    ft4, ft7
        fadd.d      ft4, ft4, ft2
        fcvt.s.d    ft4, ft4                # a = (float)(previous a + 0.1)
        fcvt.d.s    ft5, ft6
        fadd.d      ft5, ft5, ft3
        fcvt.s.d    ft5, ft5                # b = (float)(previous b + 0.2)
        sext.w      a0, a4                  # a0 = j
        bge         a0, s0, .Lmain_empty_next
.Lmain_empty_fill_setup:
        slli        a5, a0, 2               # byte offset of element j
        add         a4, a6, a5              # a4 = &X[j]
        add         a5, a5, a7              # a5 = &Y[j]
.Lmain_empty_fill:
        fcvt.s.w    ft6, a0                 # (float) j
        fadd.s      ft7, ft4, ft6
        fsw         ft7, 0(a4)              # X[j] = a + j
        fadd.s      ft6, ft5, ft6
        fsw         ft6, 0(a5)              # Y[j] = b + j
        addi        a0, a0, 1
        addi        a4, a4, 4
        addi        a5, a5, 4
        bne         s0, a0, .Lmain_empty_fill
        mv          a4, s0                  # j = n
        j           .Lmain_empty_next
.Lmain_end:
        .size       main, .Lmain_end-main
# Global data objects of this translation unit.
# COUNT is the only one read by the code in this file (main loads it as its
# outer iteration count). The _sysy_* objects are not referenced by any
# instruction here - presumably timing bookkeeping for the
# _sysy_starttime/_sysy_stoptime runtime; confirm against the runtime library.
.type COUNT,@object
.section .sdata,"aw",@progbits
.globl COUNT
.p2align 2
COUNT:
# 32-bit value 500000, initialised and writable (small-data section).
.word 500000
.size COUNT, 4
.type _sysy_start,@object
# Everything from here to the .sbss switch below is zero-initialised (.bss).
.bss
.globl _sysy_start
.p2align 3
_sysy_start:
# 16 zero bytes, 8-byte aligned.
.zero 16
.size _sysy_start, 16
.type _sysy_end,@object
.globl _sysy_end
.p2align 3
_sysy_end:
# 16 zero bytes, 8-byte aligned.
.zero 16
.size _sysy_end, 16
# The six objects below are each 4096 zero bytes with 4-byte alignment.
.type _sysy_l1,@object
.globl _sysy_l1
.p2align 2
_sysy_l1:
.zero 4096
.size _sysy_l1, 4096
.type _sysy_l2,@object
.globl _sysy_l2
.p2align 2
_sysy_l2:
.zero 4096
.size _sysy_l2, 4096
.type _sysy_h,@object
.globl _sysy_h
.p2align 2
_sysy_h:
.zero 4096
.size _sysy_h, 4096
.type _sysy_m,@object
.globl _sysy_m
.p2align 2
_sysy_m:
.zero 4096
.size _sysy_m, 4096
.type _sysy_s,@object
.globl _sysy_s
.p2align 2
_sysy_s:
.zero 4096
.size _sysy_s, 4096
.type _sysy_us,@object
.globl _sysy_us
.p2align 2
_sysy_us:
.zero 4096
.size _sysy_us, 4096
.type _sysy_idx,@object
# Small zero-initialised section (no file contents, @nobits).
.section .sbss,"aw",@nobits
.globl _sysy_idx
.p2align 2
_sysy_idx:
# One 32-bit word, initially zero.
.word 0
.size _sysy_idx, 4
# Producer identification string.
.ident "Debian clang version 14.0.6"
# Empty .note.GNU-stack section with no flags (conventionally marks the stack as non-executable).
.section ".note.GNU-stack","",@progbits
.addrsig