# sysy-data/final_performance_c/asm/large_loop_array_2.s
#
# 253 lines
# 3.8 KiB
# ArmAsm
# Raw Normal View History
#
# 2024-06-14 13:10:27 +08:00
.file "large_loop_array_2.sy"
.option pic
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.align 1
.globl loop
.type loop, @function
loop:
fmv.w.x fa0,zero
ble a2,zero,.L4
slli a2,a2,2
add a5,a0,a2
.L3:
flw fa4,0(a0)
flw fa5,0(a1)
addi a0,a0,4
addi a1,a1,4
fmadd.s fa0,fa4,fa5,fa0
bne a5,a0,.L3
ret
.L4:
ret
.size loop, .-loop
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
main:
addi sp,sp,-32
li t0,-32768
sd ra,24(sp)
sd s0,16(sp)
fsd fs0,8(sp)
add sp,sp,t0
call getint@plt
mv s0,a0
li a0,22
call _sysy_starttime@plt
lw a7,.LANCHOR0
ble a7,zero,.L8
li a6,-32768
li a5,32768
add a5,a5,a6
add a6,a5,sp
flw fa0,.LC0,a5
fld ft1,.LC1,a5
fld ft0,.LC2,a5
li t3,-16384
li a5,32768
add a5,a5,t3
li a1,0
li t4,10
add t1,a5,sp
remw a5,a1,t4
fmv.w.x fa1,zero
slli a2,s0,2
fmv.s fa2,fa0
fmv.s fs0,fa1
add a2,a2,a6
li a0,0
bne a5,zero,.L24
.L38:
fcvt.d.s fa1,fa1
fcvt.d.s fa2,fa2
fadd.d fa1,fa1,ft1
fadd.d fa2,fa2,ft0
fcvt.s.d fa1,fa1
fcvt.s.d fa2,fa2
ble s0,a0,.L36
.L12:
slli a4,a0,2
add a3,a6,a4
mv a5,a0
add a4,t1,a4
.L10:
fcvt.s.w fa5,a5
addi a3,a3,4
addi a4,a4,4
fadd.s fa4,fa5,fa1
fadd.s fa5,fa5,fa2
addiw a5,a5,1
fsw fa4,-4(a3)
fsw fa5,-4(a4)
bne s0,a5,.L10
.L11:
fmv.w.x fa5,zero
ble s0,zero,.L13
li a4,32768
add a4,a4,t3
add a3,a4,sp
mv a4,a6
.L14:
flw fa3,0(a4)
flw fa4,0(a3)
addi a4,a4,4
addi a3,a3,4
fmadd.s fa5,fa3,fa4,fa5
bne a2,a4,.L14
.L13:
addiw a1,a1,1
fadd.s fs0,fs0,fa5
beq a1,a7,.L15
ble s0,a0,.L37
mv a0,a5
remw a5,a1,t4
beq a5,zero,.L38
.L24:
fmv.s fa2,fa0
fmv.w.x fa1,zero
bgt s0,a0,.L12
.L36:
mv a5,a0
j .L11
.L37:
li a3,-16384
.L19:
fmv.w.x fa5,zero
ble s0,zero,.L17
li a5,32768
add a5,a5,a3
add a4,a5,sp
mv a5,a6
.L18:
flw fa3,0(a5)
flw fa4,0(a4)
addi a5,a5,4
addi a4,a4,4
fmadd.s fa5,fa3,fa4,fa5
bne a2,a5,.L18
.L17:
addiw a1,a1,1
fadd.s fs0,fs0,fa5
bgt a7,a1,.L19
.L15:
li a0,39
call _sysy_stoptime@plt
fcvt.d.s fa5,fs0
fld fa3,.LC3,a5
fld fa4,.LC4,a5
fsub.d fa5,fa5,fa3
fle.d a5,fa5,fa4
beq a5,zero,.L20
fld fa4,.LC5,a5
fge.d a5,fa5,fa4
bne a5,zero,.L39
.L20:
li a0,1
call putint@plt
li a0,1
.L34:
li t0,32768
add sp,sp,t0
ld ra,24(sp)
ld s0,16(sp)
fld fs0,8(sp)
addi sp,sp,32
jr ra
.L39:
li a0,10
call putint@plt
li a0,0
j .L34
.L8:
li a0,39
call _sysy_stoptime@plt
j .L20
.size main, .-main
.globl COUNT
.globl _sysy_idx
.globl _sysy_us
.globl _sysy_s
.globl _sysy_m
.globl _sysy_h
.globl _sysy_l2
.globl _sysy_l1
.globl _sysy_end
.globl _sysy_start
.section .rodata.cst4,"aM",@progbits,4
.align 2
.LC0:
.word 1065353216
.section .rodata.cst8,"aM",@progbits,8
.align 3
.LC1:
.word -1717986918
.word 1069128089
.align 3
.LC2:
.word -1717986918
.word 1070176665
.align 3
.LC3:
.word 0
.word 1128551275
.align 3
.LC4:
.word -1598689907
.word 1051772663
.align 3
.LC5:
.word -1598689907
.word -1095710985
.data
.align 2
.set .LANCHOR0,. + 0
.type COUNT, @object
.size COUNT, 4
COUNT:
.word 500000
.bss
.align 3
.type _sysy_idx, @object
.size _sysy_idx, 4
_sysy_idx:
.zero 4
.zero 4
.type _sysy_us, @object
.size _sysy_us, 4096
_sysy_us:
.zero 4096
.type _sysy_s, @object
.size _sysy_s, 4096
_sysy_s:
.zero 4096
.type _sysy_m, @object
.size _sysy_m, 4096
_sysy_m:
.zero 4096
.type _sysy_h, @object
.size _sysy_h, 4096
_sysy_h:
.zero 4096
.type _sysy_l2, @object
.size _sysy_l2, 4096
_sysy_l2:
.zero 4096
.type _sysy_l1, @object
.size _sysy_l1, 4096
_sysy_l1:
.zero 4096
.type _sysy_end, @object
.size _sysy_end, 16
_sysy_end:
.zero 16
.type _sysy_start, @object
.size _sysy_start, 16
_sysy_start:
.zero 16
.ident "GCC: (Debian 12.2.0-13) 12.2.0"
.section .note.GNU-stack,"",@progbits