# Listing metadata from the web viewer: 253 lines, 3.8 KiB, language tag "ArmAsm" (the content is actually RISC-V rv64 assembly).
# File preamble: source tag, code model and ELF build attributes.
# Syntax: GNU as, RISC-V; comments start with '#'.
        .file   "large_loop_array_2.sy"
        .option pic                     # position-independent code
        # ISA string recorded in the object: rv64 with I, M, A, F, D, C, Zicsr, Zifencei.
        .attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
        .attribute unaligned_access, 0  # attribute value 0: no unaligned accesses recorded
        .attribute stack_align, 16      # stack alignment recorded as 16 bytes
#-----------------------------------------------------------------------
# loop -- single-precision dot product of two float arrays.
# Presumed C view: float loop(float *x, float *y, int length)  (TODO confirm
# against the .sy source; only the register usage below is visible here).
#
# In:    a0 = address of the first float array
#        a1 = address of the second float array
#        a2 = element count, compared as a signed value
# Out:   fa0 = sum of x[k] * y[k], accumulated with fused multiply-add;
#              0.0f when the count is <= 0
# Clobb: a0, a1, a2, a5, fa4, fa5
# Stack: none (leaf function, makes no calls)
#-----------------------------------------------------------------------
        .text
        .align  1
        .globl  loop
        .type   loop, @function
loop:
        fmv.w.x fa0,zero                # acc = 0.0f
        ble     a2,zero,.L4             # nothing to sum: return 0.0f
        slli    a2,a2,2                 # count -> byte length (4 bytes per float)
        add     a5,a0,a2                # a5 = end pointer for the a0 cursor
.L3:
        flw     fa4,0(a0)
        flw     fa5,0(a1)
        addi    a0,a0,4
        addi    a1,a1,4
        fmadd.s fa0,fa4,fa5,fa0         # acc = fa4 * fa5 + acc
        bne     a5,a0,.L3               # until the a0 cursor reaches the end
.L4:
        ret                             # shared exit for both paths
        .size   loop, .-loop
#-----------------------------------------------------------------------
# main -- reads a length with getint, fills two stack buffers, sums their
# dot product COUNT times between _sysy_starttime/_sysy_stoptime, then
# compares the total against the constant in .LC3 and reports via putint.
#
# Out:   a0 = 0 when (total - .LC3) lies within [.LC5, .LC4], else 1
#        (also 1 when COUNT <= 0)
# Frame: 32-byte save area (ra, s0, fs0) above 32768 bytes of locals:
#          sp + 0      .. sp + 16383   first float buffer  (base kept in a6)
#          sp + 16384  .. sp + 32767   second float buffer (base kept in t1)
# Register roles inside the timed region:
#        s0  = value returned by getint (element count "len")
#        a7  = COUNT (loaded through .LANCHOR0)
#        a1  = outer iteration counter
#        a0  = fill index carried from one outer iteration to the next
#        a2  = a6 + 4*len, end pointer for the first buffer
#        t4  = 10 (divisor for the remainder test)
#        fa0 = 1.0f (.LC0), ft1 = 0.1 (.LC1), ft0 = 0.2 (.LC2)
#        fa1 / fa2 = per-iteration offsets added to the element index
#        fs0 = running total; callee-saved so it survives the call to
#              _sysy_stoptime
# NOTE(review): the getint result is never checked against the buffer
# capacity (16384 bytes = 4096 floats each); a larger value would store
# past the buffers and into the save area. Confirm inputs are bounded.
# NOTE(review): the arguments 22 and 39 passed to the timer hooks are
# presumably source line numbers -- confirm against the runtime library.
#-----------------------------------------------------------------------
        .section .text.startup,"ax",@progbits
        .align  1
        .globl  main
        .type   main, @function
main:
        # --- prologue: save ra/s0/fs0, then carve out the 32 KiB of locals
        addi    sp,sp,-32
        li      t0,-32768
        sd      ra,24(sp)
        sd      s0,16(sp)
        fsd     fs0,8(sp)
        add     sp,sp,t0

        call    getint@plt
        mv      s0,a0                   # s0 = len
        li      a0,22
        call    _sysy_starttime@plt
        lw      a7,.LANCHOR0            # a7 = COUNT
        ble     a7,zero,.L8             # no iterations: stop timer, report 1

        # --- loop set-up
        li      a6,-32768
        li      a5,32768
        add     a5,a5,a6
        add     a6,a5,sp                # a6 = sp + 0 = first buffer
        flw     fa0,.LC0,a5             # fa0 = 1.0f (a5 is the address scratch)
        fld     ft1,.LC1,a5             # ft1 = 0.1
        fld     ft0,.LC2,a5             # ft0 = 0.2
        li      t3,-16384
        li      a5,32768
        add     a5,a5,t3
        li      a1,0                    # outer counter = 0
        li      t4,10
        add     t1,a5,sp                # t1 = sp + 16384 = second buffer
        remw    a5,a1,t4                # a5 = counter % 10
        fmv.w.x fa1,zero                # first offset  = 0.0f
        slli    a2,s0,2
        fmv.s   fa2,fa0                 # second offset = 1.0f
        fmv.s   fs0,fa1                 # total = 0.0f
        add     a2,a2,a6                # a2 = end pointer of first buffer
        li      a0,0                    # fill index = 0
        bne     a5,zero,.L24

        # --- counter % 10 == 0: bump the offsets by 0.1 / 0.2 in double
        #     precision, then round back to single
.L38:
        fcvt.d.s fa1,fa1
        fcvt.d.s fa2,fa2
        fadd.d  fa1,fa1,ft1
        fadd.d  fa2,fa2,ft0
        fcvt.s.d fa1,fa1
        fcvt.s.d fa2,fa2
        ble     s0,a0,.L36              # fill index already >= len: skip the fill

        # --- fill both buffers from the carried index up to len
.L12:
        slli    a4,a0,2
        add     a3,a6,a4                # a3 = cursor in first buffer
        mv      a5,a0                   # a5 = running index
        add     a4,t1,a4                # a4 = cursor in second buffer
.L10:
        fcvt.s.w fa5,a5                 # (float) index
        addi    a3,a3,4
        addi    a4,a4,4
        fadd.s  fa4,fa5,fa1
        fadd.s  fa5,fa5,fa2
        addiw   a5,a5,1
        fsw     fa4,-4(a3)              # first[index]  = index + first offset
        fsw     fa5,-4(a4)              # second[index] = index + second offset
        bne     s0,a5,.L10

        # --- dot product over the first len elements (same arithmetic as loop)
.L11:
        fmv.w.x fa5,zero
        ble     s0,zero,.L13
        li      a4,32768
        add     a4,a4,t3
        add     a3,a4,sp                # a3 = second buffer
        mv      a4,a6                   # a4 = first buffer
.L14:
        flw     fa3,0(a4)
        flw     fa4,0(a3)
        addi    a4,a4,4
        addi    a3,a3,4
        fmadd.s fa5,fa3,fa4,fa5
        bne     a2,a4,.L14

        # --- end of one outer iteration
.L13:
        addiw   a1,a1,1
        fadd.s  fs0,fs0,fa5             # total += dot product
        beq     a1,a7,.L15              # counter reached COUNT: done
        ble     s0,a0,.L37              # fill index was already >= len on entry:
                                        # switch to the dot-product-only loop
        mv      a0,a5                   # carry the fill index forward
        remw    a5,a1,t4
        beq     a5,zero,.L38

        # --- counter % 10 != 0: reset the offsets to 0.0f / 1.0f
.L24:
        fmv.s   fa2,fa0
        fmv.w.x fa1,zero
        bgt     s0,a0,.L12
.L36:
        mv      a5,a0                   # fill skipped: index unchanged
        j       .L11

        # --- remaining iterations: buffers no longer change, only re-sum them
.L37:
        li      a3,-16384
.L19:
        fmv.w.x fa5,zero
        ble     s0,zero,.L17
        li      a5,32768
        add     a5,a5,a3
        add     a4,a5,sp                # a4 = second buffer
        mv      a5,a6                   # a5 = first buffer
.L18:
        flw     fa3,0(a5)
        flw     fa4,0(a4)
        addi    a5,a5,4
        addi    a4,a4,4
        fmadd.s fa5,fa3,fa4,fa5
        bne     a2,a5,.L18
.L17:
        addiw   a1,a1,1
        fadd.s  fs0,fs0,fa5
        bgt     a7,a1,.L19

        # --- stop the timer and check the result
.L15:
        li      a0,39
        call    _sysy_stoptime@plt
        fcvt.d.s fa5,fs0
        fld     fa3,.LC3,a5             # expected total
        fld     fa4,.LC4,a5             # upper tolerance
        fsub.d  fa5,fa5,fa3             # diff = total - expected
        fle.d   a5,fa5,fa4
        beq     a5,zero,.L20            # diff > upper tolerance: failure path
        fld     fa4,.LC5,a5             # lower tolerance
        fge.d   a5,fa5,fa4              # pseudo-instruction: fle.d with operands swapped
        bne     a5,zero,.L39            # diff within tolerance: success path

        # --- failure path: putint(1), return 1
.L20:
        li      a0,1
        call    putint@plt
        li      a0,1

        # --- epilogue: release locals, restore saved registers, return
.L34:
        li      t0,32768
        add     sp,sp,t0
        ld      ra,24(sp)
        ld      s0,16(sp)
        fld     fs0,8(sp)
        addi    sp,sp,32
        jr      ra

        # --- success path: putint(10), return 0
.L39:
        li      a0,10
        call    putint@plt
        li      a0,0
        j       .L34

        # --- COUNT <= 0: stop the timer and take the failure path
.L8:
        li      a0,39
        call    _sysy_stoptime@plt
        j       .L20
        .size   main, .-main
# Give the data objects defined later in this file global binding.
        .globl  COUNT
        .globl  _sysy_idx
        .globl  _sysy_us
        .globl  _sysy_s
        .globl  _sysy_m
        .globl  _sysy_h
        .globl  _sysy_l2
        .globl  _sysy_l1
        .globl  _sysy_end
        .globl  _sysy_start
# Floating-point literals read by main. Doubles are stored as two 32-bit
# words; on a little-endian target the first word is the low half.
        .section .rodata.cst4,"aM",@progbits,4
        .align  2
.LC0:
        .word   1065353216              # 0x3F800000 = 1.0f
        .section .rodata.cst8,"aM",@progbits,8
        .align  3
.LC1:
        .word   -1717986918             # low  half 0x9999999A
        .word   1069128089              # high half 0x3FB99999  -> 0.1
        .align  3
.LC2:
        .word   -1717986918             # low  half 0x9999999A
        .word   1070176665              # high half 0x3FC99999  -> 0.2
        .align  3
.LC3:
        .word   0                       # low  half 0x00000000
        .word   1128551275              # high half 0x4344536B  -> 11442437121638400.0
        .align  3
.LC4:
        .word   -1598689907             # low  half 0xA0B5ED8D
        .word   1051772663              # high half 0x3EB0C6F7  -> 1.0e-6
        .align  3
.LC5:
        .word   -1598689907             # low  half 0xA0B5ED8D
        .word   -1095710985             # high half 0xBEB0C6F7  -> -1.0e-6
# COUNT: initialized 4-byte word holding 500000. .LANCHOR0 is set to the
# same address; main loads the value through that anchor.
        .data
        .align  2
        .set    .LANCHOR0,. + 0
        .type   COUNT, @object
        .size   COUNT, 4
COUNT:
        .word   500000
# Zero-initialized storage for the _sysy_* objects. None of them is
# referenced by the code in this file; presumably they belong to the
# timing runtime behind _sysy_starttime/_sysy_stoptime -- TODO confirm.
        .bss
        .align  3
        .type   _sysy_idx, @object
        .size   _sysy_idx, 4
_sysy_idx:
        .zero   4
        .zero   4                       # 4 more bytes: next object starts 8 bytes in
        .type   _sysy_us, @object
        .size   _sysy_us, 4096
_sysy_us:
        .zero   4096
        .type   _sysy_s, @object
        .size   _sysy_s, 4096
_sysy_s:
        .zero   4096
        .type   _sysy_m, @object
        .size   _sysy_m, 4096
_sysy_m:
        .zero   4096
        .type   _sysy_h, @object
        .size   _sysy_h, 4096
_sysy_h:
        .zero   4096
        .type   _sysy_l2, @object
        .size   _sysy_l2, 4096
_sysy_l2:
        .zero   4096
        .type   _sysy_l1, @object
        .size   _sysy_l1, 4096
_sysy_l1:
        .zero   4096
        .type   _sysy_end, @object
        .size   _sysy_end, 16
_sysy_end:
        .zero   16
        .type   _sysy_start, @object
        .size   _sysy_start, 16
_sysy_start:
        .zero   16
# Trailer: compiler identification string and an empty .note.GNU-stack
# section (the conventional marker that no executable stack is required).
        .ident  "GCC: (Debian 12.2.0-13) 12.2.0"
        .section .note.GNU-stack,"",@progbits