# sysy-data/final_performance_c/asm/matmul3.s
#
# Listing metadata: 270 lines, 3.9 KiB.
# Language: RISC-V (RV64GC) assembly in GNU assembler syntax, not Arm assembly.

# Assembler prelude: selects the code section and records target/source metadata.
# Everything up to the next section directive is emitted into the text section.
.text
# Build attribute tag 4 set to 16 (Tag_RISCV_stack_align in the RISC-V ELF psABI,
# i.e. a 16-byte stack alignment; main's 32-byte frame is consistent with that).
.attribute 4, 16
# Build attribute tag 5 (Tag_RISCV_arch in the psABI): the ISA string, RV64 with the
# I, M, A, F, D and C extensions at the listed versions.
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
# Name of the source file this assembly was generated from.
.file "matmul3.sy"
# int main(void)
#
# Operates on the three global 1000x1000 matrices of 32-bit ints a, b and c
# (one row = 4000 bytes):
#   step 1: call getarray on each of the 1000 rows of a; if a call returns
#           anything other than 1000, return that value immediately
#   step 2: b[i][j] = a[j][i]
#   step 3: c[i][j] = sum over k of a[i][k] * b[k][j]  (32-bit wrapping)
#   step 4: overwrite every element of each row of c with that row's minimum
#   step 5: c[i][j] = -c[j][i], performed in place in row-major order
#   step 6: add up all elements of c (32-bit wrapping), print the sum with
#           putint and return 0
# Steps 2-6 run between _sysy_starttime(24) and _sysy_stoptime(93); the
# arguments are presumably source line numbers (confirm against the runtime).
#
# Saved registers: ra, s0, s1, s2 in a 32-byte stack frame.
# Row pointers advance by 2000 twice because 4000 does not fit the 12-bit
# signed immediate of addi.
        .globl  main
        .p2align 1
        .type   main, @function
main:
        # Prologue: 32-byte frame for ra and s0-s2.
        addi    sp, sp, -32
        sd      ra, 24(sp)
        sd      s0, 16(sp)
        sd      s1, 8(sp)
        sd      s2, 0(sp)

        # Step 1: read the rows of a.
        lui     a0, %hi(a)
        addi    s0, a0, %lo(a)          # s0 = &a[row][0]
        li      s1, 1000                # s1 = rows still to read
        li      s2, 1000                # s2 = value getarray must return
.Lread_row:
        mv      a0, s0
        call    getarray
        bne     a0, s2, .Lreturn        # unexpected result: return it unchanged
        addi    s1, s1, -1
        addi    s0, s0, 2000            # advance to the next row (+4000 bytes)
        addi    s0, s0, 2000
        bnez    s1, .Lread_row

        li      a0, 24
        call    _sysy_starttime

        # Step 2: b[i][j] = a[j][i].
        li      a0, 0                   # a0 = i
        lui     a1, %hi(a)
        addi    s1, a1, %lo(a)          # s1 = &a[0][i]
        lui     a1, %hi(b)
        addi    a3, a1, %lo(b)          # a3 = &b[i][0]
        li      a2, 1000                # a2 = row count
.Ltr_row:
        li      a4, 1000                # a4 = elements left in row i of b
        mv      a5, a3                  # a5 = &b[i][j]
        mv      a1, s1                  # a1 = &a[j][i]
.Ltr_elem:
        lw      s0, 0(a1)
        sw      s0, 0(a5)
        addi    a1, a1, 2000            # one row down in a
        addi    a1, a1, 2000
        addi    a4, a4, -1
        addi    a5, a5, 4               # one column right in b
        bnez    a4, .Ltr_elem
        addi    a0, a0, 1
        addi    s1, s1, 4               # next column of a
        addi    a3, a3, 2000            # next row of b
        addi    a3, a3, 2000
        bne     a0, a2, .Ltr_row

        # Step 3: c[i][j] = sum_k a[i][k] * b[k][j].
        li      t3, 0                   # t3 = i
        lui     a1, %hi(a)
        addi    t2, a1, %lo(a)          # t2 = &a[i][0]
        lui     a1, %hi(b)
        addi    a6, a1, %lo(b)          # a6 = &b[0][0]
        li      t0, 4000                # t0 = bytes per row
        lui     a1, %hi(c)
        addi    t1, a1, %lo(c)          # t1 = &c[0][0]
        li      a7, 1000                # a7 = matrix dimension
.Lmm_row:
        li      s0, 0                   # s0 = j
        mv      a4, a6                  # a4 = &b[0][j]
.Lmm_col:
        li      a5, 0                   # a5 = running dot product
        li      s1, 1000                # s1 = terms left
        mv      a1, a4                  # a1 = &b[k][j]
        mv      a2, t2                  # a2 = &a[i][k]
.Lmm_dot:
        lw      a3, 0(a2)
        lw      a0, 0(a1)
        mulw    a0, a0, a3
        addw    a5, a5, a0
        addi    a2, a2, 4               # next k along the row of a
        addi    a1, a1, 2000            # next k down the column of b
        addi    a1, a1, 2000
        addi    s1, s1, -1
        bnez    s1, .Lmm_dot
        mul     a0, t3, t0              # byte offset of row i
        slli    a1, s0, 2               # byte offset of column j
        add     a0, a0, a1
        add     a0, a0, t1
        sw      a5, 0(a0)               # c[i][j] = dot product
        addi    s0, s0, 1
        addi    a4, a4, 4
        bne     s0, a7, .Lmm_col
        addi    t3, t3, 1
        addi    t2, t2, 2000            # next row of a
        addi    t2, t2, 2000
        bne     t3, a7, .Lmm_row

        # Step 4: replace each row of c by its minimum.
        li      a0, 0                   # a0 = i
        lui     a1, %hi(c)
        addi    s1, a1, %lo(c)          # s1 = &c[i][0]
        li      a1, 0x7fffffff          # a1 = largest 32-bit signed value
        li      a3, 1000                # a3 = row count
.Lmin_row:
        li      a5, 1000                # a5 = elements left to scan
        mv      a2, s1                  # a2 = scan pointer
        mv      a4, a1                  # a4 = minimum so far
.Lmin_scan:
        lw      s0, 0(a2)
        blt     s0, a4, .Lmin_next      # element is smaller: it stays in s0
        mv      s0, a4                  # otherwise carry the previous minimum
.Lmin_next:
        addi    a5, a5, -1
        addi    a2, a2, 4
        mv      a4, s0
        bnez    a5, .Lmin_scan
        li      a2, 1000                # s0 = row minimum; a2 = stores left
        mv      a4, s1
.Lmin_fill:
        sw      s0, 0(a4)
        addi    a2, a2, -1
        addi    a4, a4, 4
        bnez    a2, .Lmin_fill
        addi    a0, a0, 1
        addi    s1, s1, 2000            # next row of c
        addi    s1, s1, 2000
        bne     a0, a3, .Lmin_row

        # Step 5: c[i][j] = -c[j][i], in place.
        li      a0, 0                   # a0 = i
        lui     a1, %hi(c)
        addi    s1, a1, %lo(c)          # s1 = &c[i][0]
        li      a1, 1000                # a1 = row count
        mv      a2, s1                  # a2 = &c[0][i]
.Lneg_row:
        li      a4, 1000                # a4 = elements left in row i
        mv      a5, s1                  # a5 = &c[i][j]  (destination)
        mv      a3, a2                  # a3 = &c[j][i]  (source)
.Lneg_elem:
        lw      s0, 0(a3)
        negw    s0, s0
        sw      s0, 0(a5)
        addi    a3, a3, 2000            # source moves one row down
        addi    a3, a3, 2000
        addi    a4, a4, -1
        addi    a5, a5, 4               # destination moves one column right
        bnez    a4, .Lneg_elem
        addi    a0, a0, 1
        addi    a2, a2, 4
        addi    s1, s1, 2000
        addi    s1, s1, 2000
        bne     a0, a1, .Lneg_row

        # Step 6: sum all elements of c.
        li      a0, 0                   # a0 = i
        li      s0, 0                   # s0 = running sum, live across the calls below
        lui     a1, %hi(c)
        addi    a2, a1, %lo(c)          # a2 = &c[i][0]
        li      a1, 1000                # a1 = row count
.Lsum_row:
        li      a3, 1000                # a3 = elements left in row i
        mv      a4, a2
.Lsum_elem:
        lw      a5, 0(a4)
        addw    s0, s0, a5
        addi    a3, a3, -1
        addi    a4, a4, 4
        bnez    a3, .Lsum_elem
        addi    a0, a0, 1
        addi    a2, a2, 2000
        addi    a2, a2, 2000
        bne     a0, a1, .Lsum_row

        li      a0, 93
        call    _sysy_stoptime
        mv      a0, s0
        call    putint
        li      a0, 0                   # normal exit status

        # Epilogue; also reached from step 1 with a0 = getarray's result.
.Lreturn:
        ld      ra, 24(sp)
        ld      s0, 16(sp)
        ld      s1, 8(sp)
        ld      s2, 0(sp)
        addi    sp, sp, 32
        ret
.Lfunc_end0:
        .size   main, .Lfunc_end0-main
# Global matrices a, b and c: zero-initialised storage in .bss, each
# 1000 * 1000 * 4 = 4000000 bytes, aligned to 4 bytes. main indexes them as
# 1000 rows of 1000 32-bit words.
        .bss

        .globl  a
        .type   a, @object
        .balign 4
a:
        .space  1000 * 1000 * 4
        .size   a, . - a

        .globl  b
        .type   b, @object
        .balign 4
b:
        .space  1000 * 1000 * 4
        .size   b, . - b

        .globl  c
        .type   c, @object
        .balign 4
c:
        .space  1000 * 1000 * 4
        .size   c, . - c
# Zero-initialised global buffers with the _sysy_ prefix, placed in the section
# already selected by the preceding data definitions. No code visible in this
# file reads or writes them; presumably they back the timing helpers
# (_sysy_starttime / _sysy_stoptime) of the runtime library - confirm there.

# Two 16-byte objects, aligned to 8 bytes.
        .globl  _sysy_start
        .type   _sysy_start, @object
        .balign 8
_sysy_start:
        .space  16
        .size   _sysy_start, . - _sysy_start

        .globl  _sysy_end
        .type   _sysy_end, @object
        .balign 8
_sysy_end:
        .space  16
        .size   _sysy_end, . - _sysy_end

# Six 4096-byte objects, aligned to 4 bytes.
        .globl  _sysy_l1
        .type   _sysy_l1, @object
        .balign 4
_sysy_l1:
        .space  4096
        .size   _sysy_l1, . - _sysy_l1

        .globl  _sysy_l2
        .type   _sysy_l2, @object
        .balign 4
_sysy_l2:
        .space  4096
        .size   _sysy_l2, . - _sysy_l2

        .globl  _sysy_h
        .type   _sysy_h, @object
        .balign 4
_sysy_h:
        .space  4096
        .size   _sysy_h, . - _sysy_h

        .globl  _sysy_m
        .type   _sysy_m, @object
        .balign 4
_sysy_m:
        .space  4096
        .size   _sysy_m, . - _sysy_m

        .globl  _sysy_s
        .type   _sysy_s, @object
        .balign 4
_sysy_s:
        .space  4096
        .size   _sysy_s, . - _sysy_s

        .globl  _sysy_us
        .type   _sysy_us, @object
        .balign 4
_sysy_us:
        .space  4096
        .size   _sysy_us, . - _sysy_us
# _sysy_idx: one zero-initialised 32-bit word, 4-byte aligned, in the
# small-data .sbss section. Not referenced by the code visible in this file.
        .section .sbss, "aw", @nobits
        .globl  _sysy_idx
        .type   _sysy_idx, @object
        .balign 4
_sysy_idx:
        .word   0
        .size   _sysy_idx, . - _sysy_idx
# Trailer emitted by the compiler: tool identification and linker hints.
# Identification string of the compiler that produced this file.
.ident "Debian clang version 14.0.6"
# Empty .note.GNU-stack section; by GNU toolchain convention its presence without
# the executable flag tells the linker this object does not need an executable stack.
.section ".note.GNU-stack","",@progbits
# LLVM address-significance table: .addrsig requests the table and each
# .addrsig_sym entry marks the named symbol (here a) as address-significant.
.addrsig
.addrsig_sym a