# 293 lines, 4.5 KiB. RISC-V (rv64) assembly; the source viewer labelled this file "ArmAsm".
# File preamble: select the code section and record the build attributes.
        .text
        # RISC-V ELF attribute tag 4 (stack alignment): 16 bytes.
        .attribute 4, 16
        # RISC-V ELF attribute tag 5 (target architecture string).
        .attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
        # Name of the source file this assembly was generated from.
        .file   "98_matrix_mul.sy"

#-----------------------------------------------------------------------
# mul_dot3 arow, off, crow      (assembly-time helper, used only by mul)
#
# Computes one element of the result and stores it:
#     *(float *)(crow + off) = A[0]*B0[j] + A[1]*B1[j] + A[2]*B2[j]
# where A = arow, j = off / 4, and B0/B1/B2 are the rows addressed by
# a3/a4/a5.  The middle product is formed first with fmul.s; the other
# two terms are accumulated with fused multiply-adds, in that order.
# All six operands are re-read from memory on every expansion.
# Clobbers: ft0-ft5.
#-----------------------------------------------------------------------
        .macro  mul_dot3 arow, off, crow
        flw     ft0, 0(\arow)
        flw     ft2, 4(\arow)
        flw     ft4, 8(\arow)
        flw     ft1, \off(a3)
        flw     ft3, \off(a4)
        flw     ft5, \off(a5)
        fmul.s  ft2, ft2, ft3           # ft2 = A[1] * B1[j]
        fmadd.s ft0, ft0, ft1, ft2      # ft0 = A[0] * B0[j] + ft2
        fmadd.s ft0, ft4, ft5, ft0      # ft0 = A[2] * B2[j] + ft0
        fsw     ft0, \off(\crow)
        .endm

#-----------------------------------------------------------------------
# mul - fully unrolled 3x3 single-precision matrix product, C = A * B.
# Each matrix is supplied as three separate row pointers.
#
# In:    a0, a1, a2 = rows 0..2 of A   (three floats each, read)
#        a3, a4, a5 = rows 0..2 of B   (three floats each, read)
#        a6, a7     = rows 0 and 1 of C (three floats each, written)
#        0(sp)      = pointer to row 2 of C (ninth argument, on the stack)
# Out:   a0 = 0
# Clobb: a0, ft0-ft5
# Stack: leaf function; no frame is allocated and nothing is saved.
#-----------------------------------------------------------------------
        .globl  mul
        .p2align 1
        .type   mul,@function
mul:
        # Row 0 of C, written through a6.
        mul_dot3 a0, 0, a6
        mul_dot3 a0, 4, a6
        mul_dot3 a0, 8, a6

        # Row 1 of C, written through a7.
        mul_dot3 a1, 0, a7
        mul_dot3 a1, 4, a7
        mul_dot3 a1, 8, a7

        # Row 2 of C.  Its pointer is fetched from the stack into a0;
        # row 0 of A (the previous content of a0) is not used again.
        ld      a0, 0(sp)
        mul_dot3 a2, 0, a0
        mul_dot3 a2, 4, a0
        mul_dot3 a2, 8, a0

        li      a0, 0                   # return 0
        ret
.Lmul_end:
        .size   mul, .Lmul_end-mul
        .purgem mul_dot3                # keep the helper private to mul

#-----------------------------------------------------------------------
# main
#
# Stores 3 into the globals N, M and L, fills three 3-float rows on the
# stack with the bit patterns of {0.0, 3.0, 6.0}, and prints each row:
# putint(0) for element 0, then putint((int)row[i]) for i = 1 .. N-1
# (conversion rounds toward zero), then putch(10).  N is re-read from
# memory after every call.  For the second and third rows nothing but
# the putch(10) is emitted when N <= 0.
# No call to mul appears in this function; the row values are
# immediates.
#
# Frame (80 bytes):
#   72(sp) ra    64(sp) s0    56(sp) s1    48(sp) s2
#   24..35(sp) first row   12..23(sp) second row   0..11(sp) third row
# Out:   a0 = 0
# Calls: putint, putch (not defined in the visible source)
#-----------------------------------------------------------------------
        .globl  main
        .p2align 1
        .type   main,@function
main:
        # --- prologue: allocate frame, save ra and s0-s2 ----------------
        addi    sp, sp, -80
        sd      s2, 48(sp)
        sd      s1, 56(sp)
        sd      s0, 64(sp)
        sd      ra, 72(sp)

        # --- N = M = L = 3 ----------------------------------------------
        li      a0, 3
        lui     s2, %hi(N)              # s2 stays the base for N below
        sw      a0, %lo(N)(s2)
        lui     a1, %hi(M)
        sw      a0, %lo(M)(a1)
        lui     a1, %hi(L)
        sw      a0, %lo(L)(a1)

        # --- three stack rows, each {0.0f, 3.0f, 6.0f} ------------------
        lui     a0, 0x40400             # a0 = 0x40400000 = 3.0f
        lui     a1, 0x40C00             # a1 = 0x40C00000 = 6.0f
        sw      zero, 24(sp)            # first row
        sw      a0, 28(sp)
        sw      a1, 32(sp)
        sw      zero, 12(sp)            # second row
        sw      a0, 16(sp)
        sw      a1, 20(sp)
        sw      zero, 0(sp)             # third row
        sw      a0, 4(sp)
        sw      a1, 8(sp)

        # --- print first row --------------------------------------------
        li      a0, 0
        call    putint                  # element 0 is printed as constant 0
        lw      a0, %lo(N)(s2)
        li      a1, 2
        blt     a0, a1, .Lmain_row0_end # N < 2: no further elements
        li      s0, 1                   # s0 = i
        addi    s1, sp, 28              # s1 = &row[1]
.Lmain_row0_next:
        flw     ft0, 0(s1)
        fcvt.w.s a0, ft0, rtz           # float -> int, round toward zero
        call    putint
        addi    s1, s1, 4
        addi    s0, s0, 1
        lw      a0, %lo(N)(s2)
        blt     s0, a0, .Lmain_row0_next
.Lmain_row0_end:
        li      a0, 10
        call    putch

        # --- print second row -------------------------------------------
        lw      a0, %lo(N)(s2)
        blez    a0, .Lmain_row1_end     # N <= 0: only the putch(10)
        li      a0, 0
        call    putint
        lui     s2, %hi(N)
        lw      a0, %lo(N)(s2)
        li      a1, 2
        blt     a0, a1, .Lmain_row1_end
        li      s0, 1                   # s0 = i
        addi    s1, sp, 16              # s1 = &row[1]
.Lmain_row1_next:
        flw     ft0, 0(s1)
        fcvt.w.s a0, ft0, rtz
        call    putint
        addi    s1, s1, 4
        addi    s0, s0, 1
        lw      a0, %lo(N)(s2)
        blt     s0, a0, .Lmain_row1_next
.Lmain_row1_end:
        li      a0, 10
        call    putch

        # --- print third row (s0/s1 swap roles here) --------------------
        lui     s0, %hi(N)
        lw      a0, %lo(N)(s0)
        blez    a0, .Lmain_row2_end     # N <= 0: only the putch(10)
        li      a0, 0
        call    putint
        lw      a0, %lo(N)(s0)
        li      a1, 2
        blt     a0, a1, .Lmain_row2_end
        lui     s2, %hi(N)
        li      s1, 1                   # s1 = i
        addi    s0, sp, 4               # s0 = &row[1]
.Lmain_row2_next:
        flw     ft0, 0(s0)
        fcvt.w.s a0, ft0, rtz
        call    putint
        addi    s0, s0, 4
        addi    s1, s1, 1
        lw      a0, %lo(N)(s2)
        blt     s1, a0, .Lmain_row2_next
.Lmain_row2_end:
        li      a0, 10
        call    putch

        # --- epilogue: restore saved registers, return 0 ----------------
        ld      s2, 48(sp)
        ld      s1, 56(sp)
        ld      s0, 64(sp)
        ld      ra, 72(sp)
        addi    sp, sp, 80
        li      a0, 0
        ret
.Lmain_end:
        .size   main, .Lmain_end-main

# N: 4-byte, 4-byte-aligned, zero-initialised global in .sbss.
# main stores 3 into it and re-reads it as the bound of its print loops.
        .section .sbss,"aw",@nobits
        .globl  N
        .type   N,@object
        .balign 4
N:
        .zero   4
        .size   N, 4

# M: 4-byte, 4-byte-aligned, zero-initialised global (placed in whatever
# section is current here).  main stores 3 into it; no read of M appears
# in the visible source.
        .globl  M
        .type   M,@object
        .balign 4
M:
        .zero   4
        .size   M, 4

# L: 4-byte, 4-byte-aligned, zero-initialised global (placed in whatever
# section is current here).  main stores 3 into it; no read of L appears
# in the visible source.
        .globl  L
        .type   L,@object
        .balign 4
L:
        .zero   4
        .size   L, 4

# _sysy_start: 16 zero bytes in .bss, 8-byte aligned.
# Not referenced by mul or main; presumably owned by the SysY runtime
# (TODO confirm against the runtime library).
        .bss
        .globl  _sysy_start
        .type   _sysy_start,@object
        .balign 8
_sysy_start:
        .space  16
        .size   _sysy_start, 16

# _sysy_end: 16 zero bytes, 8-byte aligned, in the section that is
# current here.  Not referenced by mul or main; presumably owned by the
# SysY runtime (TODO confirm).
        .globl  _sysy_end
        .type   _sysy_end,@object
        .balign 8
_sysy_end:
        .space  16
        .size   _sysy_end, 16

# _sysy_l1: 4096 zero bytes, 4-byte aligned, in the section that is
# current here.  Not referenced by mul or main; presumably a SysY
# runtime table (TODO confirm).
        .globl  _sysy_l1
        .type   _sysy_l1,@object
        .balign 4
_sysy_l1:
        .space  4096
        .size   _sysy_l1, 4096

# _sysy_l2: 4096 zero bytes, 4-byte aligned, in the section that is
# current here.  Not referenced by mul or main; presumably a SysY
# runtime table (TODO confirm).
        .globl  _sysy_l2
        .type   _sysy_l2,@object
        .balign 4
_sysy_l2:
        .space  4096
        .size   _sysy_l2, 4096

# _sysy_h: 4096 zero bytes, 4-byte aligned, in the section that is
# current here.  Not referenced by mul or main; presumably a SysY
# runtime table (TODO confirm).
        .globl  _sysy_h
        .type   _sysy_h,@object
        .balign 4
_sysy_h:
        .space  4096
        .size   _sysy_h, 4096

# _sysy_m: 4096 zero bytes, 4-byte aligned, in the section that is
# current here.  Not referenced by mul or main; presumably a SysY
# runtime table (TODO confirm).
        .globl  _sysy_m
        .type   _sysy_m,@object
        .balign 4
_sysy_m:
        .space  4096
        .size   _sysy_m, 4096

# _sysy_s: 4096 zero bytes, 4-byte aligned, in the section that is
# current here.  Not referenced by mul or main; presumably a SysY
# runtime table (TODO confirm).
        .globl  _sysy_s
        .type   _sysy_s,@object
        .balign 4
_sysy_s:
        .space  4096
        .size   _sysy_s, 4096

# _sysy_us: 4096 zero bytes, 4-byte aligned, in the section that is
# current here.  Not referenced by mul or main; presumably a SysY
# runtime table (TODO confirm).
        .globl  _sysy_us
        .type   _sysy_us,@object
        .balign 4
_sysy_us:
        .space  4096
        .size   _sysy_us, 4096

# _sysy_idx: 4-byte, 4-byte-aligned, zero-initialised global in .sbss.
# Not referenced by mul or main; presumably a SysY runtime counter
# (TODO confirm).
        .section .sbss,"aw",@nobits
        .globl  _sysy_idx
        .type   _sysy_idx,@object
        .balign 4
_sysy_idx:
        .zero   4
        .size   _sysy_idx, 4

# File trailer: toolchain identification and object-file notes.
        # Compiler identification string recorded in the object.
        .ident  "Debian clang version 14.0.6"
        # Empty .note.GNU-stack section (no flags set, so not executable).
        .section ".note.GNU-stack","",@progbits
        # LLVM-specific: emit the address-significance table.
        .addrsig