# Viewer metadata: 277 lines, 4.3 KiB, highlighted as "ArmAsm" (the content below is RISC-V RV64 GNU assembler source).
# File-level assembler settings for the translation unit "98_matrix_mul.sy"
# (RISC-V, GNU as syntax; '#' starts a comment).
        .file   "98_matrix_mul.sy"
        .option pic                             # assemble as position-independent code

        # ELF attributes recorded in the object file: target ISA string,
        # no reliance on unaligned accesses, 16-byte stack alignment.
        .attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
        .attribute unaligned_access, 0
        .attribute stack_align, 16
#-----------------------------------------------------------------------
# mul -- 3x3 single-precision matrix product, stored row by row.
#
# C-equivalent view (parameter names are descriptive only):
#     int mul(float *x0, float *x1, float *x2,
#             float *y0, float *y1, float *y2,
#             float *z0, float *z1, float *z2);
#
#     z_i[j] = x_i[0]*y0[j] + x_i[1]*y1[j] + x_i[2]*y2[j]     i, j = 0..2
#
# In:    a0, a1, a2 = x0, x1, x2   (3 floats read from each)
#        a3, a4, a5 = y0, y1, y2   (3 floats read from each)
#        a6, a7     = z0, z1       (3 floats written to each)
#        0(sp)      = z2           (9th argument, passed on the stack)
# Out:   a0 = 0
# Clobb: t3, fa1-fa4
# Stack: leaf function, no frame; sp is not modified.
#
# Every element re-reads all six operands from memory after the previous
# element has been stored, so the result is the same as a strict
# element-by-element evaluation even if an output row overlaps an input.
#-----------------------------------------------------------------------
        .text
        .align  1
        .globl  mul
        .type   mul, @function

        # dot3 row, dst, off
        #   dst[off/4] = row[0]*y0[off/4] + row[1]*y1[off/4] + row[2]*y2[off/4]
        # The middle product is formed first and the other two terms are
        # added with fused multiply-adds; this order determines rounding
        # and must not be changed.
        # Uses a3/a4/a5 as the y0/y1/y2 bases; clobbers fa1-fa4.
        .macro  dot3 row, dst, off
        flw     fa1, 4(\row)                    # row[1]
        flw     fa2, \off(a4)                   # y1[j]
        fmul.s  fa1, fa1, fa2                   # acc  = row[1] * y1[j]
        flw     fa3, 0(\row)                    # row[0]
        flw     fa4, \off(a3)                   # y0[j]
        fmadd.s fa1, fa3, fa4, fa1              # acc  = row[0] * y0[j] + acc
        flw     fa3, 8(\row)                    # row[2]
        flw     fa4, \off(a5)                   # y2[j]
        fmadd.s fa1, fa3, fa4, fa1              # acc  = row[2] * y2[j] + acc
        fsw     fa1, \off(\dst)                 # dst[j] = acc
        .endm

mul:
        ld      t3, 0(sp)                       # t3 = z2 (stack-passed 9th argument)

        # z0 = x0 * Y
        dot3    a0, a6, 0
        dot3    a0, a6, 4
        dot3    a0, a6, 8

        # z1 = x1 * Y
        dot3    a1, a7, 0
        dot3    a1, a7, 4
        dot3    a1, a7, 8

        # z2 = x2 * Y
        dot3    a2, t3, 0
        dot3    a2, t3, 4
        dot3    a2, t3, 8

        li      a0, 0                           # return 0
        ret
        .size   mul, .-mul

        .purgem dot3
#-----------------------------------------------------------------------
# int main(void)
#
# 1. Stores 3 into the three words at .LANCHOR0+0/+4/+8 (N, M and L in
#    this file's .bss layout).
# 2. Fills six 3-float stack vectors with {0.0, 1.0, 2.0}.
# 3. Calls mul with the six inputs and three stack output vectors.
# 4. Prints each output vector: every element is converted to int with
#    truncation (rtz) and passed to putint; putch(10) follows each vector.
#    The first vector is printed starting at the index mul returned; the
#    loop bound is the word at .LANCHOR0+0, re-read after every putint.
#
# Out:   a0 = 0
# Saved: ra, s0-s4 (spilled to the frame and restored)
# Stack: 224-byte frame, keeps sp 16-byte aligned at every call.
#-----------------------------------------------------------------------
        .section        .text.startup,"ax",@progbits
        .align  1
        .globl  main
        .type   main, @function

        # Frame layout (byte offsets from sp after the prologue).
        .equ    MAIN_FRAME,     224
        .equ    MAIN_ARG9,      0               # outgoing 9th argument of mul
        .equ    MAIN_X0,        24              # inputs passed in a0..a2
        .equ    MAIN_X1,        40
        .equ    MAIN_X2,        56
        .equ    MAIN_Y0,        72              # inputs passed in a3..a5
        .equ    MAIN_Y1,        88
        .equ    MAIN_Y2,        104
        .equ    MAIN_Z1,        120             # output passed in a7
        .equ    MAIN_Z2,        136             # output passed on the stack
        .equ    MAIN_Z0,        152             # output passed in a6
        .equ    MAIN_SAVE_S4,   176
        .equ    MAIN_SAVE_S3,   184
        .equ    MAIN_SAVE_S2,   192
        .equ    MAIN_SAVE_S1,   200
        .equ    MAIN_SAVE_S0,   208
        .equ    MAIN_SAVE_RA,   216

        # fill_012 off -- store {0.0f, 1.0f, 2.0f} at off(sp).
        # Expects t1 = 0x3f80000000000000 (words 0.0f, 1.0f) and fa5 = 2.0f.
        .macro  fill_012 off
        sd      t1, \off(sp)
        fsw     fa5, \off+8(sp)
        .endm

main:
        # --- prologue ---------------------------------------------------
        addi    sp, sp, -MAIN_FRAME
        sd      ra, MAIN_SAVE_RA(sp)
        sd      s0, MAIN_SAVE_S0(sp)
        sd      s1, MAIN_SAVE_S1(sp)
        sd      s2, MAIN_SAVE_S2(sp)
        sd      s3, MAIN_SAVE_S3(sp)
        sd      s4, MAIN_SAVE_S4(sp)

        # --- globals: three consecutive words at .LANCHOR0 := 3 ---------
        lla     s2, .LANCHOR0                   # s2 stays live: loop bound is 0(s2)
        li      t3, 3
        sw      t3, 0(s2)
        sw      t3, 4(s2)
        sw      t3, 8(s2)

        # --- inputs: six vectors {0.0, 1.0, 2.0} ------------------------
        flw     fa5, .LC0, a5                   # fa5 = 2.0f (0x40000000 at .LC0); a5 is scratch
        li      t1, 127
        slli    t1, t1, 55                      # t1 = 0x3f80000000000000
        fill_012 MAIN_X0
        fill_012 MAIN_X1
        fill_012 MAIN_X2
        fill_012 MAIN_Y0
        fill_012 MAIN_Y1
        fill_012 MAIN_Y2

        # --- mul(x0, x1, x2, y0, y1, y2, z0, z1, z2) --------------------
        addi    a0, sp, MAIN_X0
        addi    a1, sp, MAIN_X1
        addi    a2, sp, MAIN_X2
        addi    a3, sp, MAIN_Y0
        addi    a4, sp, MAIN_Y1
        addi    a5, sp, MAIN_Y2
        addi    s1, sp, MAIN_Z0                 # s1/s4/s3 keep the output addresses
        mv      a6, s1                          #   across the call for printing
        addi    s4, sp, MAIN_Z1
        mv      a7, s4
        addi    s3, sp, MAIN_Z2
        sd      s3, MAIN_ARG9(sp)
        call    mul                             # a0 = first index to print

        # --- print z0[i..] where i = value returned by mul --------------
        li      a5, 2
        bgt     a0, a5, .Lz0_done               # nothing to print if i > 2
        mv      s0, a0                          # s0 = i
        slli    a5, a0, 2
        add     s1, s1, a5                      # s1 = &z0[i]
.Lz0_next:
        flw     fa5, 0(s1)
        addi    s1, s1, 4
        addiw   s0, s0, 1
        fcvt.w.s a0, fa5, rtz                   # truncate; result is already sign-extended on RV64
        call    putint@plt
        lw      a5, 0(s2)                       # reload bound after the external call
        bgt     a5, s0, .Lz0_next
.Lz0_done:
        li      a0, 10
        call    putch@plt

        # --- print z1[0..bound) -----------------------------------------
        lw      a5, 0(s2)
        ble     a5, zero, .Lz1_done
        li      s1, 0                           # s1 = element counter, s4 = cursor
.Lz1_next:
        flw     fa5, 0(s4)
        addi    s4, s4, 4
        addiw   s1, s1, 1
        fcvt.w.s a0, fa5, rtz
        call    putint@plt
        lw      a5, 0(s2)
        bgt     a5, s1, .Lz1_next
.Lz1_done:
        li      a0, 10
        call    putch@plt

        # --- print z2[0..bound) -----------------------------------------
        lw      a5, 0(s2)
        ble     a5, zero, .Lz2_done
        li      s1, 0                           # s1 = element counter, s3 = cursor
.Lz2_next:
        flw     fa5, 0(s3)
        addi    s3, s3, 4
        addiw   s1, s1, 1
        fcvt.w.s a0, fa5, rtz
        call    putint@plt
        lw      a5, 0(s2)
        bgt     a5, s1, .Lz2_next
.Lz2_done:
        li      a0, 10
        call    putch@plt

        # --- epilogue ---------------------------------------------------
        ld      ra, MAIN_SAVE_RA(sp)
        ld      s0, MAIN_SAVE_S0(sp)
        ld      s1, MAIN_SAVE_S1(sp)
        ld      s2, MAIN_SAVE_S2(sp)
        ld      s3, MAIN_SAVE_S3(sp)
        ld      s4, MAIN_SAVE_S4(sp)
        li      a0, 0                           # return 0
        addi    sp, sp, MAIN_FRAME
        ret
        .size   main, .-main

        .purgem fill_012
# Constant pool, zero-initialised globals and object-file trailer.

        # 4-byte mergeable constant loaded by main.
        .section        .rodata.cst4,"aM",@progbits,4
        .align  2
.LC0:
        .word   0x40000000                      # IEEE-754 single 2.0 (decimal 1073741824)

        # bss_object name, bytes
        #   Declare a global, zero-filled object of the given size at the
        #   current location.
        .macro  bss_object name, bytes
        .globl  \name
        .type   \name, @object
        .size   \name, \bytes
\name:
        .zero   \bytes
        .endm

        .bss
        .align  3
        # main addresses the first three words as .LANCHOR0+0/+4/+8, so the
        # order of N, M and L below must not change.
        .set    .LANCHOR0, . + 0
        bss_object N, 4
        bss_object M, 4
        bss_object L, 4

        # The _sysy_* objects are not referenced by any code in this file.
        # NOTE(review): presumably used by an external runtime -- confirm.
        bss_object _sysy_idx,   4
        bss_object _sysy_us,    4096
        bss_object _sysy_s,     4096
        bss_object _sysy_m,     4096
        bss_object _sysy_h,     4096
        bss_object _sysy_l2,    4096
        bss_object _sysy_l1,    4096
        bss_object _sysy_end,   16
        bss_object _sysy_start, 16

        .purgem bss_object

        .ident  "GCC: (Debian 12.2.0-13) 12.2.0"
        .section        .note.GNU-stack,"",@progbits