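# Listing metadata from the file viewer: 208 lines, 3.0 KiB, tagged "ArmAsm".
# The content below is RISC-V (rv64) GNU assembler output, not ARM assembly.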
# File-level assembler setup for the translation unit "transpose2.sy".
# Target, per the arch attribute below: RV64 with the I, M, A, F, D, C,
# Zicsr and Zifencei extensions.
        .file   "transpose2.sy"
        .option pic                             # generate position-independent code
        .attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
        .attribute unaligned_access, 0          # object does not depend on unaligned accesses
        .attribute stack_align, 16              # stack pointer is kept 16-byte aligned
        .text
#-----------------------------------------------------------------------
# transpose -- leaf function, no stack frame.
#
# In:    a0 = n        (32-bit signed; only used to derive colsize)
#        a1 = matrix   (base address of an array of 32-bit words)
#        a2 = rowsize  (32-bit signed)
# Out:   a0 = -1 on every path
# Clobb: a0, a2-a7, t0, t1, t3-t6 (a1 is left untouched)
#
# With colsize = n / rowsize (32-bit signed division), the routine does
# nothing unless both colsize and rowsize are positive. Otherwise, for
# every row i in [0, colsize) and every j in [0, min(i, rowsize - 1)],
# it copies the word matrix[i * rowsize + j] to matrix[j * colsize + i].
# Loads and stores are issued strictly in that order, so a value written
# by an earlier step can be picked up by a later load.
#
# rowsize == 0 needs no separate check: RISC-V divw does not trap on a
# zero divisor and yields -1, so the first guard already exits.
#-----------------------------------------------------------------------
        .align  1
        .globl  transpose
        .type   transpose, @function
transpose:
        divw    t5,a0,a2                # t5 = colsize = n / rowsize
        ble     t5,zero,.L2             # colsize not positive: nothing to copy
        ble     a2,zero,.L2             # rowsize not positive: nothing to copy
        addiw   t0,a2,-1                # t0 = rowsize - 1 (upper bound for j)
        mv      t6,t0                   # t6 = second copy of rowsize - 1
        mv      t3,a1                   # t3 = &matrix[i], destination column start (i = 0)
        sext.w  t4,a2                   # t4 = rowsize, kept because a2 becomes scratch below
        slli    a6,t5,2                 # a6 = colsize * 4 = byte stride between destination words
        li      t1,0                    # t1 = i * rowsize (running word index of row start)
        li      a7,0                    # a7 = i
        li      a5,0                    # a5 = word index of the first source row
        j       .L7

.L10:                                   # next row: reload the source word index
        mv      a5,t1                   # a5 = i * rowsize
.L7:                                    # a0 = min(i, rowsize - 1), the last j for this row
        sext.w  a0,t6                   # assume rowsize - 1 ...
        ble     t0,a7,.L4               # ... which holds when rowsize - 1 is at most i
        sext.w  a0,a7                   # otherwise the bound is i
.L4:
        slli    a5,a5,2                 # word index to byte offset
        add     a5,a1,a5                # a5 = &matrix[i * rowsize]   (source cursor)
        mv      a3,t3                   # a3 = &matrix[i]             (destination cursor)
        li      a4,0                    # a4 = j
.L5:                                    # inner loop over j
        lw      a2,0(a5)                # a2 = matrix[i * rowsize + j]
        addiw   a4,a4,1                 # j += 1
        addi    a5,a5,4                 # source advances one word
        sw      a2,0(a3)                # matrix[j * colsize + i] = a2
        add     a3,a3,a6                # destination advances colsize words
        ble     a4,a0,.L5               # continue while j is at most the bound in a0

        addiw   a7,a7,1                 # i += 1
        addi    t3,t3,4                 # destination column start moves to &matrix[i]
        addw    t1,t1,t4                # running i * rowsize
        bne     t5,a7,.L10              # loop until i == colsize

.L2:                                    # common exit, also used by the early-out guards
        li      a0,-1                   # return value is always -1
        ret
        .size   transpose, .-transpose
#-----------------------------------------------------------------------
# main -- program entry point; returns 0 in a0.
#
# Frame: 32 bytes; ra at 24(sp), s0 at 16(sp), s1 at 8(sp).
# External calls (through the PLT): getint, getarray, _sysy_starttime,
# _sysy_stoptime, putint, putch.
#
# Register roles across the body:
#   s1 = value returned by getint (called n below); reused as the
#        checksum accumulator once the copy passes are finished
#   s0 = value returned by getarray(a) (called len below)
#
# Steps:
#   1. matrix[k] = k for k in [0, n)
#   2. for each of the first len words r of a: run the same copy loop
#      that the transpose function in this file implements, with
#      rowsize = r and colsize = n / r (the loop is expanded inline here)
#   3. sum = sum of k * k * matrix[k] for k in [0, len), in 32-bit
#      wrapping arithmetic
#   4. pass the 32-bit absolute value of sum to putint, then 10 to putch
#-----------------------------------------------------------------------
        .section .text.startup,"ax",@progbits
        .align  1
        .globl  main
        .type   main, @function
main:
        addi    sp,sp,-32               # allocate the frame
        sd      ra,24(sp)
        sd      s0,16(sp)
        sd      s1,8(sp)

        call    getint@plt
        mv      s1,a0                   # s1 = n
        lla     a0,a
        call    getarray@plt            # argument: address of a
        mv      s0,a0                   # s0 = len
        li      a0,32                   # NOTE(review): presumably a source line number -- confirm
        call    _sysy_starttime@plt

        # Step 1: fill matrix[0 .. n-1] with 0 .. n-1.
        lla     a4,matrix               # a4 = store cursor
        li      a5,0                    # a5 = k
        ble     s1,zero,.L16            # skip the fill when n is not positive
.L15:
        sw      a5,0(a4)                # matrix[k] = k
        addiw   a5,a5,1
        addi    a4,a4,4
        bne     s1,a5,.L15

.L16:
        # Step 2: one copy pass per word of a.
        ble     s0,zero,.L32            # len not positive: no passes, sum = 0
        lla     t5,a                    # t5 = cursor into a
        slli    ra,s0,2
        add     ra,ra,t5                # ra = &a[len]; ra is free as scratch: it is saved
                                        # in the frame and no call happens before .L14
        lla     t2,matrix               # t2 = matrix base for source addressing

.L23:                                   # per-pass setup
        lw      t3,0(t5)                # t3 = rowsize = current word of a
        divw    t4,s1,t3                # t4 = colsize = n / rowsize (divw yields -1 for a zero divisor)
        ble     t4,zero,.L17            # colsize not positive: skip this pass
        ble     t3,zero,.L17            # rowsize not positive: skip this pass
        addiw   t0,t3,-1                # t0 = rowsize - 1
        mv      t6,t0                   # t6 = second copy of rowsize - 1
        lla     t1,matrix               # t1 = &matrix[i], destination column start (i = 0)
        sext.w  t3,t3                   # t3 = rowsize, sign-extended
        slli    a0,t4,2                 # a0 = colsize * 4 = destination byte stride
        li      a6,0                    # a6 = i
        li      a7,0                    # a7 = i * rowsize
        li      a5,0                    # a5 = word index of the first source row
        j       .L22

.L33:                                   # next row: reload the source word index
        mv      a5,a7                   # a5 = i * rowsize
.L22:                                   # a1 = min(i, rowsize - 1), the last j for this row
        sext.w  a1,t6
        ble     t0,a6,.L19              # keep rowsize - 1 when it is at most i
        sext.w  a1,a6                   # otherwise the bound is i
.L19:
        slli    a5,a5,2                 # word index to byte offset
        add     a5,a5,t2                # a5 = &matrix[i * rowsize]   (source cursor)
        mv      a3,t1                   # a3 = &matrix[i]             (destination cursor)
        li      a4,0                    # a4 = j
.L20:                                   # inner loop over j
        lw      a2,0(a5)                # a2 = matrix[i * rowsize + j]
        addiw   a4,a4,1                 # j += 1
        addi    a5,a5,4
        sw      a2,0(a3)                # matrix[j * colsize + i] = a2
        add     a3,a3,a0                # destination advances colsize words
        bge     a1,a4,.L20              # continue while j is at most the bound in a1

        addiw   a6,a6,1                 # i += 1
        addi    t1,t1,4
        addw    a7,a7,t3                # running i * rowsize
        bne     t4,a6,.L33              # loop until i == colsize

.L17:
        addi    t5,t5,4                 # advance to the next word of a
        bne     t5,ra,.L23              # loop until the cursor reaches &a[len]

        # Step 3: sum of k * k * matrix[k] over k in [0, len).
        lla     a3,matrix               # a3 = load cursor
        li      s1,0                    # s1 = sum (n is no longer needed)
        li      a5,0                    # a5 = k
.L24:
        mulw    a4,a5,a5                # a4 = k * k
        lw      a2,0(a3)                # a2 = matrix[k]
        addiw   a5,a5,1
        addi    a3,a3,4
        mulw    a4,a4,a2                # a4 = k * k * matrix[k]
        addw    s1,a4,s1                # sum += a4 (32-bit wrap)
        bne     s0,a5,.L24

.L14:
        li      a0,52                   # NOTE(review): presumably a source line number -- confirm
        call    _sysy_stoptime@plt

        # Step 4: branch-free 32-bit absolute value, (x ^ mask) - mask.
        sraiw   a0,s1,31                # a0 = mask: 0 when sum is non-negative, -1 otherwise
        xor     s1,s1,a0
        subw    a0,s1,a0                # a0 = absolute value of sum
        call    putint@plt
        li      a0,10                   # presumably a line feed for putch -- confirm against the runtime
        call    putch@plt

        ld      ra,24(sp)               # restore saved registers
        ld      s0,16(sp)
        ld      s1,8(sp)
        li      a0,0                    # exit status 0
        addi    sp,sp,32
        jr      ra

.L32:                                   # len not positive: nothing was summed
        li      s1,0
        j       .L14
        .size   main, .-main
# Zero-initialised global objects (.bss), all exported.
# Only a and matrix are referenced by the code in this file; the _sysy_*
# objects are presumably used by the external timing runtime -- confirm.
        .globl  a
        .globl  matrix
        .globl  _sysy_idx
        .globl  _sysy_us
        .globl  _sysy_s
        .globl  _sysy_m
        .globl  _sysy_h
        .globl  _sysy_l2
        .globl  _sysy_l1
        .globl  _sysy_end
        .globl  _sysy_start
        .bss
        .align  3                       # 8-byte alignment for the first object

        .type   a, @object
        .size   a, 400000               # room for 100000 32-bit words
a:
        .zero   400000

        .type   matrix, @object
        .size   matrix, 80000000        # room for 20000000 32-bit words
matrix:
        .zero   80000000

        .type   _sysy_idx, @object
        .size   _sysy_idx, 4
_sysy_idx:
        .zero   4
        .zero   4                       # padding outside the object; keeps what follows 8-byte aligned

        .type   _sysy_us, @object
        .size   _sysy_us, 4096
_sysy_us:
        .zero   4096

        .type   _sysy_s, @object
        .size   _sysy_s, 4096
_sysy_s:
        .zero   4096

        .type   _sysy_m, @object
        .size   _sysy_m, 4096
_sysy_m:
        .zero   4096

        .type   _sysy_h, @object
        .size   _sysy_h, 4096
_sysy_h:
        .zero   4096

        .type   _sysy_l2, @object
        .size   _sysy_l2, 4096
_sysy_l2:
        .zero   4096

        .type   _sysy_l1, @object
        .size   _sysy_l1, 4096
_sysy_l1:
        .zero   4096

        .type   _sysy_end, @object
        .size   _sysy_end, 16
_sysy_end:
        .zero   16

        .type   _sysy_start, @object
        .size   _sysy_start, 16
_sysy_start:
        .zero   16

        .ident  "GCC: (Debian 12.2.0-13) 12.2.0"
        .section .note.GNU-stack,"",@progbits   # empty marker section: no executable stack requested