# Listing metadata: 253 lines, 3.6 KiB. The listing this text was copied
# from labelled it "ArmAsm"; the directives below target RISC-V (RV64).
# File preamble: select the code section and record the ELF build attributes.
#   attribute 4 (Tag_RISCV_stack_align) = 16-byte stack alignment
#   attribute 5 (Tag_RISCV_arch)        = rv64i plus the m, a, f, d, c extensions
        .text
        .attribute      4, 16
        .attribute      5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
        .file   "transpose1.sy"
#-----------------------------------------------------------------------
# transpose -- copy the entries on or below the diagonal of a row-major
#              array of 32-bit words to their transposed positions.
#
# In:    a0 = element count (signed 32-bit)
#        a1 = base address of the word array (called m below)
#        a2 = row length    (signed 32-bit)
# Out:   a0 = -1 on every path
# Clobb: a1-a7, t0-t2   (leaf routine: no calls, no stack use)
#
# With rows = a0 / a2 (signed 32-bit divide): for each i in [0, rows) and
# each j in [0, a2) with j <= i, the word m[i*a2 + j] is stored to
# m[j*rows + i]. Nothing is touched when rows < 1 or a2 < 1.
#-----------------------------------------------------------------------
        .globl  transpose
        .p2align        1
        .type   transpose,@function
transpose:
        divw    a3, a0, a2              # a3 = rows = count / row_len
        slti    a0, a3, 1               # a0 = (rows < 1)
        slti    a4, a2, 1               # a4 = (row_len < 1)
        or      a0, a0, a4
        bnez    a0, .LBB0_7             # empty shape: skip both loops
        li      a0, 0                   # a0 = i, the row index
        slli    a3, a3, 32
        srli    a6, a3, 32              # a6 = rows, zero-extended to 64 bits
        slli    a2, a2, 32
        srli    t1, a2, 32              # t1 = row_len, zero-extended to 64 bits
        srli    t2, a3, 30              # t2 = rows * 4, byte step of the destination
        srli    a7, a2, 30              # a7 = row_len * 4, byte size of one source row
        mv      t0, a1                  # t0 = &m[i]
        j       .LBB0_3
.LBB0_2:                                # row finished: move on to row i + 1
        addi    a0, a0, 1
        addi    t0, t0, 4               # t0 = &m[i]
        add     a1, a1, a7              # a1 = &m[i * row_len]
        beq     a0, a6, .LBB0_7         # i == rows: all rows done
.LBB0_3:                                # start of row i
        li      a5, 0                   # a5 = j, the column index
        mv      a3, a1                  # a3 = &m[i*row_len + j] (source)
        mv      a2, t0                  # a2 = &m[j*rows + i]    (destination)
        j       .LBB0_5
.LBB0_4:                                # step to column j + 1
        addi    a5, a5, 1
        add     a2, a2, t2
        addi    a3, a3, 4
        beq     t1, a5, .LBB0_2         # j == row_len: row finished
.LBB0_5:
        bltu    a0, a5, .LBB0_4         # i < j: above the diagonal, nothing to copy
        lw      a4, 0(a3)
        sw      a4, 0(a2)               # m[j*rows + i] = m[i*row_len + j]
        sw      a4, 0(a3)               # re-stores the value just loaded, so the source word is unchanged
        j       .LBB0_4
.LBB0_7:                                # common exit
        li      a0, -1
        ret
.Lfunc_end0:
        .size   transpose, .Lfunc_end0-transpose

#-----------------------------------------------------------------------
# main -- program entry point.
#
# Steps, in order (getint, getarray, putint, putch, _sysy_starttime and
# _sysy_stoptime are external routines not defined in the visible file):
#   1. n   = getint()                      kept in s3
#   2. len = getarray(&a)                  kept in s2
#   3. _sysy_starttime(29)
#   4. matrix[k] = k for k in [0, n)
#   5. for each k in [0, len): r = a[k], rows = n / r (signed 32-bit).
#      When rows >= 1 and r >= 1, run the copy loop of `transpose` on
#      matrix with row length r (the loop is inlined here, not called).
#   6. sum = wrapping 32-bit sum of k*k*matrix[k] for k in [0, len),
#      or 0 when len <= 0
#   7. _sysy_stoptime(48), putint(abs(sum)), putch(10)
# Out:   a0 = 0
# Stack: frame of 48 bytes holding ra and s0-s3.
# NOTE(review): the constants 29 and 48 are presumably source line numbers
# passed to the timing helpers -- confirm against the runtime library.
#-----------------------------------------------------------------------
        .globl  main
        .p2align        1
        .type   main,@function
main:
        addi    sp, sp, -48             # frame size is a multiple of 16
        sd      ra, 40(sp)
        sd      s0, 32(sp)
        sd      s1, 24(sp)
        sd      s2, 16(sp)
        sd      s3, 8(sp)
        call    getint
        mv      s3, a0                  # s3 = n
        lui     a0, %hi(a)
        addi    a0, a0, %lo(a)          # a0 = &a
        call    getarray
        mv      s2, a0                  # s2 = len, later the number of words of a processed
        li      a0, 29
        call    _sysy_starttime
        blez    s3, .LBB1_3             # n <= 0: nothing to initialise
        li      a0, 0                   # a0 = k
        lui     a1, %hi(matrix)
        addi    a1, a1, %lo(matrix)     # a1 = &matrix[k]
        slli    a2, s3, 32
        srli    a2, a2, 32              # a2 = n, zero-extended to 64 bits
.LBB1_2:                                # matrix[k] = k
        sw      a0, 0(a1)
        addi    a0, a0, 1
        addi    a1, a1, 4
        bne     a2, a0, .LBB1_2
.LBB1_3:
        blez    s2, .LBB1_16            # len <= 0: skip the copy passes, sum = 0
        li      t1, 0                   # t1 = k, index into a
        slli    a0, s2, 32
        srli    t0, a0, 32              # t0 = len, zero-extended to 64 bits
        lui     a0, %hi(a)
        addi    a6, a0, %lo(a)          # a6 = &a
        lui     a0, %hi(matrix)
        addi    a7, a0, %lo(matrix)     # a7 = &matrix
        j       .LBB1_6
.LBB1_5:                                # next entry of a
        addi    t1, t1, 1
        beq     t1, t0, .LBB1_13        # k == len: all passes done
.LBB1_6:
        slli    a0, t1, 2
        add     a0, a0, a6
        lw      a0, 0(a0)               # a0 = r = a[k]
        divw    a1, s3, a0              # a1 = rows = n / r
        slti    a2, a1, 1
        slti    a3, a0, 1
        or      a2, a2, a3
        bnez    a2, .LBB1_5             # rows < 1 or r < 1: empty pass
        li      a4, 0                   # a4 = i, the row index
        slli    a0, a0, 32
        srli    a5, a0, 32              # a5 = r, zero-extended to 64 bits
        slli    a0, a1, 32
        srli    t2, a0, 32              # t2 = rows, zero-extended to 64 bits
        srli    s0, a0, 30              # s0 = rows * 4, byte step of the destination
        slli    t3, a5, 2               # t3 = r * 4, byte size of one source row
        mv      t4, a7                  # t4 = &matrix[i * r]
        mv      a2, a7                  # a2 = &matrix[i]
        j       .LBB1_9
.LBB1_8:                                # row finished: move on to row i + 1
        addi    a4, a4, 1
        addi    a2, a2, 4
        add     t4, t4, t3
        beq     a4, t2, .LBB1_5         # i == rows: pass finished
.LBB1_9:                                # start of row i
        li      a1, 0                   # a1 = j, the column index
        mv      a3, t4                  # a3 = &matrix[i*r + j]    (source)
        mv      a0, a2                  # a0 = &matrix[j*rows + i] (destination)
        j       .LBB1_11
.LBB1_10:                               # step to column j + 1
        addi    a1, a1, 1
        add     a0, a0, s0
        addi    a3, a3, 4
        beq     a5, a1, .LBB1_8         # j == r: row finished
.LBB1_11:
        bltu    a4, a1, .LBB1_10        # i < j: above the diagonal, nothing to copy
        lw      s1, 0(a3)
        sw      s1, 0(a0)               # matrix[j*rows + i] = matrix[i*r + j]
        sw      s1, 0(a3)               # re-stores the value just loaded, so the source word is unchanged
        j       .LBB1_10
.LBB1_13:
        blez    s2, .LBB1_16            # repeats the len check made at .LBB1_3
        li      a1, 0                   # a1 = k
        li      a0, 0                   # a0 = running sum
        lui     a2, %hi(matrix)
        addi    a2, a2, %lo(matrix)     # a2 = &matrix[k]
.LBB1_15:                               # sum += k * k * matrix[k], 32-bit wrapping
        lw      a3, 0(a2)
        mulw    a4, a1, a1
        mulw    a3, a4, a3
        addw    a0, a0, a3
        addi    a1, a1, 1
        addi    a2, a2, 4
        bne     t0, a1, .LBB1_15
        j       .LBB1_17
.LBB1_16:
        li      a0, 0                   # len <= 0: sum = 0
.LBB1_17:
        srai    a1, a0, 63              # a1 = sign mask: all ones when sum < 0, else 0
        add     a0, a0, a1
        xor     s0, a0, a1              # s0 = abs(sum), held across the call below
        li      a0, 48
        call    _sysy_stoptime
        sext.w  a0, s0
        call    putint
        li      a0, 10
        call    putch
        li      a0, 0                   # return value
        ld      ra, 40(sp)
        ld      s0, 32(sp)
        ld      s1, 24(sp)
        ld      s2, 16(sp)
        ld      s3, 8(sp)
        addi    sp, sp, 48
        ret
.Lfunc_end1:
        .size   main, .Lfunc_end1-main

# a: global zero-initialised buffer of 400000 bytes (100000 words of 32 bits),
# aligned to 4 bytes. main hands its address to getarray and then reads it
# one 32-bit word at a time.
# The .bss directive below also selects the section for the objects that
# follow, up to the next section directive.
        .type   a,@object
        .bss
        .globl  a
        .p2align        2
a:
        .zero   400000
        .size   a, 400000

# matrix: global zero-initialised buffer of 80000000 bytes (20000000 words of
# 32 bits), aligned to 4 bytes. main fills, rearranges and sums it through
# 32-bit loads and stores.
        .type   matrix,@object
        .globl  matrix
        .p2align        2
matrix:
        .zero   80000000
        .size   matrix, 80000000

# _sysy_start / _sysy_end: two global zero-initialised objects of 16 bytes
# each, aligned to 8 bytes.
# NOTE(review): nothing in the visible code reads or writes them. They are
# presumably timestamps owned by the _sysy_starttime / _sysy_stoptime
# runtime helpers -- confirm against the runtime library.
        .type   _sysy_start,@object
        .globl  _sysy_start
        .p2align        3
_sysy_start:
        .zero   16
        .size   _sysy_start, 16

        .type   _sysy_end,@object
        .globl  _sysy_end
        .p2align        3
_sysy_end:
        .zero   16
        .size   _sysy_end, 16

# _sysy_l1, _sysy_l2, _sysy_h, _sysy_m, _sysy_s, _sysy_us: six global
# zero-initialised tables of 4096 bytes each, aligned to 4 bytes.
# NOTE(review): nothing in the visible code touches them. The names suggest
# per-timer records (line numbers and hour/minute/second/microsecond totals)
# kept by the timing runtime -- confirm against the runtime library.
        .type   _sysy_l1,@object
        .globl  _sysy_l1
        .p2align        2
_sysy_l1:
        .zero   4096
        .size   _sysy_l1, 4096

        .type   _sysy_l2,@object
        .globl  _sysy_l2
        .p2align        2
_sysy_l2:
        .zero   4096
        .size   _sysy_l2, 4096

        .type   _sysy_h,@object
        .globl  _sysy_h
        .p2align        2
_sysy_h:
        .zero   4096
        .size   _sysy_h, 4096

        .type   _sysy_m,@object
        .globl  _sysy_m
        .p2align        2
_sysy_m:
        .zero   4096
        .size   _sysy_m, 4096

        .type   _sysy_s,@object
        .globl  _sysy_s
        .p2align        2
_sysy_s:
        .zero   4096
        .size   _sysy_s, 4096

        .type   _sysy_us,@object
        .globl  _sysy_us
        .p2align        2
_sysy_us:
        .zero   4096
        .size   _sysy_us, 4096

# _sysy_idx: global 32-bit word with initial value 0, aligned to 4 bytes and
# placed in the .sbss section (allocatable, writable, no file contents).
# NOTE(review): nothing in the visible code uses it. It is presumably the
# current slot index into the _sysy_* tables -- confirm against the runtime
# library.
        .type   _sysy_idx,@object
        .section        .sbss,"aw",@nobits
        .globl  _sysy_idx
        .p2align        2
_sysy_idx:
        .word   0
        .size   _sysy_idx, 4

# File trailer: producer string, the empty .note.GNU-stack section (the usual
# marker that this object does not need an executable stack), and the start
# of the address-significance table, which lists symbol a.
        .ident  "Debian clang version 14.0.6"
        .section        ".note.GNU-stack","",@progbits
        .addrsig
        .addrsig_sym a