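# sysy-data/performance_c/asm/transpose2.s
#
# 253 lines
# 3.6 KiB
# Listing tag: ArmAsm (the .attribute directives in this file declare an rv64 target, i.e. RISC-V, not ARM)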

# Assembler preamble: select the code section, record the RISC-V ELF build
# attributes, and name the source file this listing was generated from.
.text
# Attribute tag 4 with value 16 (Tag_RISCV_stack_align in the RISC-V ELF
# attribute scheme: 16-byte stack alignment).
.attribute 4, 16
# Attribute tag 5 (Tag_RISCV_arch): rv64 base ISA plus the m, a, f, d and c
# extensions at the versions spelled out in the string.
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
.file "transpose2.sy"
# ----------------------------------------------------------------------
# transpose
#
# Leaf function: uses no stack and makes no calls.
#
# In:   a0 = n, 32-bit signed dividend
#       a1 = base address of an array of 32-bit words
#       a2 = w, 32-bit signed divisor; also the inner-loop bound
# Out:  a0 = -1 on every path
# Clobbers: a1-a7, t0-t2
#
# With q = n / w, runs i = 0..q-1 and j = 0..w-1. Whenever j <= i the word
# at base[i*w + j] is copied to base[j*q + i] and then written back to its
# own slot. If q < 1 or w < 1, no memory is touched.
# ----------------------------------------------------------------------
.globl transpose
.p2align 1
.type transpose,@function
transpose:
divw a3, a0, a2                 # a3 = q = n / w (32-bit signed divide)
slti a0, a3, 1                  # a0 = (q < 1)
slti a4, a2, 1                  # a4 = (w < 1)
or a0, a0, a4
bnez a0, .LBB0_7                # either bound is non-positive: nothing to do
li a0, 0                        # a0 is reused as i, the outer index
slli a3, a3, 32
srli a6, a3, 32                 # a6 = q zero-extended: outer bound
slli a2, a2, 32
srli t1, a2, 32                 # t1 = w zero-extended: inner bound
srli t2, a3, 30                 # t2 = 4*q: byte step of the store pointer per j
srli a7, a2, 30                 # a7 = 4*w: byte step of the row pointer per i
mv t0, a1                       # t0 = &base[i], starts at base
j .LBB0_3
# Outer-loop latch: advance i and both per-i pointers; leave when i == q.
.LBB0_2:
addi a0, a0, 1
addi t0, t0, 4                  # t0 = &base[i]
add a1, a1, a7                  # a1 = &base[i*w]
beq a0, a6, .LBB0_7
# Outer-loop head: restart j and the two walking pointers for this i.
.LBB0_3:
li a5, 0                        # a5 = j
mv a3, a1                       # a3 = &base[i*w + j]  (load address)
mv a2, t0                       # a2 is reused as &base[j*q + i]  (store address)
j .LBB0_5
# Inner-loop latch: advance j and both walking pointers; leave when j == w.
.LBB0_4:
addi a5, a5, 1
add a2, a2, t2
addi a3, a3, 4
beq t1, a5, .LBB0_2
# Inner-loop body.
.LBB0_5:
bltu a0, a5, .LBB0_4            # i < j: leave this element alone
lw a4, 0(a3)
sw a4, 0(a2)                    # base[j*q + i] = base[i*w + j]
sw a4, 0(a3)                    # the loaded word is written back to its own slot
j .LBB0_4
# Common exit for the early-out and for normal loop completion.
.LBB0_7:
li a0, -1
ret
.Lfunc_end0:
.size transpose, .Lfunc_end0-transpose
# ----------------------------------------------------------------------
# main
#
# Flow:
#   1. s3 = getint(); s2 = getarray(&a).
#   2. _sysy_starttime(31).
#   3. matrix[k] = k for k = 0..s3-1 (skipped when s3 <= 0).
#   4. For each of the first s2 words d of a, run the same loop nest as the
#      body of transpose, inlined here, with dividend s3, base matrix and
#      divisor d. There is no call to transpose in this function.
#   5. Sum k*k*matrix[k] over k = 0..s2-1 with 32-bit wraparound (0 when
#      s2 <= 0), then take the absolute value.
#   6. _sysy_stoptime(51); putint(result); putch(10).
# Returns 0 in a0.
#
# getint, getarray, putint, putch, _sysy_starttime and _sysy_stoptime are
# external symbols; their behaviour is not visible in this file.
# NOTE(review): neither s3 nor s2 is checked against the sizes reserved for
# matrix and a before they are used as loop counts.
#
# Frame: 48 bytes; ra and s0-s3 are saved at 40..8(sp).
# ----------------------------------------------------------------------
.globl main
.p2align 1
.type main,@function
main:
# Prologue: save ra and the callee-saved registers used below.
addi sp, sp, -48
sd ra, 40(sp)
sd s0, 32(sp)
sd s1, 24(sp)
sd s2, 16(sp)
sd s3, 8(sp)
call getint
mv s3, a0                       # s3 = value returned by getint
lui a0, %hi(a)
addi a0, a0, %lo(a)             # a0 = &a
call getarray
mv s2, a0                       # s2 = value returned by getarray
li a0, 31                       # presumably a source line number - confirm
call _sysy_starttime
blez s3, .LBB1_3                # s3 <= 0: skip the fill loop
li a0, 0                        # a0 = k
lui a1, %hi(matrix)
addi a1, a1, %lo(matrix)        # a1 = &matrix[k]
slli a2, s3, 32
srli a2, a2, 32                 # a2 = s3 zero-extended: fill count
# Fill loop: matrix[k] = k.
.LBB1_2:
sw a0, 0(a1)
addi a0, a0, 1
addi a1, a1, 4
bne a2, a0, .LBB1_2
.LBB1_3:
blez s2, .LBB1_16               # s2 <= 0: no passes, result is 0
li t1, 0                        # t1 = index into a
slli a0, s2, 32
srli t0, a0, 32                 # t0 = s2 zero-extended: number of passes
lui a0, %hi(a)
addi a6, a0, %lo(a)             # a6 = &a
lui a0, %hi(matrix)
addi a7, a0, %lo(matrix)        # a7 = &matrix
j .LBB1_6
# Pass latch: next element of a, or on to the checksum once all are done.
.LBB1_5:
addi t1, t1, 1
beq t1, t0, .LBB1_13
# Pass head: d = a[t1], q = s3 / d.
.LBB1_6:
slli a0, t1, 2
add a0, a0, a6
lw a0, 0(a0)                    # a0 = d = a[t1]
divw a1, s3, a0                 # a1 = q = s3 / d (32-bit signed divide)
slti a2, a1, 1                  # a2 = (q < 1)
slti a3, a0, 1                  # a3 = (d < 1)
or a2, a2, a3
bnez a2, .LBB1_5                # either bound is non-positive: skip this pass
li a4, 0                        # a4 = i, outer index
slli a0, a0, 32
srli a5, a0, 32                 # a5 = d zero-extended: inner bound
slli a0, a1, 32
srli t2, a0, 32                 # t2 = q zero-extended: outer bound
srli s0, a0, 30                 # s0 = 4*q: byte step of the store pointer per j
slli t3, a5, 2                  # t3 = 4*d: byte step of the row pointer per i
mv t4, a7                       # t4 = &matrix[i*d], starts at matrix
mv a2, a7                       # a2 = &matrix[i], starts at matrix
j .LBB1_9
# Outer latch: advance i and both per-i pointers; end of pass when i == q.
.LBB1_8:
addi a4, a4, 1
addi a2, a2, 4
add t4, t4, t3
beq a4, t2, .LBB1_5
# Outer head: restart j and the two walking pointers for this i.
.LBB1_9:
li a1, 0                        # a1 = j
mv a3, t4                       # a3 = &matrix[i*d + j]  (load address)
mv a0, a2                       # a0 = &matrix[j*q + i]  (store address)
j .LBB1_11
# Inner latch: advance j and both walking pointers; leave when j == d.
.LBB1_10:
addi a1, a1, 1
add a0, a0, s0
addi a3, a3, 4
beq a5, a1, .LBB1_8
# Inner body.
.LBB1_11:
bltu a4, a1, .LBB1_10           # i < j: leave this element alone
lw s1, 0(a3)
sw s1, 0(a0)                    # matrix[j*q + i] = matrix[i*d + j]
sw s1, 0(a3)                    # the loaded word is written back to its own slot
j .LBB1_10
# Checksum over the first s2 words of matrix (same s2 test as before the passes).
.LBB1_13:
blez s2, .LBB1_16
li a1, 0                        # a1 = k
li a0, 0                        # a0 = running sum
lui a2, %hi(matrix)
addi a2, a2, %lo(matrix)        # a2 = &matrix[k]
.LBB1_15:
lw a3, 0(a2)
mulw a4, a1, a1                 # a4 = k*k (32-bit)
mulw a3, a4, a3                 # a3 = k*k*matrix[k] (32-bit)
addw a0, a0, a3                 # 32-bit accumulate
addi a1, a1, 1
addi a2, a2, 4
bne t0, a1, .LBB1_15
j .LBB1_17
.LBB1_16:
li a0, 0                        # no passes were requested: sum is 0
# Absolute value of the sum: (x + m) ^ m with m = x >> 63 (0 or -1).
.LBB1_17:
srai a1, a0, 63
add a0, a0, a1
xor s0, a0, a1                  # kept in s0 so it survives the call below
li a0, 51                       # presumably a source line number - confirm
call _sysy_stoptime
sext.w a0, s0                   # pass the result as a sign-extended 32-bit value
call putint
li a0, 10                       # 10 is the ASCII code for line feed
call putch
li a0, 0                        # return value 0
# Epilogue: restore saved registers and release the frame.
ld ra, 40(sp)
ld s0, 32(sp)
ld s1, 24(sp)
ld s2, 16(sp)
ld s3, 8(sp)
addi sp, sp, 48
ret
.Lfunc_end1:
.size main, .Lfunc_end1-main
# ----------------------------------------------------------------------
# Zero-filled global data objects.
# ----------------------------------------------------------------------
# a: 400000 bytes (100000 32-bit words), 4-byte aligned. main passes its
# address to getarray and afterwards reads it one word at a time.
.type a,@object
.bss
.globl a
.p2align 2
a:
.zero 400000
.size a, 400000
# matrix: 80000000 bytes (20000000 32-bit words), 4-byte aligned. main fills
# it, permutes it in place and sums over it.
.type matrix,@object
.globl matrix
.p2align 2
matrix:
.zero 80000000
.size matrix, 80000000
# The _sysy_* objects below are not referenced by any instruction visible in
# this file. Presumably they are state for the external timing routines
# (_sysy_starttime / _sysy_stoptime) - confirm against the runtime library.
# _sysy_start, _sysy_end: 16 bytes each, 8-byte aligned.
.type _sysy_start,@object
.globl _sysy_start
.p2align 3
_sysy_start:
.zero 16
.size _sysy_start, 16
.type _sysy_end,@object
.globl _sysy_end
.p2align 3
_sysy_end:
.zero 16
.size _sysy_end, 16
# _sysy_l1, _sysy_l2, _sysy_h, _sysy_m, _sysy_s, _sysy_us: 4096 bytes each,
# 4-byte aligned.
.type _sysy_l1,@object
.globl _sysy_l1
.p2align 2
_sysy_l1:
.zero 4096
.size _sysy_l1, 4096
.type _sysy_l2,@object
.globl _sysy_l2
.p2align 2
_sysy_l2:
.zero 4096
.size _sysy_l2, 4096
.type _sysy_h,@object
.globl _sysy_h
.p2align 2
_sysy_h:
.zero 4096
.size _sysy_h, 4096
.type _sysy_m,@object
.globl _sysy_m
.p2align 2
_sysy_m:
.zero 4096
.size _sysy_m, 4096
.type _sysy_s,@object
.globl _sysy_s
.p2align 2
_sysy_s:
.zero 4096
.size _sysy_s, 4096
.type _sysy_us,@object
.globl _sysy_us
.p2align 2
_sysy_us:
.zero 4096
.size _sysy_us, 4096
# _sysy_idx: one 4-byte word with value 0, placed in .sbss rather than .bss.
.type _sysy_idx,@object
.section .sbss,"aw",@nobits
.globl _sysy_idx
.p2align 2
_sysy_idx:
.word 0
.size _sysy_idx, 4
# Toolchain identification string recorded in the object.
.ident "Debian clang version 14.0.6"
# Empty .note.GNU-stack section with no flags: the GNU convention for marking
# that this object does not need an executable stack.
.section ".note.GNU-stack","",@progbits
# LLVM address-significance table; a is marked as address-significant.
.addrsig
.addrsig_sym a