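# File: sysy-data/final_performance_c/asm/01_mm1.s
#
# Listing metadata: 415 lines, 5.8 KiB, syntax label "ArmAsm".
# Note: despite that label, the code below is RISC-V assembly
# (see the rv64 architecture string in the .attribute directive).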

# Translation-unit header: selects the code section and records the
# build attributes and the source file name this listing was generated from.
.text
# Attribute tag 4 is the RISC-V stack-alignment build attribute (16 bytes);
# both frames in this file (80 and 112 bytes) are multiples of 16.
.attribute 4, 16
# Attribute tag 5 is the target architecture string: RV64 with the
# I, M, A, F, D and C extensions, each at version 2.0.
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
.file "01_mm1.sy"
# ----------------------------------------------------------------------
# mm -- clear an n x n destination matrix, then accumulate a matrix
#       product into it, skipping zero elements of the left operand.
#
# In:   a0 = n (row/column count; the function does nothing if n <= 0)
#       a1 = base of the left source matrix   (read as  left[i][k])
#       a2 = base of the right source matrix  (read as right[k][j])
#       a3 = base of the destination matrix   (updated as dst[i][j])
#       All three matrices use 4-byte elements (lw/sw) and a fixed row
#       stride of 4096 bytes (lui 1), i.e. 1024 words per row.
# Out:  no return value is set up before ret.
# Effect, in pseudo-C:
#       for (i = 0; i < n; i++) memset(dst[i], 0, 4 * n);
#       for (k = 0; k < n; k++)
#         for (i = 0; i < n; i++)
#           if (left[i][k] != 0)
#             for (j = 0; j < n; j++)
#               dst[i][j] += left[i][k] * right[k][j];  /* mulw/addw: 32-bit wrap */
# Calls: memset@plt.
# Stack: 80-byte frame holding ra and s0-s8, all restored before ret.
# ----------------------------------------------------------------------
.globl mm
.p2align 1
.type mm,@function
mm:
# Prologue: allocate the frame and save ra plus every s-register used below.
addi sp, sp, -80
sd ra, 72(sp)
sd s0, 64(sp)
sd s1, 56(sp)
sd s2, 48(sp)
sd s3, 40(sp)
sd s4, 32(sp)
sd s5, 24(sp)
sd s6, 16(sp)
sd s7, 8(sp)
sd s8, 0(sp)
# Nothing to do for n <= 0: go straight to the epilogue.
blez a0, .LBB0_12
# Move the arguments into callee-saved registers so they survive the
# memset calls: s3 = dst, s2 = right, s7 = left, s5 = n.
mv s3, a3
mv s2, a2
mv s7, a1
mv s5, a0
# s6 = n << 32; s1 = low 32 bits of n, zero-extended (row countdown);
# s4 = (n << 32) >> 30 = 4 * n = bytes to clear per row.
slli s6, a0, 32
srli s1, s6, 32
srli s4, s6, 30
# s8 = 4096 = row stride in bytes; s0 = pointer to the current dst row.
lui s8, 1
mv s0, a3
# --- Phase 1: clear the first n words of each of the n dst rows. ---
.LBB0_2:
mv a0, s0
li a1, 0
mv a2, s4
call memset@plt
addi s1, s1, -1
add s0, s0, s8
bnez s1, .LBB0_2
# --- Phase 2: multiply-accumulate. ---
# This repeats the n <= 0 test already made at function entry.
blez s5, .LBB0_12
# Loop state for phase 2:
#   t5 = k (outer index)          t4 = n, zero-extended
#   a7 = n - 1 (inner trip count after the peeled j = 0 step)
#   t1 = &right[k][1]             a6 = &dst[0][1]
#   t2 = 4096 (row stride)        t0 = 1 (constant for the n == 1 test)
li t5, 0
srli t4, s6, 32
addi a7, t4, -1
addi t1, s2, 4
addi a6, s3, 4
lui t2, 1
li t0, 1
j .LBB0_6
# Advance k: next row of right; finish when k == n.
.LBB0_5:
addi t5, t5, 1
add t1, t1, t2
beq t5, t4, .LBB0_12
# Start of one k iteration: a4 = i = 0, t3 = &right[k][0], t6 = &dst[i][1].
.LBB0_6:
li a4, 0
slli a2, t5, 12
add t3, s2, a2
mv t6, a6
j .LBB0_8
# Advance i: next row of dst; when i == n move on to the next k.
.LBB0_7:
addi a4, a4, 1
add t6, t6, t2
beq a4, t4, .LBB0_5
# Body for one (k, i) pair.
.LBB0_8:
# a3 = &left[i][k] = left + i * 4096 + k * 4; s1 = left[i][k].
slli a2, a4, 12
add a3, s7, a2
slli s1, t5, 2
add a3, a3, s1
lw s1, 0(a3)
# A zero multiplier contributes nothing: skip the whole j loop.
beqz s1, .LBB0_7
# Peeled j = 0 step: a2 = &dst[i][0]; dst[i][0] += right[k][0] * left[i][k].
add a2, a2, s3
lw s0, 0(t3)
lw a1, 0(a2)
mulw s1, s0, s1
addw a1, a1, s1
sw a1, 0(a2)
# With n == 1 there are no further columns.
beq s5, t0, .LBB0_7
# Remaining columns j = 1 .. n-1:
# s0 = &dst[i][j], s1 = &right[k][j], a2 = iterations left.
mv s0, t6
mv s1, t1
mv a2, a7
.LBB0_11:
# left[i][k] is re-read from memory on every pass (presumably because
# the store to dst could alias it -- the code itself does not say why).
lw a1, 0(a3)
lw a0, 0(s1)
lw a5, 0(s0)
mulw a0, a0, a1
addw a0, a0, a5
sw a0, 0(s0)
addi a2, a2, -1
addi s1, s1, 4
addi s0, s0, 4
bnez a2, .LBB0_11
j .LBB0_7
# Epilogue: restore ra and s0-s8, release the frame, return.
.LBB0_12:
ld ra, 72(sp)
ld s0, 64(sp)
ld s1, 56(sp)
ld s2, 48(sp)
ld s3, 40(sp)
ld s4, 32(sp)
ld s5, 24(sp)
ld s6, 16(sp)
ld s7, 8(sp)
ld s8, 0(sp)
addi sp, sp, 80
ret
.Lfunc_end0:
.size mm, .Lfunc_end0-mm
# ----------------------------------------------------------------------
# main -- benchmark driver.
#
# Steps performed by the code below:
#   1. n = getint().
#   2. Read n*n values with getint() into A, then n*n values into B
#      (row by row; 4-byte elements, 4096-byte row stride).
#   3. _sysy_starttime(71).
#   4. Five rounds of:
#        clear C;  C[i][j] += A[i][k] * B[k][j]   (A[i][k] == 0 skipped)
#        clear B;  B[i][j] += A[i][k] * C[k][j]   (A[i][k] == 0 skipped)
#      This is the same computation as mm with (left, right, dst) =
#      (A, B, C) and then (A, C, B), written inline instead of called.
#   5. s1 = 32-bit wrapping sum of the n x n entries of B.
#   6. _sysy_stoptime(90); putint(s1); putch(10); return 0.
# If n <= 0, steps 2, 4 and 5 are skipped and 0 is printed.
#
# NOTE(review): the constants 71 and 90 passed to the timing routines are
# presumably source line numbers -- confirm against the runtime library.
# Out:   a0 = 0.
# Calls: getint, memset@plt, _sysy_starttime, _sysy_stoptime, putint, putch.
# Stack: 112-byte frame holding ra and s0-s11, all restored before ret.
# ----------------------------------------------------------------------
.globl main
.p2align 1
.type main,@function
main:
# Prologue: allocate the frame and save ra plus s0-s11.
addi sp, sp, -112
sd ra, 104(sp)
sd s0, 96(sp)
sd s1, 88(sp)
sd s2, 80(sp)
sd s3, 72(sp)
sd s4, 64(sp)
sd s5, 56(sp)
sd s6, 48(sp)
sd s7, 40(sp)
sd s8, 32(sp)
sd s9, 24(sp)
sd s10, 16(sp)
sd s11, 8(sp)
# n = getint(); with n <= 0 there is no input to read and no work to do.
call getint
blez a0, .LBB1_31
# --- Read matrix A. ---
# s2 = n, s6 = row index, s7 = pointer to the current row of A,
# s3 = n << 32, s4 = n zero-extended, s5 = 4096 (row stride).
mv s2, a0
li s6, 0
lui a0, %hi(A)
addi s7, a0, %lo(A)
slli s3, s2, 32
srli s4, s3, 32
lui s5, 1
# Per row: s1 = elements left in this row, s0 = element pointer.
.LBB1_2:
mv s1, s4
mv s0, s7
.LBB1_3:
call getint
sw a0, 0(s0)
addi s1, s1, -1
addi s0, s0, 4
bnez s1, .LBB1_3
addi s6, s6, 1
add s7, s7, s5
bne s6, s4, .LBB1_2
# --- Read matrix B (same shape of loop as for A). ---
# s4 = row index, s5 = pointer to the current row of B,
# s11 = n zero-extended (kept for the rest of main), s3 = 4096.
blez s2, .LBB1_31
li s4, 0
lui a0, %hi(B)
addi s5, a0, %lo(B)
srli s11, s3, 32
lui s3, 1
.LBB1_7:
mv s0, s11
mv s1, s5
.LBB1_8:
call getint
sw a0, 0(s1)
addi s0, s0, -1
addi s1, s1, 4
bnez s0, .LBB1_8
addi s4, s4, 1
add s5, s5, s3
bne s4, s11, .LBB1_7
# Input is complete: start the timer.
li a0, 71
call _sysy_starttime
blez s2, .LBB1_32
# --- Timed section: five rounds of the two multiplies. ---
# a0 = round counter (copied to s9 at the top of each round)
# s3 = 4 * n (bytes cleared per row)   s7 = 4096 (row stride)
# s4 = C, s6 = B, s10 = A             s8 = 4 (index of the last round)
li a0, 0
slli s3, s11, 2
lui a1, %hi(C)
addi s4, a1, %lo(C)
lui s7, 1
lui a1, %hi(B)
addi s6, a1, %lo(B)
lui a1, %hi(A)
addi s10, a1, %lo(A)
li s8, 4
j .LBB1_13
# End of a round: a0 = next round index; leave once round 4 has completed.
.LBB1_12:
addiw a0, s9, 1
bgeu s9, s8, .LBB1_34
# Start of a round. First half: clear C, then C += A * B.
.LBB1_13:
mv s9, a0
mv s5, s4
mv s0, s11
# Clear the first n words of each of the n rows of C.
.LBB1_14:
mv a0, s5
li a1, 0
mv a2, s3
call memset@plt
addi s0, s0, -1
add s5, s5, s7
bnez s0, .LBB1_14
# a7 = k, a6 = &B[k][0].
li a7, 0
mv a6, s6
j .LBB1_17
# Advance k; once k == n continue with the second half of the round.
.LBB1_16:
addi a7, a7, 1
add a6, a6, s7
beq a7, s11, .LBB1_22
# a2 = i, a3 = &C[i][0].
.LBB1_17:
li a2, 0
mv a3, s4
j .LBB1_19
# Advance i; once i == n move on to the next k.
.LBB1_18:
addi a2, a2, 1
add a3, a3, s7
beq a2, s11, .LBB1_16
# a4 = A[i][k], loaded from A + i * 4096 + k * 4; a zero skips the j loop.
.LBB1_19:
slli a1, a7, 2
slli a4, a2, 12
add a1, a1, a4
add a1, a1, s10
lw a4, 0(a1)
beqz a4, .LBB1_18
# j loop: a5 = &C[i][j], a1 = &B[k][j], s1 = iterations left.
mv a5, a3
mv a1, a6
mv s1, s11
.LBB1_21:
# C[i][j] += B[k][j] * A[i][k]
lw s0, 0(a1)
lw a0, 0(a5)
mulw s0, s0, a4
addw a0, a0, s0
sw a0, 0(a5)
addi s1, s1, -1
addi a1, a1, 4
addi a5, a5, 4
bnez s1, .LBB1_21
j .LBB1_18
# Second half of the round: clear B, then B += A * C.
.LBB1_22:
mv s5, s6
mv s0, s11
# Clear the first n words of each of the n rows of B.
.LBB1_23:
mv a0, s5
li a1, 0
mv a2, s3
call memset@plt
addi s0, s0, -1
add s5, s5, s7
bnez s0, .LBB1_23
# a7 = k, a6 = &C[k][0].
li a7, 0
mv a6, s4
j .LBB1_26
# Advance k; once k == n the round is finished.
.LBB1_25:
addi a7, a7, 1
add a6, a6, s7
beq a7, s11, .LBB1_12
# a2 = i, a3 = &B[i][0].
.LBB1_26:
li a2, 0
mv a3, s6
j .LBB1_28
# Advance i; once i == n move on to the next k.
.LBB1_27:
addi a2, a2, 1
add a3, a3, s7
beq a2, s11, .LBB1_25
# a4 = A[i][k]; a zero skips the j loop.
.LBB1_28:
slli a1, a7, 2
slli a4, a2, 12
add a1, a1, a4
add a1, a1, s10
lw a4, 0(a1)
beqz a4, .LBB1_27
# j loop: a5 = &B[i][j], a1 = &C[k][j], s0 = iterations left.
mv a5, a3
mv a1, a6
mv s0, s11
.LBB1_30:
# B[i][j] += C[k][j] * A[i][k]
lw s1, 0(a1)
lw a0, 0(a5)
mulw s1, s1, a4
addw a0, a0, s1
sw a0, 0(a5)
addi s0, s0, -1
addi a1, a1, 4
addi a5, a5, 4
bnez s0, .LBB1_30
j .LBB1_27
# n <= 0 path taken before the timer was started: start it here so the
# start/stop calls stay paired.
.LBB1_31:
li a0, 71
call _sysy_starttime
# n <= 0 path after the timer was started: the sum is 0.
.LBB1_32:
li s1, 0
# Common exit: stop the timer, print the sum in s1 followed by
# character code 10, and return 0.
.LBB1_33:
li a0, 90
call _sysy_stoptime
mv a0, s1
call putint
li a0, 10
call putch
li a0, 0
ld ra, 104(sp)
ld s0, 96(sp)
ld s1, 88(sp)
ld s2, 80(sp)
ld s3, 72(sp)
ld s4, 64(sp)
ld s5, 56(sp)
ld s6, 48(sp)
ld s7, 40(sp)
ld s8, 32(sp)
ld s9, 24(sp)
ld s10, 16(sp)
ld s11, 8(sp)
addi sp, sp, 112
ret
# --- After the five rounds: s1 = sum of B[i][j] over the n x n block. ---
# a0 = row index, a1 = pointer to the current row of B, a2 = 4096.
.LBB1_34:
blez s2, .LBB1_32
li a0, 0
li s1, 0
lui a1, %hi(B)
addi a1, a1, %lo(B)
lui a2, 1
# Per row: a3 = elements left, a4 = element pointer.
.LBB1_36:
mv a3, s11
mv a4, a1
.LBB1_37:
lw a5, 0(a4)
addw s1, s1, a5
addi a3, a3, -1
addi a4, a4, 4
bnez a3, .LBB1_37
addi a0, a0, 1
add a1, a1, a2
bne a0, s11, .LBB1_36
j .LBB1_33
.Lfunc_end1:
.size main, .Lfunc_end1-main
# ----------------------------------------------------------------------
# Zero-initialised global data.
#
# A, B and C are the three matrices used by main. Each is 4194304 bytes
# (1024 rows of 4096 bytes, matching the row stride used by the code)
# and 4-byte aligned.
# ----------------------------------------------------------------------
.type A,@object
.bss
.globl A
.p2align 2
A:
.zero 4194304
.size A, 4194304
.type B,@object
.globl B
.p2align 2
B:
.zero 4194304
.size B, 4194304
.type C,@object
.globl C
.p2align 2
C:
.zero 4194304
.size C, 4194304
# The _sysy_* objects below are defined here but never referenced by the
# code in this file.
# NOTE(review): presumably they are storage for the timing runtime behind
# _sysy_starttime/_sysy_stoptime -- confirm against that library.
# _sysy_start and _sysy_end: 16 bytes each, 8-byte aligned.
.type _sysy_start,@object
.globl _sysy_start
.p2align 3
_sysy_start:
.zero 16
.size _sysy_start, 16
.type _sysy_end,@object
.globl _sysy_end
.p2align 3
_sysy_end:
.zero 16
.size _sysy_end, 16
# Six 4096-byte, 4-byte-aligned arrays: _sysy_l1, _sysy_l2, _sysy_h,
# _sysy_m, _sysy_s and _sysy_us.
.type _sysy_l1,@object
.globl _sysy_l1
.p2align 2
_sysy_l1:
.zero 4096
.size _sysy_l1, 4096
.type _sysy_l2,@object
.globl _sysy_l2
.p2align 2
_sysy_l2:
.zero 4096
.size _sysy_l2, 4096
.type _sysy_h,@object
.globl _sysy_h
.p2align 2
_sysy_h:
.zero 4096
.size _sysy_h, 4096
.type _sysy_m,@object
.globl _sysy_m
.p2align 2
_sysy_m:
.zero 4096
.size _sysy_m, 4096
.type _sysy_s,@object
.globl _sysy_s
.p2align 2
_sysy_s:
.zero 4096
.size _sysy_s, 4096
.type _sysy_us,@object
.globl _sysy_us
.p2align 2
_sysy_us:
.zero 4096
.size _sysy_us, 4096
# _sysy_idx: a single zero word, placed in .sbss rather than .bss.
.type _sysy_idx,@object
.section .sbss,"aw",@nobits
.globl _sysy_idx
.p2align 2
_sysy_idx:
.word 0
.size _sysy_idx, 4
# Trailer: compiler identification string, an empty .note.GNU-stack
# section, and the .addrsig directive emitted by the compiler.
.ident "Debian clang version 14.0.6"
.section ".note.GNU-stack","",@progbits
.addrsig