# sysy-data/final_performance_c/asm/01_mm1.s
#
# Length: 282 lines
# Size: 3.8 KiB
# Listing tag: ArmAsm (the .attribute arch line below names an rv64 RISC-V target)

# Assembler preamble: originating source name, PIC mode and object attributes.
.file "01_mm1.sy"
# Position-independent mode; the code below calls externals via @plt and
# materialises symbol addresses with lla.
.option pic
# Target ISA string recorded for this object (rv64 plus the listed extensions).
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
.attribute unaligned_access, 0
# Both stack frames in this file (96 and 64 bytes) are multiples of this value.
.attribute stack_align, 16
.text
.align 1
.globl mm
.type mm, @function
# ---------------------------------------------------------------------------
# mm -- clear the destination, then accumulate
#           dst[i][j] += lhs[i][k] * rhs[k][j]
#       over the leading n x n corner of three word matrices.
#
# In:   a0 = n        (returns immediately, storing nothing, when n <= 0)
#       a1 = lhs base address
#       a2 = rhs base address
#       a3 = dst base address
# Out:  nothing in registers; the result is written through a3.
#
# Layout:     elements are 32-bit words (lw/sw, 4-byte steps); consecutive
#             rows are 4096 bytes apart.
# Arithmetic: mulw/addw, so products and sums wrap to 32 bits.
# Calls:      memset@plt, once per destination row.
# Frame:      96 bytes; ra and s0-s9 are saved on entry and restored on exit.
# Loop order: k outermost, i in the middle, j innermost. For a given k, row i
#             is skipped entirely when lhs[i][k] == 0.
# ---------------------------------------------------------------------------
mm:
ble a0,zero,.L21        # n <= 0: return before building a frame
addi sp,sp,-96
sd s1,72(sp)
slli a5,a0,32           # first half of zero-extending the low 32 bits of n
slli s1,a0,12           # s1 = n * 4096 = byte span of n rows
sd s0,80(sp)
sd s2,64(sp)
sd s3,56(sp)
sd s4,48(sp)
sd s5,40(sp)
sd s6,32(sp)
sd s7,24(sp)
sd s8,16(sp)
sd s9,8(sp)
sd ra,88(sp)
# Arguments are parked in s-registers (callee-saved in the RISC-V calling
# convention) so they are still available after the memset calls.
mv s2,a0                # s2 = n
mv s9,a1                # s9 = lhs base
mv s0,a2                # s0 = rhs base
mv s8,a3                # s8 = dst base
add s7,a3,s1            # s7 = dst + n*4096: stop address for the clearing loop
sext.w s4,a0            # s4 = n as a sign-extended 32-bit value
srli s6,a5,30           # s6 = zext32(n) * 4 = bytes to clear in each row
mv s3,a3                # s3 = clearing cursor, starts at dst row 0
li s5,4096              # row pitch in bytes
# --- Phase 1: memset(dst row, 0, n*4) for each of the n rows ---------------
.L5:
mv a0,s3                # arg0 = current row start
mv a2,s6                # arg2 = byte count
add s3,s3,s5            # advance the cursor to the next row before the call
li a1,0                 # arg1 = fill value 0
call memset@plt
bne s3,s7,.L5
# --- Phase 2 setup: pointers for the k / i / j loops -----------------------
addiw t2,s4,-2          # t2 = n - 2 (32-bit)
slli a4,t2,32
srli t2,a4,30           # t2 = zext32(n - 2) * 4
slli a5,s2,2            # a5 = n * 4
addi a4,s8,8            # a4 = dst + 8
addi t0,s8,4            # t0 = &dst[0][1]
addi t4,s0,4            # t4 = &rhs[0][1] (s0 still holds the rhs base here)
mv a0,s9                # a0 = &lhs[0][0]; serves as the column cursor &lhs[0][k]
add s0,s9,a5            # s0 = &lhs[0][n]: stop address for the k loop
# For n >= 2 this is &dst[0][n]. For n == 1 the value is not meaningful, but
# the j loop that consumes it is bypassed by the "beq s2,t6" test below.
add t2,t2,a4
add t5,t0,s1            # t5 = &dst[n][1]: stop value for the row cursor t1
li t6,1                 # constant for the n == 1 test
li t3,4096              # row pitch in bytes
# --- k loop: a0 = &lhs[0][k], t4 = &rhs[k][1] ------------------------------
.L4:
mv t1,t0                # t1 = &dst[i][1], restarting at row 0
mv a7,t2                # a7 = end of the first n words of dst row i
mv a6,a0                # a6 = &lhs[i][k], restarting at row 0
j .L9
# Row-advance path taken when lhs[i][k] == 0 or when n == 1.
.L7:
add t1,t1,t3
add a6,a6,t3
add a7,a7,t3
beq t1,t5,.L22          # all n rows visited for this k
# --- i loop body ------------------------------------------------------------
.L9:
lw a4,0(a6)             # a4 = lhs[i][k]
beq a4,zero,.L7         # zero factor: leave dst row i untouched for this k
# Column j = 0 is handled here, ahead of the j loop.
lw a5,-4(t4)            # rhs[k][0]
lw a3,-4(t1)            # dst[i][0]
mulw a5,a5,a4
addw a5,a5,a3
sw a5,-4(t1)            # dst[i][0] += lhs[i][k] * rhs[k][0]
beq s2,t6,.L7           # n == 1: no further columns
mv a3,t4                # a3 = &rhs[k][j], starting at j = 1
mv a4,t1                # a4 = &dst[i][j], starting at j = 1
# --- j loop: columns 1 .. n-1 ------------------------------------------------
.L8:
lw a1,0(a3)             # rhs[k][j]
lw a5,0(a6)             # lhs[i][k], re-read from memory on every iteration
lw a2,0(a4)             # dst[i][j]
addi a4,a4,4
mulw a5,a5,a1
addi a3,a3,4
addw a5,a5,a2
sw a5,-4(a4)            # store to the dst element loaded above (a4 already advanced)
bne a4,a7,.L8
# Advance to the next row; same work as .L7, with the loop test inverted.
add t1,t1,t3
add a6,a6,t3
add a7,a7,t3
bne t1,t5,.L9
# --- end of one k iteration -------------------------------------------------
.L22:
addi a0,a0,4            # next column of lhs
add t4,t4,t3            # next row of rhs
bne a0,s0,.L4
# Epilogue: restore saved registers and release the frame.
ld ra,88(sp)
ld s0,80(sp)
ld s1,72(sp)
ld s2,64(sp)
ld s3,56(sp)
ld s4,48(sp)
ld s5,40(sp)
ld s6,32(sp)
ld s7,24(sp)
ld s8,16(sp)
ld s9,8(sp)
addi sp,sp,96
jr ra
# Early exit for n <= 0; no frame was allocated on this path.
.L21:
ret
.size mm, .-mm
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
# ---------------------------------------------------------------------------
# main -- driver for the mm kernel.
#
#   1. n = getint()
#   2. if n > 0: fill the leading n x n corner of A, then of B, one getint()
#      per element, row by row
#   3. _sysy_starttime(71)
#   4. five times: mm(n, A, B, C) followed by mm(n, A, C, B)
#   5. sum the leading n x n corner of B with 32-bit wrapping adds
#      (the sum is 0 when n <= 0)
#   6. _sysy_stoptime(90); putint(sum); putch(10)
#
# Out:   a0 = 0
# Frame: 64 bytes; ra and s0-s6 are saved on entry and restored on exit.
# Rows of A, B and C are addressed with a 4096-byte pitch and 4-byte elements.
# NOTE(review): the constants 71 and 90 passed to the timer calls look like
# source line numbers -- confirm against the runtime library.
# ---------------------------------------------------------------------------
main:
addi sp,sp,-64
sd s2,32(sp)
sd ra,56(sp)
sd s0,48(sp)
sd s1,40(sp)
sd s3,24(sp)
sd s4,16(sp)
sd s5,8(sp)
sd s6,0(sp)
call getint@plt
mv s2,a0                # s2 = n, kept for the rest of the function
ble a0,zero,.L25        # n <= 0: skip both input phases
# s6 = ((n << 10) + n) << 2 = n*4096 + n*4, the distance from a matrix base
# to the end-of-row address one row past the last row read.
slli s6,a0,10
add s6,s6,a0
lla s4,A
slli s3,a0,2            # s3 = n * 4 = bytes read per row
slli s6,s6,2
add s1,s3,s4            # s1 = &A[0][n]: end of the current row's input
li s5,4096              # row pitch in bytes
add s4,s4,s6            # s4 = stop value for s1
# --- read A: outer loop over rows, inner loop over elements -----------------
.L27:
sub s0,s1,s3            # s0 = start of the current row
.L26:
call getint@plt
sw a0,0(s0)
addi s0,s0,4
bne s0,s1,.L26
add s1,s0,s5            # s0 == s1 here, so this moves the row end down one row
bne s1,s4,.L27
# --- read B: same shape as the A loop ---------------------------------------
lla s4,B
add s1,s4,s3            # s1 = &B[0][n]
li s5,4096
add s4,s4,s6            # s4 = stop value for s1
.L28:
sub s0,s1,s3            # s0 = start of the current row
.L29:
call getint@plt
sw a0,0(s0)
addi s0,s0,4
bne s0,s1,.L29
add s1,s1,s5
bne s1,s4,.L28
# --- timed section -----------------------------------------------------------
.L25:
li a0,71
call _sysy_starttime@plt
li s0,5                 # repeat count; each pass makes two mm calls
lla s4,C
lla s3,B
lla s1,A
.L30:
# mm(n, A, B, C): C receives the result.
mv a3,s4
mv a2,s3
mv a1,s1
mv a0,s2
call mm
addiw s0,s0,-1
# mm(n, A, C, B): B receives the result.
mv a3,s3
mv a2,s4
mv a1,s1
mv a0,s2
call mm
bne s0,zero,.L30
# --- checksum over B ---------------------------------------------------------
# s0 is 0 on loop exit and is reused as the running sum.
ble s2,zero,.L34
slli a2,s2,10
add a2,a2,s2
lla a5,B
slli s2,s2,2            # s2 = n * 4; n itself is not needed after this point
slli a2,a2,2            # a2 = n*4096 + n*4
add a3,a5,s2            # a3 = &B[0][n]: end of the current row
add a2,a2,a5            # a2 = stop value for a3
li a1,4096              # row pitch in bytes
.L32:
sub a5,a3,s2            # a5 = start of the current row
.L33:
lw a4,0(a5)
addi a5,a5,4
addw s0,a4,s0           # 32-bit wrapping accumulate
bne a3,a5,.L33
add a3,a3,a1
bne a3,a2,.L32
# --- stop the timer, print the sum, return 0 --------------------------------
.L31:
li a0,90
call _sysy_stoptime@plt
mv a0,s0
call putint@plt
li a0,10                # presumably the newline character code -- confirm putch semantics
call putch@plt
ld ra,56(sp)
ld s0,48(sp)
ld s1,40(sp)
ld s2,32(sp)
ld s3,24(sp)
ld s4,16(sp)
ld s5,8(sp)
ld s6,0(sp)
li a0,0
addi sp,sp,64
jr ra
# n <= 0 path: the sum is forced to 0 (s0 already holds 0 when this is reached).
.L34:
li s0,0
j .L31
.size main, .-main
# ---------------------------------------------------------------------------
# Zero-initialised global objects (.bss), all exported with .globl.
#
# C, B and A are the three matrices used by main and mm. Each is 4194304
# bytes, which is 1024 rows at the 4096-byte row pitch the code uses.
#
# The _sysy_* objects are defined here but not referenced by any instruction
# visible in this file.
# NOTE(review): presumably they are state for the timing runtime behind
# _sysy_starttime / _sysy_stoptime -- confirm against that library.
# ---------------------------------------------------------------------------
.globl C
.globl B
.globl A
.globl _sysy_idx
.globl _sysy_us
.globl _sysy_s
.globl _sysy_m
.globl _sysy_h
.globl _sysy_l2
.globl _sysy_l1
.globl _sysy_end
.globl _sysy_start
.bss
.align 3
# Matrix storage, laid out in the order C, B, A.
.type C, @object
.size C, 4194304
C:
.zero 4194304
.type B, @object
.size B, 4194304
B:
.zero 4194304
.type A, @object
.size A, 4194304
A:
.zero 4194304
# 4-byte object followed by 4 bytes of padding, so the next object starts on
# an 8-byte boundary.
.type _sysy_idx, @object
.size _sysy_idx, 4
_sysy_idx:
.zero 4
.zero 4
# Six 4096-byte objects.
.type _sysy_us, @object
.size _sysy_us, 4096
_sysy_us:
.zero 4096
.type _sysy_s, @object
.size _sysy_s, 4096
_sysy_s:
.zero 4096
.type _sysy_m, @object
.size _sysy_m, 4096
_sysy_m:
.zero 4096
.type _sysy_h, @object
.size _sysy_h, 4096
_sysy_h:
.zero 4096
.type _sysy_l2, @object
.size _sysy_l2, 4096
_sysy_l2:
.zero 4096
.type _sysy_l1, @object
.size _sysy_l1, 4096
_sysy_l1:
.zero 4096
# Two 16-byte objects.
.type _sysy_end, @object
.size _sysy_end, 16
_sysy_end:
.zero 16
.type _sysy_start, @object
.size _sysy_start, 16
_sysy_start:
.zero 16
# Toolchain identification string and the empty GNU-stack note section.
.ident "GCC: (Debian 12.2.0-13) 12.2.0"
.section .note.GNU-stack,"",@progbits