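# File: sysy-data/final_performance_c/asm/gameoflife-p61glidergun.s
# Listing metadata: 704 lines, 11 KiB, syntax label "ArmAsm".
# NOTE: despite that label, the .attribute arch string below ("rv64i2p0_...") marks this as RISC-V (RV64) assembly.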

.text
.attribute 4, 16
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
.file "gameoflife-p61glidergun.sy"
# ---------------------------------------------------------------------------
# read_map -- read the board description from the input routines.
#
# Takes no arguments and returns nothing.  getint / getch are external
# (NOTE(review): presumably the SysY runtime input helpers -- confirm).
#
# Sequence of calls:
#   width  = getint();  height = getint();  steps = getint();
#   getch();                                  (result discarded)
#   for row = 1 .. height:
#       if width > 0: read cells until the column counter reaches width,
#                     storing 1 when getch() returned 35 ('#' in ASCII),
#                     otherwise 0, into consecutive words of sheet1
#       getch();                              (result discarded)
#
# Cell (row, col) lives at sheet1 + row*2000 + col*4; the first cell
# written is row 1, col 1, i.e. sheet1+2004.
#
# Register roles (all callee-saved, spilled in the prologue):
#   s0  cursor into the current row        s1  zero-based column counter
#   s2  %hi(height)                        s3  current row number
#   s4  address of column 1 of this row    s5  %hi(width)
# t0/t1 are scratch and are only live between calls.
# ---------------------------------------------------------------------------
.globl read_map
.p2align 1
.type read_map,@function
read_map:
    addi    sp, sp, -64
    sd      ra, 56(sp)
    sd      s0, 48(sp)
    sd      s1, 40(sp)
    sd      s2, 32(sp)
    sd      s3, 24(sp)
    sd      s4, 16(sp)
    sd      s5, 8(sp)

    call    getint
    lui     s5, %hi(width)
    sw      a0, %lo(width)(s5)          # width = getint()
    call    getint
    lui     s2, %hi(height)
    sw      a0, %lo(height)(s2)         # height = getint()
    call    getint
    lui     t0, %hi(steps)
    sw      a0, %lo(steps)(t0)          # steps = getint()
    call    getch                       # consume one character, value unused

    lw      t0, %lo(height)(s2)
    blez    t0, .Lread_map_done         # nothing to read for height <= 0
    li      s3, 1                       # row = 1
    lui     t0, %hi(sheet1+2004)
    addi    s4, t0, %lo(sheet1+2004)    # s4 = address of cell (1, 1)

.Lread_map_row:
    lw      t0, %lo(width)(s5)
    blez    t0, .Lread_map_row_end      # empty row: only the trailing getch
    li      s1, 0
    mv      s0, s4

.Lread_map_cell:
    call    getch
    lw      t0, %lo(width)(s5)          # width is re-read before the store
    xori    a0, a0, 35
    seqz    a0, a0                      # a0 = (character == 35) ? 1 : 0
    sw      a0, 0(s0)
    addi    s1, s1, 1
    addi    s0, s0, 4
    blt     s1, t0, .Lread_map_cell

.Lread_map_row_end:
    call    getch                       # consume one character after the row
    lw      t0, %lo(height)(s2)
    addi    s4, s4, 2000                # next row, same column
    mv      t1, s3
    addi    s3, s3, 1
    blt     t1, t0, .Lread_map_row      # continue while finished row < height

.Lread_map_done:
    ld      ra, 56(sp)
    ld      s0, 48(sp)
    ld      s1, 40(sp)
    ld      s2, 32(sp)
    ld      s3, 24(sp)
    ld      s4, 16(sp)
    ld      s5, 8(sp)
    addi    sp, sp, 64
    ret
.Lfunc_end0:
.size read_map, .Lfunc_end0-read_map
# ---------------------------------------------------------------------------
# put_map -- print the board held in sheet1 through putch.
#
# Takes no arguments and returns nothing.  putch is external.
#
#   for row = 1 .. height:
#       if width > 0: for each column until the counter reaches width,
#                     putch(35) when the cell word equals 1 ('#' in ASCII),
#                     putch(46) otherwise ('.' in ASCII)
#       putch(10)                              (line feed)
#
# Cell (row, col) is the word at sheet1 + row*2000 + col*4; printing starts
# at row 1, col 1 (sheet1+2004).  width and height are re-read from memory
# after every putch call.
#
# Register roles (all callee-saved, spilled in the prologue):
#   s0  cursor into the current row        s1  zero-based column counter
#   s2  %hi(height)                        s3  address of column 1 of this row
#   s4  current row number                 s5  constant 1 (the "alive" value)
#   s6  %hi(width)
# ---------------------------------------------------------------------------
.globl put_map
.p2align 1
.type put_map,@function
put_map:
    addi    sp, sp, -64
    sd      ra, 56(sp)
    sd      s0, 48(sp)
    sd      s1, 40(sp)
    sd      s2, 32(sp)
    sd      s3, 24(sp)
    sd      s4, 16(sp)
    sd      s5, 8(sp)
    sd      s6, 0(sp)

    lui     s2, %hi(height)
    lw      t0, %lo(height)(s2)
    blez    t0, .Lput_map_done          # nothing to print for height <= 0
    li      s5, 1
    lui     t0, %hi(sheet1+2004)
    addi    s3, t0, %lo(sheet1+2004)    # s3 = address of cell (1, 1)
    lui     s6, %hi(width)
    li      s4, 1                       # row = 1

.Lput_map_row:
    lw      t0, %lo(width)(s6)
    blez    t0, .Lput_map_eol           # empty row: just the line feed
    li      s1, 0
    mv      s0, s3

.Lput_map_cell:
    lw      t0, 0(s0)
    li      a0, 46                      # default glyph
    bne     t0, s5, .Lput_map_emit
    li      a0, 35                      # cell == 1
.Lput_map_emit:
    call    putch
    lw      t0, %lo(width)(s6)
    addi    s1, s1, 1
    addi    s0, s0, 4
    blt     s1, t0, .Lput_map_cell

.Lput_map_eol:
    li      a0, 10
    call    putch
    lw      t0, %lo(height)(s2)
    addi    s3, s3, 2000                # next row, same column
    mv      t1, s4
    addi    s4, s4, 1
    blt     t1, t0, .Lput_map_row       # continue while finished row < height

.Lput_map_done:
    ld      ra, 56(sp)
    ld      s0, 48(sp)
    ld      s1, 40(sp)
    ld      s2, 32(sp)
    ld      s3, 24(sp)
    ld      s4, 16(sp)
    ld      s5, 8(sp)
    ld      s6, 0(sp)
    addi    sp, sp, 64
    ret
.Lfunc_end1:
.size put_map, .Lfunc_end1-put_map
# ---------------------------------------------------------------------------
# swap12 -- copy the live area of sheet2 into sheet1, one row at a time.
#
# Takes no arguments and returns nothing.  Despite the name, the data flow
# is one-way: memcpy's destination is in sheet1 and its source in sheet2.
#
# Does nothing when height <= 0 or width <= 0.  Otherwise, for each of
# `height` rows starting at row 1, it copies width*4 bytes from
# sheet2 + row*2000 + 4 to sheet1 + row*2000 + 4.
#
# Both dimensions are known to be positive once the guards have passed, so
# the sign-extended values loaded by lw can be used directly as the 64-bit
# row count and byte length.
#
# Register roles (callee-saved, because they must survive the memcpy call):
#   s0  destination row pointer (sheet1)   s1  source row pointer (sheet2)
#   s2  bytes per row (width * 4)          s3  rows left to copy
# ---------------------------------------------------------------------------
.globl swap12
.p2align 1
.type swap12,@function
swap12:
    addi    sp, sp, -48
    sd      ra, 40(sp)
    sd      s0, 32(sp)
    sd      s1, 24(sp)
    sd      s2, 16(sp)
    sd      s3, 8(sp)

    lui     t0, %hi(height)
    lw      s3, %lo(height)(t0)
    blez    s3, .Lswap12_done
    lui     t0, %hi(width)
    lw      s2, %lo(width)(t0)
    blez    s2, .Lswap12_done
    slli    s2, s2, 2                   # bytes per row

    lui     t0, %hi(sheet1+2004)
    addi    s0, t0, %lo(sheet1+2004)    # dst = cell (1, 1) of sheet1
    lui     t0, %hi(sheet2+2004)
    addi    s1, t0, %lo(sheet2+2004)    # src = cell (1, 1) of sheet2

.Lswap12_row:
    mv      a0, s0
    mv      a1, s1
    mv      a2, s2
    call    memcpy@plt
    addi    s0, s0, 2000
    addi    s1, s1, 2000
    addi    s3, s3, -1
    bnez    s3, .Lswap12_row

.Lswap12_done:
    ld      ra, 40(sp)
    ld      s0, 32(sp)
    ld      s1, 24(sp)
    ld      s2, 16(sp)
    ld      s3, 8(sp)
    addi    sp, sp, 48
    ret
.Lfunc_end2:
.size swap12, .Lfunc_end2-swap12
# ---------------------------------------------------------------------------
# step -- compute one generation from a source grid into a target grid.
#
# In:   a0 = source grid base address
#       a1 = target grid base address
# Out:  nothing; one word is stored per processed cell of the target grid.
#
# Both grids use a 2000-byte row stride and 4-byte cells.  For every row
# 1..height and every column starting at 1 (until the zero-based column
# counter reaches width), the eight surrounding source words are summed
# with 32-bit arithmetic and the target cell becomes
#       1  when the sum is 3, or when the sum is 2 and the source cell is 1
#       0  otherwise.
# Returns immediately when height < 1 or width < 1.
#
# width is re-read from memory after every store and height after every
# row, so the loop bounds always reflect the current memory contents.
#
# Leaf routine: it uses caller-saved registers only, so it needs no stack
# frame.
#   a6  %hi(height)          t6  %hi(width)
#   t0  target + 4           t1  source + 8
#   t3  current row          a0  zero-based column counter
#   a1  target cell pointer  a2  &source[row][col+1]   a4  &source[row+1][col+1]
#   a3  neighbour sum        a5  scratch / last loaded width
#   a7  2000 (row stride)    t2  2     t4  3     t5  1
# ---------------------------------------------------------------------------
.globl step
.p2align 1
.type step,@function
step:
    lui     a6, %hi(height)
    lw      a2, %lo(height)(a6)
    lui     t6, %hi(width)
    lw      a5, %lo(width)(t6)
    blez    a2, .Lstep_done
    blez    a5, .Lstep_done

    addi    t0, a1, 4
    addi    t1, a0, 8
    li      a7, 2000
    li      t5, 1
    li      t2, 2
    li      t4, 3
    li      t3, 1                       # row = 1

.Lstep_row:
    blez    a5, .Lstep_row_end          # a5 = most recently loaded width
    li      a0, 0
    mul     a2, t3, a7                  # byte offset of this row
    add     a1, t0, a2                  # a1 = &target[row][1]
    add     a2, a2, t1                  # a2 = &source[row][2]
    add     a4, a2, a7                  # a4 = &source[row+1][2]

.Lstep_cell:
    lw      a3, -2008(a2)               # row above: left
    lw      a5, -2004(a2)               #            centre
    addw    a3, a3, a5
    lw      a5, -2000(a2)               #            right
    addw    a3, a3, a5
    lw      a5, -8(a2)                  # same row:  left
    addw    a3, a3, a5
    lw      a5, 0(a2)                   #            right
    addw    a3, a3, a5
    lw      a5, -8(a4)                  # row below: left
    addw    a3, a3, a5
    lw      a5, -4(a4)                  #            centre
    addw    a3, a3, a5
    lw      a5, 0(a4)                   #            right
    addw    a3, a3, a5                  # a3 = sum of the eight neighbours
    lw      a5, -4(a2)                  # a5 = the cell itself
    beq     a3, t4, .Lstep_alive        # sum == 3
    bne     a3, t2, .Lstep_dead         # sum != 2
    bne     a5, t5, .Lstep_dead         # sum == 2 but cell != 1
.Lstep_alive:
    sw      t5, 0(a1)
    j       .Lstep_advance
.Lstep_dead:
    sw      zero, 0(a1)
.Lstep_advance:
    lw      a5, %lo(width)(t6)          # reload after the store
    addi    a0, a0, 1
    addi    a1, a1, 4
    addi    a2, a2, 4
    addi    a4, a4, 4
    blt     a0, a5, .Lstep_cell

.Lstep_row_end:
    lw      a2, %lo(height)(a6)
    mv      a3, t3
    addi    t3, t3, 1
    blt     a3, a2, .Lstep_row          # continue while finished row < height

.Lstep_done:
    ret
.Lfunc_end3:
.size step, .Lfunc_end3-step
# ---------------------------------------------------------------------------
# main -- whole program; returns 0 in a0.
#
# The bodies below are inlined copies of the work done by read_map, the
# generation loop (same rule as step), swap12 and put_map:
#   Phase 1  read width, height, steps and the initial board into sheet1.
#   Phase 2  call _sysy_starttime, run `steps` generations alternating
#            sheet1 -> sheet2 and sheet2 -> sheet1 according to `active`,
#            then store the final `active` and steps = 0.
#   Phase 3  call _sysy_stoptime; if active == 2 copy sheet2's live area
#            into sheet1 with memcpy; print sheet1 with putch.
#
# Frame: 144 bytes.  136..40(sp) hold ra and s0-s11.  Spill slots:
#   32(sp) remaining step count          24(sp) "grid is empty" flag
#   16(sp) sheet2+2004                    8(sp) sheet1+2008
#
# NOTE: between the two timing calls `ra` is used as an ordinary register
# (empty-grid flag and destination pointer).  The real return address is
# reloaded from 136(sp) in the epilogue.
# ---------------------------------------------------------------------------
.globl main
.p2align 1
.type main,@function
main:
addi sp, sp, -144
sd ra, 136(sp)
sd s0, 128(sp)
sd s1, 120(sp)
sd s2, 112(sp)
sd s3, 104(sp)
sd s4, 96(sp)
sd s5, 88(sp)
sd s6, 80(sp)
sd s7, 72(sp)
sd s8, 64(sp)
sd s9, 56(sp)
sd s10, 48(sp)
sd s11, 40(sp)
# --- Phase 1: read the header values and the board (same shape as read_map)
call getint
lui s6, %hi(width)
sw a0, %lo(width)(s6)  # width = getint()
call getint
lui s3, %hi(height)
sw a0, %lo(height)(s3)  # height = getint()
call getint
lui s2, %hi(steps)
sw a0, %lo(steps)(s2)  # steps = getint()
call getch  # one character consumed, value unused
lw a0, %lo(height)(s3)
blez a0, .LBB4_6  # height <= 0: no rows to read
li a0, 1  # a0 = first row number
lui a1, %hi(sheet1+2004)
addi s5, a1, %lo(sheet1+2004)  # s5 = &sheet1 row 1, column 1
j .LBB4_3
# End of a row: consume one character, then advance to the next row.
.LBB4_2:
call getch
lw a1, %lo(height)(s3)
addi a0, s4, 1  # a0 = next row number
addi s5, s5, 2000  # row stride is 2000 bytes
bge s4, a1, .LBB4_6  # finished row >= height: reading is done
# Start of a row: s4 = row number, s5 = address of its column 1.
.LBB4_3:
lw a1, %lo(width)(s6)
mv s4, a0
blez a1, .LBB4_2  # width <= 0: row has no cells
li s1, 0  # s1 = zero-based column counter
mv s0, s5  # s0 = cell cursor
# Read one cell: store 1 if getch() returned 35 ('#' in ASCII), else 0.
.LBB4_5:
call getch
addi a0, a0, -35
lw a1, %lo(width)(s6)
seqz a0, a0
sw a0, 0(s0)
addi s1, s1, 1
addi s0, s0, 4
blt s1, a1, .LBB4_5
j .LBB4_2
# --- Phase 2: timed generation loop
.LBB4_6:
li a0, 96  # NOTE(review): argument to the timer; presumably a source line number -- confirm
call _sysy_starttime
lw a0, %lo(steps)(s2)  # a0 = number of generations still to run
blez a0, .LBB4_30  # steps <= 0: skip the loop entirely
# Loop-invariant setup.  No calls occur until .LBB4_30, so caller-saved
# registers (t1, t4, t5, t6, a7) stay live across iterations.
lui a1, %hi(height)
lw s3, %lo(height)(a1)  # s3 = height (loaded once for the whole loop)
lui a1, %hi(width)
lw s4, %lo(width)(a1)  # s4 = width (loaded once for the whole loop)
lui a1, %hi(active)
lw t5, %lo(active)(a1)  # t5 = active (which sheet holds the current board)
slti a1, s3, 1
slti a2, s4, 1
or ra, a1, a2  # ra = 1 when height < 1 or width < 1 (nothing to compute)
li t1, 1  # t1 = constant 1 (alive value; also compared with active/steps)
lui a1, %hi(sheet1)
addi t6, a1, %lo(sheet1)  # t6 = &sheet1
lui a1, %hi(sheet2)
addi s2, a1, %lo(sheet2)  # s2 = &sheet2
addi a1, s2, 2004
sd a1, 16(sp)  # spill: sheet2 row 1, column 1 (destination of the sheet1 pass)
addi a1, t6, 2008
sd a1, 8(sp)  # spill: sheet1 row 1, column 2 (source cursor of the sheet1 pass)
li t4, 2000  # t4 = row stride in bytes
li a7, 3  # a7 = neighbour count that always yields a live cell
sd ra, 24(sp)  # spill the empty-grid flag; ra is reused as a pointer below
j .LBB4_11
# Reached after a sheet2 -> sheet1 pass: t5 was used as scratch there.
.LBB4_8:
li t5, 1  # active = 1
.LBB4_9:
ld a1, 32(sp)  # a1 = step count saved before the pass
# Bottom of the generation loop: a1 = steps remaining before this decrement.
.LBB4_10:
addiw a0, a1, -1
bge t1, a1, .LBB4_29  # that was the last generation
# Top of the generation loop: a0 = steps remaining.
.LBB4_11:
mv a1, a0
bne t5, t1, .LBB4_20  # active != 1: take the sheet2 -> sheet1 pass
# ---- Pass A: sheet1 -> sheet2, then active becomes 2
li t5, 2
bnez ra, .LBB4_10  # empty grid: only flip `active` and count the step
sd a1, 32(sp)  # save the step count; a1 is reused inside the pass
# Prime the sliding window with columns 0 and 1 of rows 0 and 1.
lui a0, %hi(sheet1)
lw a6, %lo(sheet1)(a0)  # sheet1 row 0, column 0
lw t2, 4(t6)  # sheet1 row 0, column 1
lw s5, 2000(t6)  # sheet1 row 1, column 0
lw s8, 2004(t6)  # sheet1 row 1, column 1
li s9, 1  # s9 = next row number
ld s11, 8(sp)  # s11 = source cursor base (sheet1 row 1, column 2)
ld ra, 16(sp)  # ra = destination row pointer (sheet2 row 1, column 1)
j .LBB4_15
# Row finished: step both row pointers down one row.
.LBB4_14:
addi ra, ra, 2000
addi s11, s11, 2000
beq s10, s3, .LBB4_28  # the row just finished was row `height`
# Row start: rotate the cached column-0/1 values down by one row.
.LBB4_15:
mv s10, s9  # s10 = current row
mv a1, a6  # a1 = row above, column 0
mv a0, t2  # a0 = row above, column 1
mv a6, s5  # a6 = current row, column 0
mv t2, s8  # t2 = current row, column 1
addi s9, s9, 1
mul a2, s9, t4
add a2, a2, t6  # a2 = &sheet1 row below, column 0
lw s5, 0(a2)  # s5 = row below, column 0
lw s8, 4(a2)  # s8 = row below, column 1
mv a5, s11  # a5 = &sheet1[current row][column 2]
mv s0, ra  # s0 = destination cell pointer
mv a2, s4  # a2 = cells left in this row (counts down from width)
mv t0, t2
mv s1, s8
mv s7, s5
mv s6, a6
j .LBB4_17
# Cell lives: store 1 and advance.
.LBB4_16:
sw t1, 0(s0)
addi a2, a2, -1
addi s0, s0, 4
addi a5, a5, 4
beqz a2, .LBB4_14
# Cell body.  Each iteration shifts the 3x3 window one column right, so only
# the three right-hand neighbours are loaded from memory.
.LBB4_17:
mv a3, a1  # above-left
mv a1, a0  # above-centre
lw a0, -2000(a5)  # above-right (newly loaded)
mv a4, s6  # left
mv s6, t0  # s6 = the cell itself
addw a3, a3, a1
lw t0, 0(a5)  # right (newly loaded)
addw a3, a3, a0
addw a3, a3, a4
mv a4, s7  # below-left
mv s7, s1  # below-centre
lw s1, 2000(a5)  # below-right (newly loaded)
addw a3, a3, t0
addw a3, a3, a4
addw a3, a3, s7  # a3 = sum of seven neighbours
add a4, a3, s1  # a4 = sum of all eight (checked in its low 32 bits below)
xori t3, s6, 1  # zero when the cell is 1
xori a4, a4, 2  # low 32 bits zero when the sum is 2
or a4, t3, a4
sext.w a4, a4
beqz a4, .LBB4_16  # cell == 1 and sum == 2: stays alive
addw a3, a3, s1
beq a3, a7, .LBB4_16  # sum == 3: alive
sw zero, 0(s0)  # otherwise the cell is dead
addi a2, a2, -1
addi s0, s0, 4
addi a5, a5, 4
bnez a2, .LBB4_17
j .LBB4_14
# ---- Pass B: sheet2 -> sheet1, then active becomes 1
.LBB4_20:
li t5, 1
bnez ra, .LBB4_10  # empty grid: only flip `active` and count the step
sd a1, 32(sp)  # save the step count; a1 is reused inside the pass
# Prime the sliding window.  t5 is reused as data here; .LBB4_8 restores it.
lui a0, %hi(sheet2)
lw t5, %lo(sheet2)(a0)  # sheet2 row 0, column 0
lw s5, 4(s2)  # sheet2 row 0, column 1
lw s6, 2000(s2)  # sheet2 row 1, column 0
lw s7, 2004(s2)  # sheet2 row 1, column 1
li s8, 1  # s8 = next row number
addi s10, t6, 2004  # s10 = destination row pointer (sheet1 row 1, column 1)
addi s11, s2, 2008  # s11 = source cursor base (sheet2 row 1, column 2)
j .LBB4_23
# Row finished: step both row pointers down one row.
.LBB4_22:
addi s10, s10, 2000
addi s11, s11, 2000
beq s9, s3, .LBB4_8  # the row just finished was row `height`
# Row start: rotate the cached column-0/1 values down by one row.
.LBB4_23:
mv s9, s8  # s9 = current row
mv a6, t5  # a6 = row above, column 0
mv t0, s5  # t0 = row above, column 1
mv t5, s6  # t5 = current row, column 0
mv s5, s7  # s5 = current row, column 1
addi s8, s8, 1
mul a0, s8, t4
add a0, a0, s2  # a0 = &sheet2 row below, column 0
lw s6, 0(a0)  # s6 = row below, column 0
lw s7, 4(a0)  # s7 = row below, column 1
mv s0, s11  # s0 = &sheet2[current row][column 2]
mv a5, s10  # a5 = destination cell pointer
mv a0, s4  # a0 = cells left in this row (counts down from width)
mv t2, s5
mv s1, s7
mv t3, s6
mv a4, t5
j .LBB4_25
# Cell lives: store 1 and advance.
.LBB4_24:
sw t1, 0(a5)
addi a0, a0, -1
addi a5, a5, 4
addi s0, s0, 4
beqz a0, .LBB4_22
# Cell body: same sliding-window scheme as Pass A with different registers.
.LBB4_25:
mv a2, a6  # above-left
mv a6, t0  # above-centre
lw t0, -2000(s0)  # above-right (newly loaded)
mv a1, a4  # left
mv a4, t2  # a4 = the cell itself
addw a2, a6, a2
lw t2, 0(s0)  # right (newly loaded)
addw a2, a2, t0
addw a1, a1, a2
mv a2, t3  # below-left
mv t3, s1  # below-centre
lw s1, 2000(s0)  # below-right (newly loaded)
addw a1, a1, t2
addw a1, a1, a2
addw a2, a1, t3  # a2 = sum of seven neighbours
add a1, a2, s1  # a1 = sum of all eight (checked in its low 32 bits below)
xori a3, a4, 1  # zero when the cell is 1
xori a1, a1, 2  # low 32 bits zero when the sum is 2
or a1, a1, a3
sext.w a1, a1
beqz a1, .LBB4_24  # cell == 1 and sum == 2: stays alive
addw a1, a2, s1
beq a1, a7, .LBB4_24  # sum == 3: alive
sw zero, 0(a5)  # otherwise the cell is dead
addi a0, a0, -1
addi a5, a5, 4
addi s0, s0, 4
bnez a0, .LBB4_25
j .LBB4_22
# End of Pass A: ra was used as a pointer, so restore the empty-grid flag.
.LBB4_28:
ld ra, 24(sp)
j .LBB4_9
# All generations done: write the loop's register state back to memory.
.LBB4_29:
lui a0, %hi(active)
sw t5, %lo(active)(a0)
lui a0, %hi(steps)
sw zero, %lo(steps)(a0)
# --- Phase 3: stop the timer, bring the result into sheet1, print it
.LBB4_30:
li a0, 107  # NOTE(review): argument to the timer; presumably a source line number -- confirm
call _sysy_stoptime
lui a0, %hi(active)
lw a0, %lo(active)(a0)
lui a1, %hi(height)
lwu s3, %lo(height)(a1)  # s3 = height zero-extended (row countdown for the copy)
li a1, 2
sext.w s4, s3  # s4 = height as a signed 32-bit value
bne a0, a1, .LBB4_35  # active != 2: sheet1 already holds the result
blez s4, .LBB4_43  # height <= 0: nothing to copy or print
lui a0, %hi(width)
lw a0, %lo(width)(a0)
blez a0, .LBB4_36  # width <= 0: skip the copy, go straight to printing
slli a0, a0, 32
srli a0, a0, 32
slli s2, a0, 2  # s2 = bytes per row (width * 4)
lui a0, %hi(sheet2+2004)
addi s1, a0, %lo(sheet2+2004)  # s1 = source row pointer (sheet2)
lui a0, %hi(sheet1+2004)
addi s0, a0, %lo(sheet1+2004)  # s0 = destination row pointer (sheet1)
# Copy one row of sheet2 into sheet1 (same work as swap12).
.LBB4_34:
mv a0, s0
mv a1, s1
mv a2, s2
call memcpy@plt
addi s3, s3, -1
addi s1, s1, 2000
addi s0, s0, 2000
bnez s3, .LBB4_34
.LBB4_35:
blez s4, .LBB4_43  # height <= 0: nothing to print
# Print sheet1 (same work as put_map).
.LBB4_36:
li s5, 1  # s5 = value of a live cell
lui a0, %hi(sheet1+2004)
addi s2, a0, %lo(sheet1+2004)  # s2 = &sheet1 row 1, column 1
lui s6, %hi(width)
lui s3, %hi(height)
li a0, 1  # a0 = first row number
j .LBB4_38
# End of a row: emit a line feed (10) and advance to the next row.
.LBB4_37:
li a0, 10
call putch
lw a1, %lo(height)(s3)
addi a0, s4, 1
addi s2, s2, 2000
bge s4, a1, .LBB4_43  # finished row >= height: printing is done
# Start of a row: s4 = row number, s2 = address of its column 1.
.LBB4_38:
lw a1, %lo(width)(s6)
mv s4, a0
blez a1, .LBB4_37  # width <= 0: row has no cells
li s0, 0  # s0 = zero-based column counter
mv s1, s2  # s1 = cell cursor
j .LBB4_41
# Emit the glyph chosen at .LBB4_41 and advance one column.
.LBB4_40:
call putch
lw a0, %lo(width)(s6)
addi s0, s0, 1
addi s1, s1, 4
bge s0, a0, .LBB4_37
# Choose the glyph: 35 ('#') for a cell equal to 1, otherwise 46 ('.').
.LBB4_41:
lw a1, 0(s1)
li a0, 35
beq a1, s5, .LBB4_40
li a0, 46
j .LBB4_40
# Epilogue: return 0 and restore every saved register, including the real ra.
.LBB4_43:
li a0, 0
ld ra, 136(sp)
ld s0, 128(sp)
ld s1, 120(sp)
ld s2, 112(sp)
ld s3, 104(sp)
ld s4, 96(sp)
ld s5, 88(sp)
ld s6, 80(sp)
ld s7, 72(sp)
ld s8, 64(sp)
ld s9, 56(sp)
ld s10, 48(sp)
ld s11, 40(sp)
addi sp, sp, 144
ret
.Lfunc_end4:
.size main, .Lfunc_end4-main
# ---------------------------------------------------------------------------
# Global data objects.
# ---------------------------------------------------------------------------
# sheet1 / sheet2: two zero-initialised grids of 1000000 bytes each.  The code
# in this file addresses them with a 2000-byte row stride and 4-byte cells,
# which makes each grid 500 rows of 500 words.
.type sheet1,@object
.bss
.globl sheet1
.p2align 2
sheet1:
.zero 1000000
.size sheet1, 1000000
.type sheet2,@object
.globl sheet2
.p2align 2
sheet2:
.zero 1000000
.size sheet2, 1000000
# active: 32-bit selector, initialised to 1.  main runs the sheet1 -> sheet2
# pass when it is 1 and the sheet2 -> sheet1 pass otherwise.
.type active,@object
.section .sdata,"aw",@progbits
.globl active
.p2align 2
active:
.word 1
.size active, 4
# width, height, steps: 32-bit values, zero at load time; read_map and main
# fill them from getint.
.type width,@object
.section .sbss,"aw",@nobits
.globl width
.p2align 2
width:
.word 0
.size width, 4
.type height,@object
.globl height
.p2align 2
height:
.word 0
.size height, 4
.type steps,@object
.globl steps
.p2align 2
steps:
.word 0
.size steps, 4
# The _sysy_* objects below are not referenced by any instruction in this file.
# NOTE(review): presumably storage for the _sysy_starttime / _sysy_stoptime
# timing runtime -- confirm against that library.
.type _sysy_start,@object
.bss
.globl _sysy_start
.p2align 3
_sysy_start:
.zero 16
.size _sysy_start, 16
.type _sysy_end,@object
.globl _sysy_end
.p2align 3
_sysy_end:
.zero 16
.size _sysy_end, 16
.type _sysy_l1,@object
.globl _sysy_l1
.p2align 2
_sysy_l1:
.zero 4096
.size _sysy_l1, 4096
.type _sysy_l2,@object
.globl _sysy_l2
.p2align 2
_sysy_l2:
.zero 4096
.size _sysy_l2, 4096
.type _sysy_h,@object
.globl _sysy_h
.p2align 2
_sysy_h:
.zero 4096
.size _sysy_h, 4096
.type _sysy_m,@object
.globl _sysy_m
.p2align 2
_sysy_m:
.zero 4096
.size _sysy_m, 4096
.type _sysy_s,@object
.globl _sysy_s
.p2align 2
_sysy_s:
.zero 4096
.size _sysy_s, 4096
.type _sysy_us,@object
.globl _sysy_us
.p2align 2
_sysy_us:
.zero 4096
.size _sysy_us, 4096
.type _sysy_idx,@object
.section .sbss,"aw",@nobits
.globl _sysy_idx
.p2align 2
_sysy_idx:
.word 0
.size _sysy_idx, 4
# Producer identification and linker notes emitted by the compiler.
.ident "Debian clang version 14.0.6"
.section ".note.GNU-stack","",@progbits
.addrsig