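# sysy-data/performance_c/asm/conv1.s
#
# Listing metadata: 662 lines, 9.7 KiB, syntax label "ArmAsm".
# Note: despite that label, the content is RISC-V (RV64) assembly; see the
# "rv64i..." architecture string in the `.attribute 5` directive below.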

# Everything that follows is emitted into the .text section until a later
# section directive switches away.
.text
# ELF attribute tag 4 (Tag_RISCV_stack_align in the RISC-V ELF psABI): value 16.
.attribute 4, 16
# ELF attribute tag 5 (Tag_RISCV_arch): RV64 base ISA with the M, A, F, D and C
# extensions, each at version 2.0.
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
# Name of the source file recorded for this object.
.file "conv1.sy"
# ---------------------------------------------------------------------------
# checkrange
#   In:   a0 = signed 32-bit value (sign-extended in the register)
#   Out:  a0 = value - down + up (32-bit), where
#           down = (value + 0x3FFFFFFF - min(value, 0x40000000)) & 0xC0000000
#           up   = (0x3FFFFFFF - (value - down)) & 0xC0000000  if value - down <= 0
#                  0                                            otherwise
#   NOTE(review): this looks like the closed form of "subtract 2^30 while the
#   value is greater than 2^30, then add 2^30 while it is negative" - confirm
#   against conv1.sy.
#   Leaf function, no stack use. Scratch registers: a1, a2, a3, a4.
# ---------------------------------------------------------------------------
        .globl  checkrange
        .p2align 1
        .type   checkrange,@function
checkrange:
        lui     a4, 262144              # a4 = 0x40000000
        mv      a3, a0                  # a3 = value (clamped just below)
        addiw   a1, a4, -1              # a1 = 0x3FFFFFFF
        addw    a2, a0, a1              # a2 = value + 0x3FFFFFFF (32-bit wrap)
        blt     a0, a4, .Lcr_clamped
        mv      a3, a4                  # value >= 0x40000000: a3 = 0x40000000
.Lcr_clamped:
        subw    a2, a2, a3              # a2 -= min(value, 0x40000000)
        lui     a3, 786432              # a3 = 0xC0000000 sign-extended to 64 bits
        and     a2, a2, a3              # a2 = down
        subw    a4, a0, a2              # a4 = value - down
        blez    a4, .Lcr_nonpos
        add     a4, a4, a2              # positive: a4 = (value - down) + down
        j       .Lcr_up
.Lcr_nonpos:
        mv      a4, a2                  # zero or negative: a4 = down
.Lcr_up:
        add     a1, a1, a4              # a1 = 0x3FFFFFFF + a4
        sub     a1, a1, a0              # a1 -= value
        and     a1, a1, a3              # a1 = up
        addw    a0, a0, a1              # result = value + up
        subw    a0, a0, a2              #        - down
        ret
        .size   checkrange, .-checkrange
# ---------------------------------------------------------------------------
# reduce: combine two signed 32-bit operands according to a selector.
#   In:   a0 = selector, a1 = first operand (x), a2 = second operand (y)
#   Out:  a0 = result; 0 when the selector, compared unsigned, is above 4
#   Leaf function, no stack use. Scratch registers: a1-a6.
#   Dispatch is an indirect jump through the 5-entry table .LJTI1_0 placed in
#   .rodata at the end of this function:
#     0 -> .LBB1_2   x + y, then adjusted by multiples of 0x40000000
#     1 -> .LBB1_21  per-step "remainders differ" accumulator
#     2 -> .LBB1_7   signed maximum of x and y
#     3 -> .LBB1_10  per-step "either remainder equals 1" accumulator
#     4 -> .LBB1_15  per-step "both remainders equal 1" accumulator
#   The three accumulator paths step a divisor i through 1, 2, 4, ..., 2^29
#   (30 steps). Each step computes (x / i) % 2 and (y / i) % 2 using signed
#   32-bit division, shifts the accumulator left by one and ORs in the step's
#   0/1 outcome, so the outcome for i = 1 ends up in the highest produced bit.
#   The remainder idiom (srliw 31 / add / andi -2 / subw) yields -1, 0 or 1.
# ---------------------------------------------------------------------------
.globl reduce
.p2align 1
.type reduce,@function
reduce:
li a4, 4
bltu a4, a0, .LBB1_20  # selector > 4 (unsigned): return 0
mv a3, a0  # keep the selector; path 1 reuses it as the initial divisor (value 1)
slli a0, a0, 2  # table entries are 4 bytes each
lui a4, %hi(.LJTI1_0)
addi a4, a4, %lo(.LJTI1_0)
add a0, a0, a4
lw a0, 0(a0)  # 32-bit code address, sign-extended by lw
jr a0
# --- selector 0: same folding arithmetic as checkrange, applied to x + y ---
.LBB1_2:
addw a3, a2, a1  # a3 = 32-bit sum
add a1, a1, a2  # a1 = 64-bit sum (only used in wrapped/masked arithmetic below)
lui a5, 262144  # a5 = 0x40000000
addiw a4, a5, -1  # a4 = 0x3FFFFFFF
addw a0, a1, a4  # a0 = sum + 0x3FFFFFFF
mv a2, a3
blt a3, a5, .LBB1_4
lui a2, 262144  # a2 = min(sum, 0x40000000)
.LBB1_4:
subw a2, a0, a2
lui a0, 786432  # a0 = 0xC0000000 sign-extended: keeps multiples of 2^30
and a5, a2, a0  # a5 = amount to subtract
subw a2, a3, a5  # a2 = sum after subtraction
sub a1, a4, a1
add a1, a1, a5  # a1 = 0x3FFFFFFF - a2 (low 32 bits)
blez a2, .LBB1_6
add a1, a1, a2  # a2 > 0: a1 = 0x3FFFFFFF, so nothing is added back
.LBB1_6:
and a0, a0, a1  # a0 = amount to add back
addw a0, a0, a2
ret
# --- selector 2: signed maximum ---
.LBB1_7:
blt a2, a1, .LBB1_9  # y < x: keep x
mv a1, a2
.LBB1_9:
mv a0, a1
ret
# --- selector 3: accumulate 1 when either remainder equals 1 ---
.LBB1_10:
li a0, 0  # accumulator
li a6, 1  # constant 1 for the remainder comparisons
li a4, 1  # divisor i
j .LBB1_12
.LBB1_11:
# Outcome 1: shift in a set bit, then advance i.
slliw a0, a0, 1
ori a0, a0, 1
srliw a3, a4, 29  # non-zero once i has reached 2^29 (last step)
slliw a4, a4, 1
bnez a3, .LBB1_23
.LBB1_12:
divw a5, a1, a4  # x / i
srliw a3, a5, 31
add a3, a3, a5
andi a3, a3, -2
subw a3, a5, a3  # (x / i) % 2
beq a3, a6, .LBB1_11
divw a3, a2, a4  # y / i
srliw a5, a3, 31
add a5, a5, a3
andi a5, a5, -2
subw a3, a3, a5  # (y / i) % 2
beq a3, a6, .LBB1_11
# Outcome 0: shift in a clear bit, then advance i.
slliw a0, a0, 1
srliw a3, a4, 29
slliw a4, a4, 1
beqz a3, .LBB1_12
j .LBB1_23
# --- selector 4: accumulate 1 only when both remainders equal 1 ---
.LBB1_15:
li a0, 0  # accumulator
li a6, 1  # constant 1 for the remainder comparisons
li a4, 1  # divisor i
j .LBB1_17
.LBB1_16:
# Outcome 0: shift in a clear bit, then advance i.
slliw a0, a0, 1
srliw a3, a4, 29
slliw a4, a4, 1
bnez a3, .LBB1_23
.LBB1_17:
divw a5, a1, a4  # x / i
srliw a3, a5, 31
add a3, a3, a5
andi a3, a3, -2
subw a3, a5, a3  # (x / i) % 2
bne a3, a6, .LBB1_16
divw a3, a2, a4  # y / i
srliw a5, a3, 31
add a5, a5, a3
andi a5, a5, -2
subw a3, a3, a5  # (y / i) % 2
bne a3, a6, .LBB1_16
# Outcome 1: shift in a set bit, then advance i.
slliw a0, a0, 1
ori a0, a0, 1
srliw a3, a4, 29
slliw a4, a4, 1
beqz a3, .LBB1_17
j .LBB1_23
# --- selector out of range ---
.LBB1_20:
li a0, 0
ret
# --- selector 1: accumulate 1 when the two remainders differ ---
.LBB1_21:
li a0, 0  # accumulator; divisor i is a3, which holds the selector value 1
.LBB1_22:
divw a4, a1, a3  # x / i
srliw a5, a4, 31
add a5, a5, a4
andi a5, a5, -2
subw a6, a4, a5  # (x / i) % 2
divw a5, a2, a3  # y / i
srliw a4, a5, 31
add a4, a4, a5
andi a4, a4, -2
subw a4, a5, a4  # (y / i) % 2
xor a4, a6, a4
snez a4, a4  # 1 when the remainders differ, else 0
slliw a0, a0, 1
srliw a5, a3, 29  # non-zero once i has reached 2^29 (last step)
slliw a3, a3, 1
or a0, a0, a4
beqz a5, .LBB1_22
# Common exit for the accumulator paths; a0 already holds the result.
.LBB1_23:
ret
.Lfunc_end1:
.size reduce, .Lfunc_end1-reduce
# Jump table for reduce, indexed by selector 0..4; 32-bit entries.
.section .rodata,"a",@progbits
.p2align 2
.LJTI1_0:
.word .LBB1_2
.word .LBB1_21
.word .LBB1_7
.word .LBB1_10
.word .LBB1_15
# Switch back to code for the functions that follow.
.text
# ---------------------------------------------------------------------------
# getvalue: bounds-checked read of a 32-bit word from a row-major 2-D array.
#   In:   a0 = base address of the array
#         a1 = bound for the first index, a2 = bound for the second index
#              (a2 is also the row stride, in words)
#         a3 = first index, a4 = second index
#   Out:  a0 = word at base + 4 * (a3 * a2 + a4) when 0 <= a3 < a1 and
#              0 <= a4 < a2; otherwise 0 (no memory is read)
#   Leaf function, no stack use. Scratch registers: a1, a5.
# ---------------------------------------------------------------------------
        .globl  getvalue
        .p2align 1
        .type   getvalue,@function
getvalue:
        slt     a1, a3, a1              # a1 = (first index < bound)
        xori    a1, a1, 1               # a1 = first index out of range (high side)
        or      a5, a4, a3
        slti    a5, a5, 0               # a5 = either index negative
        or      a1, a1, a5
        slt     a5, a4, a2              # a5 = (second index < bound)
        xori    a5, a5, 1
        or      a5, a5, a1              # a5 != 0 -> out of bounds
        li      a1, 0                   # default result
        bnez    a5, .Lgv_done
        mulw    a1, a3, a2              # row offset in words (32-bit)
        addw    a1, a1, a4
        slli    a1, a1, 2               # words -> bytes
        add     a0, a0, a1
        lw      a1, 0(a0)
.Lgv_done:
        mv      a0, a1
        ret
        .size   getvalue, .-getvalue
# ---------------------------------------------------------------------------
# convn: for every cell of a row-major grid, fold a window of input words into
# one value with the selected operation and store it to the output grid.
#   In:   a0 = operation selector (0..4; other values make every fold step 0)
#         a1 = input base address, a2 = output base address (4-byte words)
#         a3 = row bound, a4 = column bound and row stride in words
#         a5 = window size; half = a5 / 2 with signed truncation
#   Out:  a0 = 0
#   Frame: 160 bytes; saves ra and s0-s11. ra is reused as the constant 1 and
#   s0 as scratch inside the loops, so the outer counter lives in a stack slot.
#   Stack slots:
#       8(sp)  outer trip count = zero-extended max(a3, 1)
#      16(sp)  -half (first window column for column 0)
#      24(sp)  first window row for the current outer iteration
#      32(sp)  outer counter (row)
#      40(sp)  output base address
#      48(sp)  half
#   Loop registers:
#      s6 = column counter, t2 = zero-extended max(a4, 1)
#      s11 = window row, runs from s4 while s11 < s7 = row + half
#      s8  = window column, runs from s2 while s8 < s10 = column + half
#      a6 = fold accumulator, t1 = accumulator value entering the current step
#      a7 = fetched input word, or 0 when (s11, s8) is outside the grid
#   Both window loops test their bound after the body, so each body runs at
#   least once even when half is 0.
#   Fold dispatch goes through the 5-entry table .LJTI3_0 at the end of the
#   function. The per-selector code appears to be an inlined copy of reduce
#   applied to (t1, a7) - NOTE(review): confirm against conv1.sy.
# ---------------------------------------------------------------------------
.globl convn
.p2align 1
.type convn,@function
convn:
addi sp, sp, -160
sd ra, 152(sp)
sd s0, 144(sp)
sd s1, 136(sp)
sd s2, 128(sp)
sd s3, 120(sp)
sd s4, 112(sp)
sd s5, 104(sp)
sd s6, 96(sp)
sd s7, 88(sp)
sd s8, 80(sp)
sd s9, 72(sp)
sd s10, 64(sp)
sd s11, 56(sp)
sd a2, 40(sp)  # spill output base; a2 is scratch from here on
srliw s0, a5, 31
li a6, 1
addw s0, s0, a5  # window size + sign bit, so the shift below truncates toward zero
mv a5, a4
blt a6, a4, .LBB3_2
li a5, 1  # a5 = max(a4, 1)
.LBB3_2:
lui t0, 262144
sraiw a2, s0, 1  # a2 = half
sd a2, 48(sp)
mv s1, a3
blt a6, a3, .LBB3_4
li s1, 1  # s1 = max(a3, 1)
.LBB3_4:
li s0, 0  # outer counter (row) = 0
addiw t3, t0, -1  # t3 = 0x3FFFFFFF
ld a2, 48(sp)  # reload of the half value spilled just above
neg a2, a2  # a2 = -half
slli s1, s1, 32
srli s1, s1, 32
sd s1, 8(sp)  # outer trip count
slli a5, a5, 32
srli t2, a5, 32  # t2 = inner (column) trip count
li t4, 4  # highest valid selector
lui a5, %hi(.LJTI3_0)
addi t6, a5, %lo(.LJTI3_0)  # t6 = jump table address
lui t5, 786432  # t5 = 0xC0000000 sign-extended (multiples-of-2^30 mask)
li ra, 1  # ra doubles as the constant 1 in the selector 3 and 4 loops
sd a2, 16(sp)
mv a5, a2  # first window row for row 0 is -half
j .LBB3_6
# ---- end of one row: advance the row counter and the first window row ----
.LBB3_5:
ld s0, 32(sp)
addi s0, s0, 1
ld a5, 24(sp)
addiw a5, a5, 1
ld a2, 8(sp)
beq s0, a2, .LBB3_37
# ---- start of a row ----
.LBB3_6:
li s6, 0  # column counter = 0
sd a5, 24(sp)
sext.w s4, a5  # s4 = first window row
mul s3, s0, a4  # s3 = row * stride (output row offset in words)
ld a2, 48(sp)
sd s0, 32(sp)  # s0 is reused as scratch below
add s7, s0, a2  # s7 = row + half (window row limit)
ld s5, 16(sp)  # s5 = first window column for column 0 (-half)
j .LBB3_8
# ---- window finished: store the folded value and move to the next column ----
.LBB3_7:
add a2, s6, s3
slli a2, a2, 2
ld a5, 40(sp)
add a2, a2, a5
sw a6, 0(a2)  # output[row * stride + column] = accumulator
addi s6, s6, 1
addiw s5, s5, 1
beq s6, t2, .LBB3_5
# ---- start of a window ----
.LBB3_8:
li a6, 0  # accumulator = 0
sext.w s2, s5  # s2 = first window column
ld a2, 48(sp)
add s10, s6, a2  # s10 = column + half (window column limit)
mv s11, s4  # window row = first window row
j .LBB3_10
.LBB3_9:
addi s11, s11, 1
bge s11, s7, .LBB3_7
.LBB3_10:
mul s9, s11, a4  # s9 = window row * stride
mv s8, s2  # window column = first window column
j .LBB3_13
# Selector 2 joins here with its result in t1.
.LBB3_11:
mv a6, t1
# Common continuation after every fold step: next window column.
.LBB3_12:
addi s8, s8, 1
bge s8, s10, .LBB3_9
# ---- fetch input word with bounds check (same test as getvalue) ----
.LBB3_13:
slt a5, s11, a3
xori a5, a5, 1  # window row >= row bound
or s0, s8, s11
srliw s0, s0, 31  # sign bit of (column | row): either one negative
or a5, a5, s0
slt s0, s8, a4
xori s0, s0, 1  # window column >= column bound
or s0, s0, a5
li a7, 0  # out-of-bounds value
mv t1, a6  # t1 = accumulator entering this step
bnez s0, .LBB3_15
add s1, s8, s9
slli s1, s1, 2
add s1, s1, a1
lw a7, 0(s1)  # a7 = input[window row * stride + window column]
# ---- dispatch on the selector ----
.LBB3_15:
bltu t4, a0, .LBB3_22  # selector > 4 (unsigned): accumulator becomes 0
slli s1, a0, 2
add s1, s1, t6
lw s0, 0(s1)  # 32-bit code address, sign-extended by lw
jr s0
# ---- selector 0: t1 + a7, folded by multiples of 0x40000000 ----
.LBB3_17:
addw s1, a7, t1  # 32-bit sum
lui t0, 262144  # t0 = 0x40000000 (t0 is overwritten by the selector 3 loop)
mv a6, s1
blt s1, t0, .LBB3_19
lui a6, 262144  # a6 = min(sum, 0x40000000)
.LBB3_19:
add s0, a7, t1
addw a5, s0, t3
subw a5, a5, a6
and a5, a5, t5  # a5 = amount to subtract
subw s1, s1, a5  # s1 = sum after subtraction
sub s0, t3, s0
add a5, a5, s0
blez s1, .LBB3_21
add a5, a5, s1
.LBB3_21:
and a5, a5, t5  # a5 = amount to add back
addw a6, a5, s1
j .LBB3_12
# ---- selector out of range ----
.LBB3_22:
li a6, 0
j .LBB3_12
# ---- selector 1: accumulate 1 when the two remainders differ ----
.LBB3_23:
li a6, 0
mv s0, a0  # divisor i starts from the selector value, which is 1 here
.LBB3_24:
divw a5, t1, s0
srliw a2, a5, 31
add a2, a2, a5
andi a2, a2, -2
subw a2, a5, a2  # (t1 / i) % 2
divw a5, a7, s0
srliw s1, a5, 31
add s1, s1, a5
andi s1, s1, -2
subw a5, a5, s1  # (a7 / i) % 2
xor a2, a2, a5
snez a2, a2
slliw a5, a6, 1
srliw s1, s0, 29  # non-zero once i has reached 2^29 (last step)
slliw s0, s0, 1
or a6, a5, a2
beqz s1, .LBB3_24
j .LBB3_12
# ---- selector 2: signed maximum of t1 and a7 ----
.LBB3_25:
blt a7, t1, .LBB3_11
mv t1, a7
j .LBB3_11
# ---- selector 3: accumulate 1 when either remainder equals 1 ----
.LBB3_27:
li a6, 0
li t0, 1  # divisor i
j .LBB3_29
.LBB3_28:
slliw a2, a6, 1
ori a6, a2, 1
srliw a2, t0, 29
slliw t0, t0, 1
bnez a2, .LBB3_12
.LBB3_29:
divw a2, t1, t0
srliw a5, a2, 31
add a5, a5, a2
andi a5, a5, -2
subw a2, a2, a5  # (t1 / i) % 2
beq a2, ra, .LBB3_28  # ra holds the constant 1
divw a2, a7, t0
srliw a5, a2, 31
add a5, a5, a2
andi a5, a5, -2
subw a2, a2, a5  # (a7 / i) % 2
beq a2, ra, .LBB3_28
slliw a6, a6, 1
srliw a2, t0, 29
slliw t0, t0, 1
beqz a2, .LBB3_29
j .LBB3_12
# ---- selector 4: accumulate 1 only when both remainders equal 1 ----
.LBB3_32:
li a6, 0
li s0, 1  # divisor i
j .LBB3_34
.LBB3_33:
slliw a6, a6, 1
srliw a2, s0, 29
slliw s0, s0, 1
bnez a2, .LBB3_12
.LBB3_34:
divw a2, t1, s0
srliw a5, a2, 31
add a5, a5, a2
andi a5, a5, -2
subw a2, a2, a5  # (t1 / i) % 2
bne a2, ra, .LBB3_33  # ra holds the constant 1
divw a2, a7, s0
srliw a5, a2, 31
add a5, a5, a2
andi a5, a5, -2
subw a2, a2, a5  # (a7 / i) % 2
bne a2, ra, .LBB3_33
slliw a2, a6, 1
ori a6, a2, 1
srliw a2, s0, 29
slliw s0, s0, 1
beqz a2, .LBB3_34
j .LBB3_12
# ---- epilogue: return 0 and restore every saved register ----
.LBB3_37:
li a0, 0
ld ra, 152(sp)
ld s0, 144(sp)
ld s1, 136(sp)
ld s2, 128(sp)
ld s3, 120(sp)
ld s4, 112(sp)
ld s5, 104(sp)
ld s6, 96(sp)
ld s7, 88(sp)
ld s8, 80(sp)
ld s9, 72(sp)
ld s10, 64(sp)
ld s11, 56(sp)
addi sp, sp, 160
ret
.Lfunc_end3:
.size convn, .Lfunc_end3-convn
# Jump table for the fold dispatch in convn, indexed by selector 0..4.
.section .rodata,"a",@progbits
.p2align 2
.LJTI3_0:
.word .LBB3_17
.word .LBB3_23
.word .LBB3_25
.word .LBB3_27
.word .LBB3_32
# Switch back to code for the functions that follow.
.text
# ---------------------------------------------------------------------------
# memmove: word-wise forward copy defined by this file.
#   The count is in 4-byte words, not bytes, and the symbol is global, so it
#   has the same name as the C library routine but a different contract.
#   In:   a0 = destination address, a1 = source address
#         a2 = number of 4-byte words to copy (signed; <= 0 copies nothing)
#   Out:  no result is set; on return a0 and a1 have been advanced past the
#         last word copied and a2 is 0 (when anything was copied)
#   Words are copied in ascending address order with no overlap handling.
#   Leaf function, no stack use. Scratch register: a3.
# ---------------------------------------------------------------------------
        .globl  memmove
        .p2align 1
        .type   memmove,@function
memmove:
        blez    a2, .Lmm_done           # nothing to copy
        slli    a2, a2, 32
        srli    a2, a2, 32              # zero-extend the 32-bit count
.Lmm_word:
        lw      a3, 0(a1)
        addi    a1, a1, 4
        sw      a3, 0(a0)
        addi    a2, a2, -1
        addi    a0, a0, 4
        bnez    a2, .Lmm_word
.Lmm_done:
        ret
        .size   memmove, .-memmove
# ---------------------------------------------------------------------------
# main: read parameters and data, run convn once per entry of kernelid,
# copy b back over a after each run, then print a.
#   Frame: 96 bytes; saves ra and s0-s9.
#   Values kept across calls:
#      s3 = first getint result  (passed to convn in a5)
#      s5 = second getint result (passed to convn in a3)
#      s6 = third getint result  (passed to convn in a4)
#      s2 = address of a
#      s1 = return value of getarray(kernelid), used as the number of runs
#      s4 = s6 * s5 (32-bit product), the element count given to putarray
#   getint, getarray, putarray, _sysy_starttime and _sysy_stoptime are external.
#   NOTE(review): the constants 110 and 117 passed to the timing calls are
#   presumably source line numbers - confirm against the runtime library.
#   Out:  a0 = 0
# ---------------------------------------------------------------------------
.globl main
.p2align 1
.type main,@function
main:
addi sp, sp, -96
sd ra, 88(sp)
sd s0, 80(sp)
sd s1, 72(sp)
sd s2, 64(sp)
sd s3, 56(sp)
sd s4, 48(sp)
sd s5, 40(sp)
sd s6, 32(sp)
sd s7, 24(sp)
sd s8, 16(sp)
sd s9, 8(sp)
call getint
mv s3, a0
call getint
mv s5, a0
call getint
mv s6, a0
lui a0, %hi(a)
addi s2, a0, %lo(a)
mv a0, s2
call getarray  # fills a; the return value is not used
lui a0, %hi(kernelid)
addi a0, a0, %lo(kernelid)
call getarray
mv s1, a0  # number of entries read into kernelid
li a0, 110
call _sysy_starttime
mulw s4, s6, s5
blez s1, .LBB5_6  # no runs requested
blez s4, .LBB5_4  # element count <= 0: use the loop variant without the copy
slli a0, s4, 32
srli s7, a0, 30  # s7 = zero-extended element count * 4 = bytes to copy
lui a0, %hi(kernelid)
addi s0, a0, %lo(kernelid)  # s0 walks the kernelid entries
slli a0, s1, 32
srli s1, a0, 32  # zero-extended remaining-run counter
lui a0, %hi(a)
addi s8, a0, %lo(a)
lui a0, %hi(b)
addi s9, a0, %lo(b)
# Loop variant with copy-back: convn(kernelid[k], a, b, s5, s6, s3), then
# copy s7 bytes from b to a.
.LBB5_3:
lw a0, 0(s0)
mv a1, s8
mv a2, s9
mv a3, s5
mv a4, s6
mv a5, s3
call convn
mv a0, s8
mv a1, s9
mv a2, s7
# NOTE(review): this calls the C library memcpy, not the memmove defined in
# this file; presumably the compiler replaced an inlined word-copy loop -
# confirm against conv1.sy.
call memcpy@plt
addi s1, s1, -1
addi s0, s0, 4
bnez s1, .LBB5_3
j .LBB5_6
# Loop variant without copy-back (element count <= 0).
.LBB5_4:
lui a0, %hi(kernelid)
addi s0, a0, %lo(kernelid)
slli a0, s1, 32
srli s1, a0, 32
lui a0, %hi(a)
addi s7, a0, %lo(a)
lui a0, %hi(b)
addi s8, a0, %lo(b)
.LBB5_5:
lw a0, 0(s0)
mv a1, s7
mv a2, s8
mv a3, s5
mv a4, s6
mv a5, s3
call convn
addi s1, s1, -1
addi s0, s0, 4
bnez s1, .LBB5_5
# Stop timing, print s4 elements of a, and return 0.
.LBB5_6:
li a0, 117
call _sysy_stoptime
mv a0, s4
mv a1, s2
call putarray
li a0, 0
ld ra, 88(sp)
ld s0, 80(sp)
ld s1, 72(sp)
ld s2, 64(sp)
ld s3, 56(sp)
ld s4, 48(sp)
ld s5, 40(sp)
ld s6, 32(sp)
ld s7, 24(sp)
ld s8, 16(sp)
ld s9, 8(sp)
addi sp, sp, 96
ret
.Lfunc_end5:
.size main, .Lfunc_end5-main
# ---------------------------------------------------------------------------
# Zero-initialised global data (.bss unless noted otherwise).
# ---------------------------------------------------------------------------
# a: 40,000,000 bytes, 4-byte aligned. main fills it via getarray and passes
# it to convn as the input grid (10,000,000 words at 4 bytes each).
.type a,@object
.bss
.globl a
.p2align 2
a:
.zero 40000000
.size a, 40000000
# kernelid: 40,000 bytes, 4-byte aligned. main reads one 4-byte selector per
# run from it (room for 10,000 words).
.type kernelid,@object
.globl kernelid
.p2align 2
kernelid:
.zero 40000
.size kernelid, 40000
# b: 40,000,000 bytes, 4-byte aligned. main passes it to convn as the output
# grid and copies it back over a after each run.
.type b,@object
.globl b
.p2align 2
b:
.zero 40000000
.size b, 40000000
# The _sysy_* objects below are not referenced by any code in this file.
# NOTE(review): presumably bookkeeping for the external timing routines
# (_sysy_starttime / _sysy_stoptime) - confirm against the runtime library.
# _sysy_start: 16 bytes, 8-byte aligned.
.type _sysy_start,@object
.globl _sysy_start
.p2align 3
_sysy_start:
.zero 16
.size _sysy_start, 16
# _sysy_end: 16 bytes, 8-byte aligned.
.type _sysy_end,@object
.globl _sysy_end
.p2align 3
_sysy_end:
.zero 16
.size _sysy_end, 16
# _sysy_l1: 4096 bytes, 4-byte aligned.
.type _sysy_l1,@object
.globl _sysy_l1
.p2align 2
_sysy_l1:
.zero 4096
.size _sysy_l1, 4096
# _sysy_l2: 4096 bytes, 4-byte aligned.
.type _sysy_l2,@object
.globl _sysy_l2
.p2align 2
_sysy_l2:
.zero 4096
.size _sysy_l2, 4096
# _sysy_h: 4096 bytes, 4-byte aligned.
.type _sysy_h,@object
.globl _sysy_h
.p2align 2
_sysy_h:
.zero 4096
.size _sysy_h, 4096
# _sysy_m: 4096 bytes, 4-byte aligned.
.type _sysy_m,@object
.globl _sysy_m
.p2align 2
_sysy_m:
.zero 4096
.size _sysy_m, 4096
# _sysy_s: 4096 bytes, 4-byte aligned.
.type _sysy_s,@object
.globl _sysy_s
.p2align 2
_sysy_s:
.zero 4096
.size _sysy_s, 4096
# _sysy_us: 4096 bytes, 4-byte aligned.
.type _sysy_us,@object
.globl _sysy_us
.p2align 2
_sysy_us:
.zero 4096
.size _sysy_us, 4096
# _sysy_idx: one zero-initialised 4-byte word, placed in .sbss rather than .bss.
.type _sysy_idx,@object
.section .sbss,"aw",@nobits
.globl _sysy_idx
.p2align 2
_sysy_idx:
.word 0
.size _sysy_idx, 4
# Producer identification string.
.ident "Debian clang version 14.0.6"
# Empty .note.GNU-stack section (conventionally marks the object as not
# needing an executable stack).
.section ".note.GNU-stack","",@progbits
# Address-significance table entries for a, kernelid and b.
.addrsig
.addrsig_sym a
.addrsig_sym kernelid
.addrsig_sym b