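# sysy-data/performance_c/asm/03_sort1.s
#
# Repository-viewer metadata (not part of the assembly source):
#   694 lines, 10 KiB, last modified 2024-06-14 13:34:46 +08:00.
#   The viewer tagged this file as "ArmAsm"; the directives below
#   (.attribute 5, "rv64i...") show that it is RISC-V RV64 assembly.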
.text
.attribute 4, 16
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
.file "03_sort1.sy"
# ---------------------------------------------------------------------------
# getMaxNum
#   In:   a0 = element count (32-bit signed)
#         a1 = address of the first 32-bit element
#   Out:  a0 = largest element, but never less than 0
#              (0 is returned when the count is <= 0)
#   Uses: a1, a2, a3 as scratch; leaf function, no stack frame
# ---------------------------------------------------------------------------
        .globl  getMaxNum
        .p2align 1
        .type   getMaxNum,@function
getMaxNum:
        mv      a2, a0                  # a2 = count
        li      a0, 0                   # running maximum starts at 0
        blez    a2, .LgetMaxNum_done    # nothing to scan
        slli    a2, a2, 32
        srli    a2, a2, 30              # a2 = zero-extended count * 4 (bytes)
        add     a2, a2, a1              # a2 = address one past the last element
.LgetMaxNum_scan:
        lw      a3, 0(a1)               # a3 = current element
        addi    a1, a1, 4
        bge     a0, a3, .LgetMaxNum_next
        mv      a0, a3                  # element is strictly larger: new maximum
.LgetMaxNum_next:
        bne     a1, a2, .LgetMaxNum_scan
.LgetMaxNum_done:
        ret
.Lfunc_end0:
        .size   getMaxNum, .Lfunc_end0-getMaxNum
# ---------------------------------------------------------------------------
# getNumPos
#   In:   a0 = 32-bit signed value
#         a1 = number of base-16 digits to discard first
#   Out:  a0 = (value / 16^a1) % 16, using signed division that truncates
#              toward zero and a signed remainder (so the result is in
#              -15..15 and is negative for a negative quotient)
#   Uses: t0, t1 as scratch; leaf function, no stack frame
# ---------------------------------------------------------------------------
        .globl  getNumPos
        .p2align 1
        .type   getNumPos,@function
getNumPos:
        blez    a1, .LgetNumPos_digit   # no digits to discard
        li      t0, 0                   # t0 = digits discarded so far
.LgetNumPos_shift:
        sraiw   t1, a0, 31              # t1 = -1 if a0 < 0, else 0
        srliw   t1, t1, 28              # t1 = 15 if a0 < 0, else 0 (rounding bias)
        addw    a0, a0, t1
        sraiw   a0, a0, 4               # a0 = a0 / 16, truncating toward zero
        addiw   t0, t0, 1
        blt     t0, a1, .LgetNumPos_shift
.LgetNumPos_digit:
        sraiw   t1, a0, 31
        srliw   t1, t1, 28              # same bias as above
        add     t1, t1, a0
        andi    t1, t1, -16             # t1 = 16 * (a0 / 16)
        subw    a0, a0, t1              # a0 = a0 % 16
        ret
.Lfunc_end1:
        .size   getNumPos, .Lfunc_end1-getNumPos
# ---------------------------------------------------------------------------
# radixSort -- recursive, in-place bucket pass over the words a[a2 .. a3),
# keyed on one base-16 digit of each 32-bit element.
#
# Arguments (as used below):
#   a0 = digit index: an element is divided by 16 (signed, truncating toward
#        zero) a0 times before its remainder mod 16 is taken; the recursive
#        calls pass a0 - 1
#   a1 = base address of the word array (kept in s5 for the whole function)
#   a2 = first index of the range (inclusive)
#   a3 = end index of the range (exclusive)
# Returns immediately when a0 == -1 or a2 + 1 >= a3. No result register is
# set up on the early-return path.
#
# 256-byte frame:
#   sp+8   .. sp+71    cnt[16]  per-digit element counts (zero-initialised)
#   sp+72  .. sp+135   end[16]  one-past-last index of each digit's bucket
#   sp+136 .. sp+199   cur[16]  next index to fill in each bucket; starts at
#                               the bucket's first index
#   sp+200 .. sp+255   saved s5, s4, s3, s2, s1, s0, ra
#
# NOTE(review): the digit is a signed remainder, so a negative element would
# produce a negative offset into cnt/cur. Presumably callers only pass
# non-negative data -- confirm.
# ---------------------------------------------------------------------------
.globl radixSort
.p2align 1
.type radixSort,@function
radixSort:
# Prologue: allocate the frame and save ra and s0-s5.
addi sp, sp, -256
sd ra, 248(sp)
sd s0, 240(sp)
sd s1, 232(sp)
sd s2, 224(sp)
sd s3, 216(sp)
sd s4, 208(sp)
sd s5, 200(sp)
# s5 = array base for the rest of the function.
mv s5, a1
# Zero cnt[0..15] with eight doubleword stores (the last one, to sp+8, is
# scheduled after the guard computation below).
sd zero, 64(sp)
sd zero, 56(sp)
sd zero, 48(sp)
sd zero, 40(sp)
sd zero, 32(sp)
sd zero, 24(sp)
sd zero, 16(sp)
# Guard: a1 = (a0 != -1) & (a2 + 1 < a3); leave through .LBB2_8 when it is 0.
addi a1, a0, 1
snez a1, a1
addiw a4, a2, 1
slt a4, a4, a3
and a1, a1, a4
sd zero, 8(sp)
beqz a1, .LBB2_8
# a0 <= 0 needs no divide-by-16 iterations: use the simpler loop at .LBB2_9.
blez a0, .LBB2_9
# ---- Counting pass, a0 > 0 --------------------------------------------------
# a6 = &cnt[0], a4 = i (starts at a2, runs while i < a3).
addi a6, sp, 8
mv a4, a2
.LBB2_3:
# s0 = a[i]; a5 is a working copy that gets divided down.
slli a5, a4, 2
add a5, a5, s5
lw s0, 0(a5)
li s1, 0
mv a5, s0
.LBB2_4:
# a5 = a5 / 16 (add 15 first when negative, then arithmetic shift), a0 times.
sraiw a1, a5, 31
srliw a1, a1, 28
addw a1, a1, a5
addiw s1, s1, 1
sraiw a5, a1, 4
blt s1, a0, .LBB2_4
# a1 = a5 % 16. After the arithmetic shift by 4 the top four bits of the word
# all equal the sign bit, so srliw by 28 gives 15 for a negative value and 0
# otherwise.
srliw a1, a5, 28
add a1, a1, a5
andi a1, a1, -16
sub a1, a5, a1
# s1 = cnt[digit]
slli a1, a1, 2
add a1, a1, a6
lw s1, 0(a1)
li a5, 0
.LBB2_6:
# The same digit is computed a second time from s0, for the store address.
sraiw a1, s0, 31
srliw a1, a1, 28
addw a1, a1, s0
addiw a5, a5, 1
sraiw s0, a1, 4
blt a5, a0, .LBB2_6
# cnt[digit] = s1 + 1; i++; repeat while i < a3.
addiw a1, s1, 1
srliw a5, s0, 28
add a5, a5, s0
andi a5, a5, -16
sub a5, s0, a5
slli a5, a5, 2
add a5, a5, a6
addi a4, a4, 1
sw a1, 0(a5)
blt a4, a3, .LBB2_3
j .LBB2_11
.LBB2_8:
# Early return: restore the saved registers and release the frame.
ld ra, 248(sp)
ld s0, 240(sp)
ld s1, 232(sp)
ld s2, 224(sp)
ld s3, 216(sp)
ld s4, 208(sp)
ld s5, 200(sp)
addi sp, sp, 256
ret
.LBB2_9:
# ---- Counting pass, a0 <= 0 -------------------------------------------------
# a1 = &a[a2] (advanced by 4 per element), a4 = &cnt[0], a5 = i.
slli a1, a2, 2
add a1, a1, s5
addi a4, sp, 8
mv a5, a2
.LBB2_10:
# digit = a[i] % 16 (signed); cnt[digit] += 1; loop while i < a3.
lw s1, 0(a1)
sraiw s0, s1, 31
srliw s0, s0, 28
add s0, s0, s1
andi s0, s0, -16
subw s1, s1, s0
slli s1, s1, 2
add s1, s1, a4
lw s0, 0(s1)
addiw s0, s0, 1
sw s0, 0(s1)
addi a5, a5, 1
addi a1, a1, 4
blt a5, a3, .LBB2_10
.LBB2_11:
# ---- Bucket boundaries (running sum of cnt, fully unrolled) -----------------
#   cur[0] = a2;          end[0] = a2 + cnt[0]
#   cur[k] = end[k-1];    end[k] = cur[k] + cnt[k]      for k = 1..15
# a3 is overwritten here with end[0]; the original end index is not used
# again. a1 carries the running sum.
lw a1, 8(sp)
sw a2, 136(sp)
lw a4, 12(sp)
addw a3, a1, a2
sw a3, 72(sp)
sw a3, 140(sp)
addw a1, a4, a3
lw a4, 16(sp)
sw a1, 76(sp)
sw a1, 144(sp)
lw a5, 20(sp)
addw a1, a1, a4
sw a1, 80(sp)
sw a1, 148(sp)
addw a1, a1, a5
lw a4, 24(sp)
sw a1, 84(sp)
sw a1, 152(sp)
lw a5, 28(sp)
addw a1, a1, a4
sw a1, 88(sp)
sw a1, 156(sp)
addw a1, a1, a5
lw a4, 32(sp)
sw a1, 92(sp)
sw a1, 160(sp)
lw a5, 36(sp)
addw a1, a1, a4
sw a1, 96(sp)
sw a1, 164(sp)
addw a1, a1, a5
lw a4, 40(sp)
sw a1, 100(sp)
sw a1, 168(sp)
lw a5, 44(sp)
addw a1, a1, a4
sw a1, 104(sp)
sw a1, 172(sp)
addw a1, a1, a5
lw a4, 48(sp)
sw a1, 108(sp)
sw a1, 176(sp)
lw a5, 52(sp)
addw a1, a1, a4
sw a1, 112(sp)
sw a1, 180(sp)
addw a1, a1, a5
lw a4, 56(sp)
sw a1, 116(sp)
sw a1, 184(sp)
lw a5, 60(sp)
addw a1, a1, a4
sw a1, 120(sp)
sw a1, 188(sp)
addw a1, a1, a5
lw a4, 64(sp)
sw a1, 124(sp)
sw a1, 192(sp)
lw a5, 68(sp)
addw a1, a1, a4
sw a1, 128(sp)
sw a1, 196(sp)
addw a1, a1, a5
sw a1, 132(sp)
# a0 <= 0: use the permutation loop without divide iterations (.LBB2_29).
blez a0, .LBB2_29
# ---- In-place permutation, a0 > 0 -------------------------------------------
# t2 = bucket number b (0..15), t3 = &cur[0], a6 = &end[0], a7 = 16.
li t2, 0
addi t3, sp, 136
addi a6, sp, 72
li a7, 16
j .LBB2_14
.LBB2_13:
# Next bucket; after bucket 15 continue with the recursion at .LBB2_36.
addi t2, t2, 1
beq t2, a7, .LBB2_36
.LBB2_14:
# t1 = &cur[b], t0 = end[b], a1 = cur[b]; the bucket is finished once
# cur[b] >= end[b].
slli a1, t2, 2
add t1, t3, a1
add a1, a1, a6
lw t0, 0(a1)
lw a1, 0(t1)
blt a1, t0, .LBB2_16
j .LBB2_13
.LBB2_15:
# The value held in a4 belongs to bucket b: a[cur[b]] = a4; cur[b] += 1.
# Leave the bucket when cur[b] reaches end[b], otherwise fall through and
# pick up the next element.
lw a1, 0(t1)
slli a5, a1, 2
add a5, a5, s5
sw a4, 0(a5)
addiw a1, a1, 1
sw a1, 0(t1)
bge a1, t0, .LBB2_13
.LBB2_16:
# a4 = a[cur[b]]: the value to be placed next.
slli a1, a1, 2
add a1, a1, s5
lw a4, 0(a1)
.LBB2_17:
# Compute the digit of a4 (working copy in a5).
li s1, 0
mv a5, a4
.LBB2_18:
sraiw a1, a5, 31
srliw a1, a1, 28
addw a1, a1, a5
addiw s1, s1, 1
sraiw a5, a1, 4
blt s1, a0, .LBB2_18
srliw a1, a5, 28
add a1, a1, a5
andi a1, a1, -16
subw a1, a5, a1
# Zero-extend the 32-bit digit before the 64-bit compare with b.
slli a1, a1, 32
srli a1, a1, 32
beq t2, a1, .LBB2_15
# a4 belongs to another bucket d. The digit d is recomputed from a4 before
# each of the four uses below.
# (1) a5 = a[cur[d]]: the element that a4 is about to displace.
li s1, 0
mv a5, a4
.LBB2_21:
sraiw a1, a5, 31
srliw a1, a1, 28
addw a1, a1, a5
addiw s1, s1, 1
sraiw a5, a1, 4
blt s1, a0, .LBB2_21
srliw a1, a5, 28
add a1, a1, a5
andi a1, a1, -16
sub a1, a5, a1
slli a1, a1, 2
add a1, a1, t3
lw a1, 0(a1)
slli a1, a1, 2
add a1, a1, s5
lw a5, 0(a1)
# (2) a[cur[d]] = a4
li s0, 0
mv s1, a4
.LBB2_23:
sraiw a1, s1, 31
srliw a1, a1, 28
addw a1, a1, s1
addiw s0, s0, 1
sraiw s1, a1, 4
blt s0, a0, .LBB2_23
srliw a1, s1, 28
add a1, a1, s1
andi a1, a1, -16
sub a1, s1, a1
slli a1, a1, 2
add a1, a1, t3
lw a1, 0(a1)
li s1, 0
slli a1, a1, 2
add a1, a1, s5
sw a4, 0(a1)
# (3) s0 = cur[d]
mv a1, a4
.LBB2_25:
sraiw s0, a1, 31
srliw s0, s0, 28
addw a1, a1, s0
addiw s1, s1, 1
sraiw a1, a1, 4
blt s1, a0, .LBB2_25
srliw s1, a1, 28
add s1, s1, a1
andi s1, s1, -16
sub a1, a1, s1
slli a1, a1, 2
add a1, a1, t3
lw s0, 0(a1)
# (4) cur[d] = s0 + 1 (this pass divides a4 itself down, consuming it).
li s1, 0
.LBB2_27:
sraiw a1, a4, 31
srliw a1, a1, 28
addw a1, a1, a4
addiw s1, s1, 1
sraiw a4, a1, 4
blt s1, a0, .LBB2_27
addiw a1, s0, 1
srliw s1, a4, 28
add s1, s1, a4
andi s1, s1, -16
sub a4, a4, s1
slli a4, a4, 2
add a4, a4, t3
sw a1, 0(a4)
# Continue the chain with the displaced element.
mv a4, a5
j .LBB2_17
.LBB2_29:
# ---- In-place permutation, a0 <= 0 ------------------------------------------
# Same scheme, digit = value % 16 with no divide iterations.
# t3 = bucket number b, t2 = &cur[0], a6 = &end[0], a7 = 16.
li t3, 0
addi t2, sp, 136
addi a6, sp, 72
li a7, 16
j .LBB2_31
.LBB2_30:
# Next bucket; after bucket 15 continue with the recursion at .LBB2_36.
addi t3, t3, 1
beq t3, a7, .LBB2_36
.LBB2_31:
# t1 = &cur[b], t0 = end[b], a5 = cur[b].
slli a5, t3, 2
add t1, t2, a5
add a5, a5, a6
lw t0, 0(a5)
lw a5, 0(t1)
blt a5, t0, .LBB2_33
j .LBB2_30
.LBB2_32:
# s1 belongs to bucket b: a[cur[b]] = s1; cur[b] += 1 (a5 holds cur[b]).
slli a1, a5, 2
add a1, a1, s5
sw s1, 0(a1)
addiw a5, a5, 1
sw a5, 0(t1)
bge a5, t0, .LBB2_30
.LBB2_33:
# s1 = a[cur[b]]; s0 = zero-extended (s1 % 16); already in place if it is b.
slli s1, a5, 2
add s1, s1, s5
lw s1, 0(s1)
sraiw s0, s1, 31
srliw s0, s0, 28
add s0, s0, s1
andi s0, s0, -16
subw s0, s1, s0
slli s0, s0, 32
srli s0, s0, 32
beq t3, s0, .LBB2_32
.LBB2_34:
# Displacement chain: a5 = value to place, d = a5 % 16, s0 = &cur[d].
# Swap a5 into a[cur[d]] (the old content becomes the new s1), bump cur[d],
# and repeat until the displaced value's digit equals b.
mv a5, s1
sraiw s1, s1, 31
srliw s1, s1, 28
add s1, s1, a5
andi s1, s1, -16
subw s1, a5, s1
slli s1, s1, 2
add s0, t2, s1
lw a4, 0(s0)
slli s1, a4, 2
add a1, s5, s1
lw s1, 0(a1)
sw a5, 0(a1)
addiw a1, a4, 1
sraiw a4, s1, 31
srliw a4, a4, 28
add a4, a4, s1
andi a4, a4, -16
subw a4, s1, a4
slli a4, a4, 32
srli a4, a4, 32
sw a1, 0(s0)
bne t3, a4, .LBB2_34
# Chain closed: reload cur[b] into a5 and store s1 there via .LBB2_32.
lw a5, 0(t1)
j .LBB2_32
.LBB2_36:
# ---- Recurse into each of the 16 buckets with digit index a0 - 1 ------------
# s2 = a0 - 1 and s5 = array base are passed to every call. Bucket 0 is
# [a2, a3): a2 still holds the caller's first index and a3 holds end[0] from
# the boundary computation above.
sw a3, 72(sp)
addiw s2, a0, -1
mv a0, s2
mv a1, s5
call radixSort
# Buckets 1..15: cur[k] = end[k-1]; end[k] = cur[k] + cnt[k]; then call with
# (s2, s5, cur[k], end[k]). The boundary that must survive a call is kept in
# s1, s3 or s4.
# Bucket 1
lw a2, 72(sp)
lw a0, 12(sp)
sw a2, 140(sp)
addw s1, a0, a2
sw s1, 76(sp)
mv a0, s2
mv a1, s5
mv a3, s1
call radixSort
# Bucket 2
lw a0, 16(sp)
sw s1, 144(sp)
addw s3, a0, s1
sw s3, 80(sp)
mv a0, s2
mv a1, s5
mv a2, s1
mv a3, s3
call radixSort
# Bucket 3
lw a0, 20(sp)
sw s3, 148(sp)
addw s1, a0, s3
sw s1, 84(sp)
mv a0, s2
mv a1, s5
mv a2, s3
mv a3, s1
call radixSort
# Bucket 4
lw a0, 24(sp)
sw s1, 152(sp)
addw s3, a0, s1
sw s3, 88(sp)
mv a0, s2
mv a1, s5
mv a2, s1
mv a3, s3
call radixSort
# Bucket 5
lw a0, 28(sp)
sw s3, 156(sp)
addw s1, a0, s3
sw s1, 92(sp)
mv a0, s2
mv a1, s5
mv a2, s3
mv a3, s1
call radixSort
# Bucket 6
lw a0, 32(sp)
sw s1, 160(sp)
addw s3, a0, s1
sw s3, 96(sp)
mv a0, s2
mv a1, s5
mv a2, s1
mv a3, s3
call radixSort
# Bucket 7
lw a0, 36(sp)
sw s3, 164(sp)
addw s1, a0, s3
sw s1, 100(sp)
mv a0, s2
mv a1, s5
mv a2, s3
mv a3, s1
call radixSort
# Bucket 8
lw a0, 40(sp)
sw s1, 168(sp)
addw s3, a0, s1
sw s3, 104(sp)
mv a0, s2
mv a1, s5
mv a2, s1
mv a3, s3
call radixSort
# Bucket 9
lw a0, 44(sp)
sw s3, 172(sp)
addw s1, a0, s3
sw s1, 108(sp)
mv a0, s2
mv a1, s5
mv a2, s3
mv a3, s1
call radixSort
# Bucket 10
lw a0, 48(sp)
sw s1, 176(sp)
addw s3, a0, s1
sw s3, 112(sp)
mv a0, s2
mv a1, s5
mv a2, s1
mv a3, s3
call radixSort
# Bucket 11
lw a0, 52(sp)
sw s3, 180(sp)
addw s1, a0, s3
sw s1, 116(sp)
mv a0, s2
mv a1, s5
mv a2, s3
mv a3, s1
call radixSort
# Bucket 12
lw a0, 56(sp)
sw s1, 184(sp)
addw s3, a0, s1
sw s3, 120(sp)
mv a0, s2
mv a1, s5
mv a2, s1
mv a3, s3
call radixSort
# Bucket 13
lw a0, 60(sp)
sw s3, 188(sp)
addw s4, a0, s3
sw s4, 124(sp)
mv a0, s2
mv a1, s5
mv a2, s3
mv a3, s4
call radixSort
# Bucket 14
lw a0, 64(sp)
sw s4, 192(sp)
addw s1, a0, s4
sw s1, 128(sp)
mv a0, s2
mv a1, s5
mv a2, s4
mv a3, s1
call radixSort
# Bucket 15: set up the arguments, then restore the saved registers, release
# the frame and tail-call instead of call + ret.
lw a0, 68(sp)
sw s1, 196(sp)
addw a3, a0, s1
sw a3, 132(sp)
mv a0, s2
mv a1, s5
mv a2, s1
ld ra, 248(sp)
ld s0, 240(sp)
ld s1, 232(sp)
ld s2, 224(sp)
ld s3, 216(sp)
ld s4, 208(sp)
ld s5, 200(sp)
addi sp, sp, 256
tail radixSort
.Lfunc_end2:
.size radixSort, .Lfunc_end2-radixSort
# ---------------------------------------------------------------------------
# main
#   1. s1 = getarray(a)                 (return value used as element count)
#   2. _sysy_starttime(91)
#   3. radixSort(8, a, 0, s1)
#   4. ans += sum over i in [0, s1) of i * (a[i] % (i + 2)), 32-bit arithmetic
#   5. if the total is negative, ans = -total
#   6. _sysy_stoptime(103); putint(ans); putch(10)
#   Out: a0 = 0
#   Frame: 32 bytes holding ra, s0 (array cursor) and s1 (element count)
# ---------------------------------------------------------------------------
        .globl  main
        .p2align 1
        .type   main,@function
main:
        addi    sp, sp, -32
        sd      ra, 24(sp)
        sd      s0, 16(sp)
        sd      s1, 8(sp)

        lui     s0, %hi(a)
        addi    s0, s0, %lo(a)          # s0 = &a[0]
        mv      a0, s0
        call    getarray
        mv      s1, a0                  # s1 = element count

        li      a0, 91
        call    _sysy_starttime

        li      a0, 8                   # first digit index handed to radixSort
        mv      a1, s0
        li      a2, 0
        mv      a3, s1
        call    radixSort

        lui     t0, %hi(ans)            # t0 stays valid: no calls until .Lmain_report
        lw      a0, %lo(ans)(t0)        # a0 = running total, seeded from ans
        blez    s1, .Lmain_abs          # empty array: skip the summation
        slli    t1, s1, 32
        srli    t1, t1, 32              # t1 = zero-extended element count
        li      t2, 0                   # t2 = i
.Lmain_sum:
        lw      t3, 0(s0)               # t3 = a[i]
        addiw   t4, t2, 2
        remw    t3, t3, t4              # a[i] % (i + 2)
        mulw    t3, t3, t2              # ... * i
        addw    a0, a0, t3
        addi    t2, t2, 1
        addi    s0, s0, 4
        bne     t1, t2, .Lmain_sum
        sw      a0, %lo(ans)(t0)
.Lmain_abs:
        bgez    a0, .Lmain_report
        negw    a0, a0                  # store the absolute value
        sw      a0, %lo(ans)(t0)
.Lmain_report:
        li      a0, 103
        call    _sysy_stoptime
        lui     a0, %hi(ans)
        lw      a0, %lo(ans)(a0)
        call    putint
        li      a0, 10                  # newline character
        call    putch

        li      a0, 0
        ld      ra, 24(sp)
        ld      s0, 16(sp)
        ld      s1, 8(sp)
        addi    sp, sp, 32
        ret
.Lfunc_end3:
        .size   main, .Lfunc_end3-main
# ---------------------------------------------------------------------------
# Global data. Everything below is zero-initialised storage.
# ---------------------------------------------------------------------------
# a: 120000040-byte buffer, 4-byte aligned. main passes its address to
# getarray and radixSort and reads it as an array of 32-bit words.
.type a,@object
.bss
.globl a
.p2align 2
a:
.zero 120000040
.size a, 120000040
# ans: one 32-bit word in .sbss; main accumulates its result here and passes
# the final value to putint.
.type ans,@object
.section .sbss,"aw",@nobits
.globl ans
.p2align 2
ans:
.word 0
.size ans, 4
# The _sysy_* objects below are not referenced by any code visible in this
# file. Presumably they are state for the timing runtime behind
# _sysy_starttime / _sysy_stoptime -- confirm against the runtime library.
# _sysy_start / _sysy_end: 16 bytes each, 8-byte aligned.
.type _sysy_start,@object
.bss
.globl _sysy_start
.p2align 3
_sysy_start:
.zero 16
.size _sysy_start, 16
.type _sysy_end,@object
.globl _sysy_end
.p2align 3
_sysy_end:
.zero 16
.size _sysy_end, 16
# _sysy_l1, _sysy_l2, _sysy_h, _sysy_m, _sysy_s, _sysy_us: 4096 bytes each,
# 4-byte aligned.
.type _sysy_l1,@object
.globl _sysy_l1
.p2align 2
_sysy_l1:
.zero 4096
.size _sysy_l1, 4096
.type _sysy_l2,@object
.globl _sysy_l2
.p2align 2
_sysy_l2:
.zero 4096
.size _sysy_l2, 4096
.type _sysy_h,@object
.globl _sysy_h
.p2align 2
_sysy_h:
.zero 4096
.size _sysy_h, 4096
.type _sysy_m,@object
.globl _sysy_m
.p2align 2
_sysy_m:
.zero 4096
.size _sysy_m, 4096
.type _sysy_s,@object
.globl _sysy_s
.p2align 2
_sysy_s:
.zero 4096
.size _sysy_s, 4096
.type _sysy_us,@object
.globl _sysy_us
.p2align 2
_sysy_us:
.zero 4096
.size _sysy_us, 4096
# _sysy_idx: one 32-bit word in .sbss.
.type _sysy_idx,@object
.section .sbss,"aw",@nobits
.globl _sysy_idx
.p2align 2
_sysy_idx:
.word 0
.size _sysy_idx, 4
# Toolchain metadata: producer string, non-executable-stack note, and the
# address-significance table (only `a` is marked address-significant).
.ident "Debian clang version 14.0.6"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym a