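# sysy-data/performance_c/asm/conv1.s
#
# Listing metadata from the web viewer this file was copied from:
#   575 lines, 7.8 KiB, highlighted as "ArmAsm"
#   (viewer links: Raw / Permalink / Normal View / History)
#   NOTE: despite the "ArmAsm" label, the code below is RISC-V RV64 assembly.
#
# 2024-06-14 13:10:27 +08:00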
# Compiler-emitted file prologue.
# Name of the source file this assembly was generated from.
.file "conv1.sy"
# Assemble as position-independent code.
.option pic
# Target ISA string: RV64 base integer plus the M, A, F, D, C, Zicsr and
# Zifencei extensions at the listed versions.
.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
# Build attribute: unaligned_access is recorded as 0.
.attribute unaligned_access, 0
# Build attribute: the stack is kept 16-byte aligned.
.attribute stack_align, 16
# Everything that follows goes into the code section.
.text
# ---------------------------------------------------------------------------
# checkrange: fold a 32-bit signed integer into the range [0, 2^30].
#   In:  a0 = value (32-bit signed, sign-extended in the register)
#   Out: a0 = value - 2^30                 when value > 2^30
#             value + 2^30 (+ 2^30 again   when value < 0
#               if bit 30 of ~value is set)
#             value unchanged              otherwise
#   Leaf function, no stack use; scratch registers: a4, a5.
# ---------------------------------------------------------------------------
        .align  1
        .globl  checkrange
        .type   checkrange, @function
checkrange:
        blt     a0,zero,.Lcheckrange_negative
        li      a4,1073741824                   # 2^30, the upper limit
        bgt     a0,a4,.Lcheckrange_too_large
        ret                                     # already inside the range

.Lcheckrange_too_large:
        # A 32-bit value above 2^30 needs exactly one subtraction of 2^30.
        li      a4,-1073741824
        addw    a0,a0,a4
        ret

.Lcheckrange_negative:
        # ~value is non-negative here; its bit 30 says whether one addition
        # of 2^30 is enough (bit clear) or a second one is needed (bit set).
        not     a4,a0
        srliw   a4,a4,30
        slliw   a4,a4,30                        # 0 or 2^30
        li      a5,1073741824
        addw    a0,a0,a5                        # first  + 2^30
        addw    a0,a0,a4                        # second + 2^30 when required
        ret
        .size   checkrange, .-checkrange
# ---------------------------------------------------------------------------
# reduce: combine two 32-bit integers with the operation selected by a0.
#   In:  a0 = operation selector (compared unsigned against 4)
#        a1 = first operand  (called x below)
#        a2 = second operand (called y below)
#   Out: a0 = result; 0 when the selector is greater than 4
#   Dispatch through the relative jump table .L9:
#        0 -> .L13  x + y, then folded: subtract 2^30 if the sum is above
#                   2^30; if the sum is negative add 2^30, plus another 2^30
#                   when bit 30 of ~sum is set
#        1 -> .L22  30-step bit loop, emits 1 where the two remainders differ
#        2 -> .L11  the larger of x and y (signed compare)
#        3 -> .L23  30-step bit loop, emits 1 where either remainder is 1
#        4 -> .L24  30-step bit loop, emits 1 where both remainders are 1
#   Bit loops: the divisor starts at 1 and doubles every step; each step
#   shifts the result left by one and then adds the new bit, so the bit
#   obtained with divisor 1 ends up in the highest position of the result.
#   The sequence srliw 31 / addw / andi 1 / subw turns a 32-bit quotient q
#   into its signed remainder modulo 2 (-1, 0 or 1).
#   Leaf function: no stack frame; only a- and t-registers are written.
# ---------------------------------------------------------------------------
.align 1
.globl reduce
.type reduce, @function
reduce:
li a5,4
# a3 keeps a copy of the selector; the selector-1 path (.L22) uses it as its
# starting divisor, which is therefore 1.
mv a3,a0
# Unsigned compare: selectors above 4 (including negative values) return 0.
bgtu a0,a5,.L21
# Load the 32-bit table entry for this selector (an offset relative to .L9)
# and jump to .L9 + offset.
lla a4,.L9
slli a5,a0,2
add a5,a5,a4
lw a5,0(a5)
add a5,a5,a4
jr a5
# Jump table, stored in read-only data as offsets from its own start.
.section .rodata
.align 2
.align 2
.L9:
.word .L13-.L9
.word .L22-.L9
.word .L11-.L9
.word .L23-.L9
.word .L24-.L9
.text
# ---- selector 3: bit is 1 when (x / d) % 2 == 1 or (y / d) % 2 == 1 ----
# a3 = steps remaining (30), a4 = divisor d, a0 = result, t1 = constant 1.
.L23:
li a3,30
li a4,1
li a0,0
li t1,1
j .L10
# x's remainder was not 1: examine y's remainder.
.L29:
divw a5,a2,a4
srliw a6,a5,31
addw a5,a5,a6
andi a5,a5,1
subw a5,a5,a6
beq a5,t1,.L18
# Neither remainder is 1: the bit stays 0; advance to the next divisor.
addiw a3,a3,-1
slliw a4,a4,1
beq a3,zero,.L28
# Loop head: shift the result, keep the shifted value in a7, test x first.
.L10:
divw a5,a1,a4
slliw a0,a0,1
mv a7,a0
srliw a6,a5,31
addw a5,a5,a6
andi a5,a5,1
subw a5,a5,a6
bne a5,t1,.L29
# At least one remainder is 1: set the low bit of the shifted result.
.L18:
addiw a3,a3,-1
addiw a0,a7,1
slliw a4,a4,1
bne a3,zero,.L10
.L28:
ret
# ---- selector 1: bit is 1 when the remainders of x and y differ ----
# a6 = steps remaining (30), a3 = divisor (enters as 1), a0 = result.
.L22:
li a6,30
li a0,0
.L12:
divw a4,a1,a3
slliw a0,a0,1
divw a5,a2,a3
# a4 = (x / d) % 2
srliw a7,a4,31
addw a4,a4,a7
andi a4,a4,1
subw a4,a4,a7
# a5 = (y / d) % 2
srliw a7,a5,31
addw a5,a5,a7
andi a5,a5,1
subw a5,a5,a7
# Equal remainders leave the new bit at 0.
beq a4,a5,.L16
addiw a0,a0,1
.L16:
addiw a6,a6,-1
slliw a3,a3,1
bne a6,zero,.L12
ret
# ---- selector 4: bit is 1 only when both remainders are 1 ----
# a3 = steps remaining (30), a4 = divisor d, a0 = result, t1 = constant 1.
.L24:
li a3,30
li a4,1
li a0,0
li t1,1
j .L8
# Bit stays 0: advance to the next divisor.
.L20:
addiw a3,a3,-1
slliw a4,a4,1
beq a3,zero,.L30
.L8:
divw a5,a1,a4
slliw a0,a0,1
srliw a6,a5,31
addw a5,a5,a6
andi a5,a5,1
subw a5,a5,a6
# x's remainder is not 1: no need to look at y.
bne a5,t1,.L20
divw a5,a2,a4
srliw a6,a5,31
addw a5,a5,a6
andi a5,a5,1
subw a5,a5,a6
bne a5,t1,.L20
# Both remainders are 1: set the low bit.
addiw a3,a3,-1
addiw a0,a0,1
slliw a4,a4,1
bne a3,zero,.L8
.L30:
ret
# ---- selector 2: return the larger operand ----
.L11:
sext.w a0,a1
bge a1,a2,.L7
sext.w a0,a2
ret
# ---- selector 0: x + y folded into [0, 2^30] ----
.L13:
# a1 = 32-bit sum, a0 = the same value used for the range tests.
addw a1,a1,a2
sext.w a0,a1
li a5,1073741824
ble a0,a5,.L14
# Sum above 2^30: subtract 2^30 once.
li a0,-1073741824
addw a0,a0,a1
ret
# Selector out of range: result is 0.
.L21:
li a0,0
.L7:
ret
.L14:
# Sum within [0, 2^30]: return it unchanged.
bge a0,zero,.L31
# Negative sum: add 2^30, plus a second 2^30 when bit 30 of ~sum is set.
not a0,a1
srliw a0,a0,30
li a5,1073741824
addw a5,a5,a1
slliw a0,a0,30
addw a0,a0,a5
ret
.L31:
ret
.size reduce, .-reduce
# ---------------------------------------------------------------------------
# getvalue: bounds-checked read from a row-major grid of 32-bit words.
#   In:  a0 = base address of the grid
#        a1 = row count     (exclusive upper bound for a3)
#        a2 = column count  (exclusive upper bound for a4, also row stride)
#        a3 = row index
#        a4 = column index
#   Out: a0 = word at index a3 * a2 + a4, or 0 when either index is
#             negative or not below its bound
#   Leaf function, no stack use; scratch register: a5.
# ---------------------------------------------------------------------------
        .align  1
        .globl  getvalue
        .type   getvalue, @function
getvalue:
        # The OR of both indices is negative exactly when one of them is.
        or      a5,a3,a4
        blt     a5,zero,.Lgetvalue_outside
        bge     a3,a1,.Lgetvalue_outside
        bge     a4,a2,.Lgetvalue_outside

        mulw    a5,a3,a2                        # row * stride (32-bit)
        addw    a5,a5,a4                        # + column
        slli    a5,a5,2                         # word index -> byte offset
        add     a5,a0,a5
        lw      a0,0(a5)
        ret

.Lgetvalue_outside:
        li      a0,0
        ret
        .size   getvalue, .-getvalue
# ---------------------------------------------------------------------------
# convn: for every cell (i, j) of an n x m grid, fold the cells of a window
# around (i, j) with reduce() and store the result in the output grid.
#   In:  a0 = selector, passed unchanged as reduce's first argument
#        a1 = base of the input grid  (32-bit words, row stride m)
#        a2 = base of the output grid (32-bit words, row stride m)
#        a3 = n, bound for the row index i
#        a4 = m, bound for the column index j and the row stride
#        a5 = window size c; only h = c / 2 (truncated toward zero) is used
#   Out: a0 = 0
#   Window for (i, j): rows x = i-h .. i+h-1, columns y = j-h .. j+h-1.
#   Cells with x outside [0, n) or y outside [0, m) contribute the value 0.
#   The accumulator starts at 0 for every output cell.
#   All four loops test their condition after the body, so every body runs
#   at least once whatever the bounds are.
#
#   Registers held across the loops:
#        s0  = selector              s6  = input base
#        s5  = m (as passed)         s8  = m sign-extended from 32 bits
#        s11 = n                     s9  = i + h   (window row end bound)
#        s4  = current window row x  s2  = x * m   (word index of row start)
#        s7  = j - h (first column)  s1  = j + h   (window column end bound)
#        s3  = min(j + h, 0)         s10 = current window column y
#        a1  = running accumulator (second argument of reduce)
#   Stack slots (frame is 208 bytes):
#        8(sp)  = j + h              16(sp) = j - h
#        24(sp) = address of the current output cell
#        32(sp) = (i - h) * m        40(sp) = -h
#        48(sp) = i - h              56(sp) = i + h
#        64(sp) = address of the current output row
#        72(sp) = h                  80(sp) = m * 4 (row size in bytes)
#        88(sp) = -2 * h
#        104..200(sp) = saved s11..s0 and ra
# ---------------------------------------------------------------------------
.align 1
.globl convn
.type convn, @function
convn:
# a5 = c / 2 with truncation toward zero (add the sign bit before shifting).
mv a6,a5
srliw a5,a5,31
addw a5,a5,a6
sraiw a5,a5,1
addi sp,sp,-208
# a5 = -h
negw a5,a5
sd s5,152(sp)
mv s5,a4
# a4 = -h * m: word index of the first window row for i = 0.
mulw a4,a5,a4
sd s11,104(sp)
mv s11,a3
sext.w a3,a5
sd a3,40(sp)
# a3 = m * 4, the byte size of one grid row.
slli a3,s5,2
sd s0,192(sp)
sd s6,144(sp)
sd s8,128(sp)
sd ra,200(sp)
sd a4,32(sp)
# a4 = h, a5 = -2 * h
negw a4,a5
slliw a5,a5,1
sd s1,184(sp)
sd s2,176(sp)
sd s3,168(sp)
sd s4,160(sp)
sd s7,136(sp)
sd s9,120(sp)
sd s10,112(sp)
sd a4,72(sp)
mv s0,a0
mv s6,a1
sext.w s8,s5
sd a3,80(sp)
sd a2,64(sp)
sd a5,88(sp)
# 56(sp) starts as 0 + h, i.e. i + h for the first row.
sd a4,56(sp)
# ---- per output row i ----
.L51:
# s9 = i + h; 48(sp) = (i + h) - 2h = i - h.
ld a5,56(sp)
ld a4,88(sp)
sext.w s9,a5
addw a4,a5,a4
sd a4,48(sp)
# Reset the column state for j = 0: 16(sp) = -h, 8(sp) = h,
# 24(sp) = start of this output row.
ld a4,40(sp)
sd a4,16(sp)
ld a4,72(sp)
sd a4,8(sp)
ld a4,64(sp)
sd a4,24(sp)
# ---- per output cell (i, j) ----
.L50:
ld a5,8(sp)
lw s7,16(sp)
ld s2,32(sp)
# s3 = (j + h < 1) ? j + h : 0, built from a 0 / -1 mask.
slti s3,a5,1
negw s3,s3
ld s4,48(sp)
and s3,a5,s3
sext.w s1,a5
# Accumulator starts at 0.
li a1,0
sext.w s3,s3
# ---- per window row x ----
.L49:
mv s10,s7
# Rows outside [0, n) are handled by loops that always pass 0.
blt s4,zero,.L38
bge s4,s11,.L39
# Row is valid; leading columns with y < 0 are handled at .L42.
blt s7,zero,.L42
# ---- per window column y, with y >= 0 and a valid row ----
.L47:
# a4 = address of input word x * m + y.
addw a4,s10,s2
slli a4,a4,2
add a4,s6,a4
mv a0,s0
# y >= m: pass 0 instead of loading.
ble s5,s10,.L44
.L64:
lw a2,0(a4)
addiw s10,s10,1
call reduce
mv a1,a0
blt s10,s1,.L47
# ---- next window row ----
.L48:
addiw s4,s4,1
addw s2,s2,s8
blt s4,s9,.L49
# ---- window finished: store the accumulator and advance j ----
.L65:
ld a5,8(sp)
ld a4,24(sp)
addiw s10,a5,1
sw a1,0(a4)
addi a4,a4,4
sd a4,24(sp)
sext.w a4,s10
sd a4,8(sp)
ld a4,16(sp)
ld a5,40(sp)
addiw a4,a4,1
# a5 = (j + 1 + h) - h = j + 1, compared against m.
addw a5,s10,a5
sd a4,16(sp)
blt a5,s5,.L50
# ---- row finished: advance i and the per-row values ----
ld a5,56(sp)
ld a4,40(sp)
ld a3,80(sp)
addiw a5,a5,1
# a4 = (i + 1 + h) - h = i + 1, compared against n below.
addw a4,a5,a4
sd a5,56(sp)
ld a5,32(sp)
addw a5,s8,a5
sd a5,32(sp)
ld a5,64(sp)
add a5,a5,a3
sd a5,64(sp)
blt a4,s11,.L51
# ---- epilogue: restore saved registers and return 0 ----
ld ra,200(sp)
ld s0,192(sp)
ld s1,184(sp)
ld s2,176(sp)
ld s3,168(sp)
ld s4,160(sp)
ld s5,152(sp)
ld s6,144(sp)
ld s7,136(sp)
ld s8,128(sp)
ld s9,120(sp)
ld s10,112(sp)
ld s11,104(sp)
li a0,0
addi sp,sp,208
jr ra
# ---- window row with x < 0: every column contributes 0 ----
# The loop body appears twice in a row (two reduce calls per trip).
.L38:
li a2,0
mv a0,s0
call reduce
addiw s10,s10,1
mv a1,a0
bge s10,s1,.L48
li a2,0
mv a0,s0
call reduce
addiw s10,s10,1
mv a1,a0
blt s10,s1,.L38
j .L48
# ---- window row with x >= n: every column contributes 0 ----
# Same shape as the loop at .L38.
.L39:
li a2,0
mv a0,s0
call reduce
addiw s10,s10,1
mv a1,a0
bge s10,s1,.L48
li a2,0
mv a0,s0
call reduce
addiw s10,s10,1
mv a1,a0
blt s10,s1,.L39
j .L48
# ---- valid row, columns with y < 0: contribute 0 ----
.L42:
li a2,0
mv a0,s0
call reduce
addiw s10,s10,1
mv a1,a0
# Stay here while y < min(j + h, 0).
bgt s3,s10,.L42
# Window exhausted before reaching column 0.
bge s10,s1,.L48
# Otherwise continue with the in-range path, recomputing the cell address.
addw a4,s10,s2
slli a4,a4,2
add a4,s6,a4
mv a0,s0
bgt s5,s10,.L64
# ---- valid row, column y >= m: contribute 0 (a0 already holds s0) ----
.L44:
li a2,0
call reduce
addiw s10,s10,1
mv a1,a0
blt s10,s1,.L47
addiw s4,s4,1
addw s2,s2,s8
blt s4,s9,.L49
j .L65
.size convn, .-convn
# ---------------------------------------------------------------------------
# memmove: copy a2 32-bit words from the buffer at a1 to the buffer at a0.
#   In:  a0 = destination address
#        a1 = source address
#        a2 = number of 32-bit words; nothing is copied when a2 <= 0
#   Out: no result value is set up in a0
#   Both copy loops walk from low to high addresses.
#   Two words are moved per step with 8-byte loads/stores when all of the
#   following hold: a2 - 1 is greater than 8, both addresses are 8-byte
#   aligned, and the destination is not source + 4. Otherwise the copy is
#   done one word at a time.
#   Leaf function, no stack use; scratch registers: a3-a7.
#   NOTE(review): this global symbol has the same name as the C library's
#   memmove but takes a word count and always copies forward - confirm that
#   nothing linked with this file expects the C library contract.
# ---------------------------------------------------------------------------
.align 1
.globl memmove
.type memmove, @function
memmove:
ble a2,zero,.L66
# Short copies (a2 - 1 <= 8, unsigned) use the word loop.
addiw a5,a2,-1
li a4,8
sext.w a7,a2
bleu a5,a4,.L68
# Either pointer not 8-byte aligned: use the word loop.
or a5,a1,a0
andi a5,a5,7
bne a5,zero,.L68
# Destination exactly one word above the source: use the word loop.
addi a5,a1,4
beq a0,a5,.L68
# a6 = source end for the paired copy: source + (count / 2) * 8.
srliw a6,a7,1
slli a6,a6,3
mv a5,a1
mv a4,a0
add a6,a6,a1
# Paired copy: 8 bytes per iteration.
.L69:
ld a3,0(a5)
addi a5,a5,8
addi a4,a4,8
sd a3,-8(a4)
bne a5,a6,.L69
# Even count: done. Odd count: copy the final word at index (count & -2).
andi a5,a2,-2
beq a7,a5,.L66
# Zero-extend the 32-bit index and scale it to a byte offset.
slli a5,a5,32
srli a5,a5,30
add a1,a1,a5
lw a4,0(a1)
add a0,a0,a5
sw a4,0(a0)
ret
# Word loop: a5 = source end address (source + count * 4).
.L68:
slli a5,a2,2
add a5,a1,a5
.L71:
lw a4,0(a1)
addi a1,a1,4
addi a0,a0,4
sw a4,-4(a0)
bne a1,a5,.L71
.L66:
ret
.size memmove, .-memmove
# ---------------------------------------------------------------------------
# main: read the parameters and the two input arrays, run convn once per
# entry of kernelid (copying the result buffer b back into a after each
# run), then print a.
#   Out: a0 = 0
#   Values kept in saved registers:
#        s4  = first  getint result (passed to convn in a5)
#        s3  = second getint result (passed to convn in a3)
#        s2  = third  getint result (passed to convn in a4)
#        s11 = s3 * s2 (32-bit product): number of words copied and printed
#        s10 = cursor into kernelid, s1 = end of the entries to process
#        s8  = address of b, s0 = address of a (after the setup code)
#        s9  = b + (s11 / 2) * 8: end address for the 8-byte copy loop
#        s7 = s6 = s11 & -2: index of the odd trailing word
#        s5  = constant 1
#   External routines called through the PLT: getint, getarray,
#   _sysy_starttime, _sysy_stoptime, putarray.
# ---------------------------------------------------------------------------
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
main:
# Prologue: 112-byte frame holding ra and s0-s11.
addi sp,sp,-112
sd ra,104(sp)
sd s0,96(sp)
sd s2,80(sp)
sd s3,72(sp)
sd s4,64(sp)
sd s11,8(sp)
sd s1,88(sp)
sd s5,56(sp)
sd s6,48(sp)
sd s7,40(sp)
sd s8,32(sp)
sd s9,24(sp)
sd s10,16(sp)
# Three integers are read in this order: s4, s3, s2.
call getint@plt
mv s4,a0
call getint@plt
mv s3,a0
call getint@plt
mv s2,a0
# Fill array a; the value returned by this call is not used.
lla a0,a
call getarray@plt
# Fill array kernelid; the returned value (s0) is used as the number of
# entries to process.
lla a0,kernelid
call getarray@plt
mv s0,a0
# _sysy_starttime receives the constant 111
# (presumably a source line number - TODO confirm).
li a0,111
mulw s11,s3,s2
call _sysy_starttime@plt
# No entries: skip the whole processing loop.
ble s0,zero,.L85
# Loop-invariant setup (see the register list in the header).
srliw s9,s11,1
slli s0,s0,2
lla s10,kernelid
lla s8,b
slli s9,s9,3
andi s7,s11,-2
add s1,s0,s10
li s5,1
lla s0,a
add s9,s8,s9
mv s6,s7
# ---- one pass per kernelid entry ----
.L84:
# convn(selector = *s10, input = a, output = b, s3, s2, s4)
lw a0,0(s10)
mv a5,s4
mv a4,s2
mv a3,s3
mv a2,s8
mv a1,s0
call convn
# Copy s11 words from b back to a (inline copy, not a call to memmove).
# Nothing to copy when s11 <= 0; a single word takes the path at .L91.
ble s11,zero,.L89
beq s11,s5,.L91
mv a5,s0
lla a4,a
mv a1,s8
# 8 bytes per iteration, until the read cursor reaches s9.
.L87:
ld a3,0(a1)
addi a1,a1,8
addi a4,a4,8
sd a3,-8(a4)
bne s9,a1,.L87
# Even word count: done. Odd: copy the last word at index s7.
mv a1,s7
beq s11,s6,.L89
# Copy the single word at word index a1 from b to the buffer at a5.
.L86:
slli a1,a1,2
add a4,s8,a1
lw a4,0(a4)
add a1,a5,a1
sw a4,0(a1)
# Advance to the next kernelid entry.
.L89:
addi s10,s10,4
bne s10,s1,.L84
.L85:
# _sysy_stoptime receives the constant 118
# (presumably a source line number - TODO confirm).
li a0,118
call _sysy_stoptime@plt
# putarray(s11, a)
mv a0,s11
lla a1,a
call putarray@plt
# Epilogue: restore saved registers and return 0.
ld ra,104(sp)
ld s0,96(sp)
ld s1,88(sp)
ld s2,80(sp)
ld s3,72(sp)
ld s4,64(sp)
ld s5,56(sp)
ld s6,48(sp)
ld s7,40(sp)
ld s8,32(sp)
ld s9,24(sp)
ld s10,16(sp)
ld s11,8(sp)
li a0,0
addi sp,sp,112
jr ra
# Single-word copy: word index 0, destination base = a.
.L91:
li a1,0
lla a5,a
j .L86
.size main, .-main
# ---------------------------------------------------------------------------
# Zero-initialised global objects (.bss) and the file trailer.
# Only kernelid, b and a are referenced by the code in this file; the
# _sysy_* objects are defined and exported here but not accessed here
# (presumably used by the timing routines _sysy_starttime/_sysy_stoptime -
# TODO confirm against the runtime library).
# ---------------------------------------------------------------------------
.globl kernelid
.globl b
.globl a
.globl _sysy_idx
.globl _sysy_us
.globl _sysy_s
.globl _sysy_m
.globl _sysy_h
.globl _sysy_l2
.globl _sysy_l1
.globl _sysy_end
.globl _sysy_start
.bss
# 8-byte alignment for the start of the objects below.
.align 3
# kernelid: 40000 bytes (10000 32-bit words); main reads one word per pass.
.type kernelid, @object
.size kernelid, 40000
kernelid:
.zero 40000
# b: 40000000 bytes; main passes it to convn as the output buffer.
.type b, @object
.size b, 40000000
b:
.zero 40000000
# a: 40000000 bytes; main passes it to convn as the input buffer and prints it.
.type a, @object
.size a, 40000000
a:
.zero 40000000
# _sysy_idx: 4 bytes, followed by 4 bytes of padding.
.type _sysy_idx, @object
.size _sysy_idx, 4
_sysy_idx:
.zero 4
.zero 4
# Six 4096-byte objects.
.type _sysy_us, @object
.size _sysy_us, 4096
_sysy_us:
.zero 4096
.type _sysy_s, @object
.size _sysy_s, 4096
_sysy_s:
.zero 4096
.type _sysy_m, @object
.size _sysy_m, 4096
_sysy_m:
.zero 4096
.type _sysy_h, @object
.size _sysy_h, 4096
_sysy_h:
.zero 4096
.type _sysy_l2, @object
.size _sysy_l2, 4096
_sysy_l2:
.zero 4096
.type _sysy_l1, @object
.size _sysy_l1, 4096
_sysy_l1:
.zero 4096
# Two 16-byte objects.
.type _sysy_end, @object
.size _sysy_end, 16
_sysy_end:
.zero 16
.type _sysy_start, @object
.size _sysy_start, 16
_sysy_start:
.zero 16
# Compiler identification string.
.ident "GCC: (Debian 12.2.0-13) 12.2.0"
# Empty .note.GNU-stack section (no flags set).
.section .note.GNU-stack,"",@progbits