# sysy-data/performance_c/asm/fft2.s
#
# 500 lines
# 7.0 KiB
# Tagged "ArmAsm" by the listing; the code below is RISC-V (rv64) assembly.

.text
.attribute 4, 16
.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
.file "fft2.sy"
# -----------------------------------------------------------------------
# int multiply(int a, int b)
#   In:   a0 = a, a1 = b
#   Out:  a0 = result
#
# Recursive double-and-add product reduced modulo 998244353:
#   b == 0  ->  0
#   b == 1  ->  a % 998244353
#   else    ->  t = (2 * multiply(a, b / 2)) % 998244353
#               return (b % 2 == 1) ? (t + a) % 998244353 : t
#
# Every "% 998244353" is done without a divide: the value is multiplied
# by 288737297 (= ceil(2^58 / 998244353)), shifted right by 58, and the
# product's sign bit is added so the quotient rounds toward zero; the
# remainder is then value - quotient * 998244353.
#
# Saved registers: s0 = a, s1 = b, s2 = 1, s3 = b + (b < 0 ? 1 : 0)
# -----------------------------------------------------------------------
        .globl  multiply
        .p2align 1
        .type   multiply,@function
multiply:
        addi    sp, sp, -48
        sd      ra, 40(sp)
        sd      s0, 32(sp)
        sd      s1, 24(sp)
        sd      s2, 16(sp)
        sd      s3, 8(sp)

        mv      s1, a1                  # s1 = b
        beqz    a1, .Lmultiply_zero
        mv      s0, a0                  # s0 = a
        li      s2, 1                   # s2 = 1, reused for the parity test
        bne     s1, s2, .Lmultiply_halve

        # b == 1: result is a % 998244353.
        li      a0, 288737297
        mul     a0, s0, a0
        srli    a1, a0, 63              # sign bit of the 64-bit product
        srai    a0, a0, 58
        addw    a0, a0, a1              # a0 = a / 998244353
        li      a1, 998244353
        mulw    a0, a0, a1
        subw    a0, s0, a0
        j       .Lmultiply_exit

.Lmultiply_zero:
        mv      a0, s1                  # b is 0 here, so the result is 0
        j       .Lmultiply_exit

.Lmultiply_halve:
        srliw   a0, s1, 31
        add     s3, s1, a0              # bias so the shift rounds toward zero
        sraiw   a1, s3, 1               # a1 = b / 2
        mv      a0, s0
        call    multiply                # a0 = multiply(a, b / 2)

        andi    a4, s3, -2
        subw    a4, s1, a4              # a4 = b % 2

        slliw   a0, a0, 1               # double the partial result
        li      a1, 288737297
        mul     a2, a0, a1
        srli    a3, a2, 63
        srai    a2, a2, 58
        addw    a3, a3, a2
        li      a2, 998244353
        mulw    a3, a3, a2
        subw    a0, a0, a3              # a0 = doubled value % 998244353
        bne     a4, s2, .Lmultiply_exit # even b: done

        # Odd b: fold in one more copy of a and reduce again.
        addw    a0, a0, s0
        mul     a1, a0, a1
        srli    a3, a1, 63
        srai    a1, a1, 58
        addw    a1, a1, a3
        mulw    a1, a1, a2
        subw    a0, a0, a1

.Lmultiply_exit:
        ld      ra, 40(sp)
        ld      s0, 32(sp)
        ld      s1, 24(sp)
        ld      s2, 16(sp)
        ld      s3, 8(sp)
        addi    sp, sp, 48
        ret
.Lfunc_end0:
        .size   multiply, .Lfunc_end0-multiply
# -----------------------------------------------------------------------
# int power(int a, int b)
#   In:   a0 = a, a1 = b
#   Out:  a0 = result
#
# Square-and-multiply built on multiply():
#   b == 0  ->  1
#   else    ->  h = power(a, b / 2);  sq = multiply(h, h)
#               return (b % 2 == 1) ? multiply(sq, a) : sq
# The final multiply(sq, a) is issued as a tail call after the frame has
# been torn down.
#
# Saved registers: s1 = b, s2 = a, s0 = b + (b < 0 ? 1 : 0)
# -----------------------------------------------------------------------
        .globl  power
        .p2align 1
        .type   power,@function
power:
        addi    sp, sp, -32
        sd      ra, 24(sp)
        sd      s0, 16(sp)
        sd      s1, 8(sp)
        sd      s2, 0(sp)

        beqz    a1, .Lpower_unit
        mv      s2, a0                  # s2 = a
        mv      s1, a1                  # s1 = b
        srliw   a2, a1, 31
        add     s0, a1, a2              # bias so the shift rounds toward zero
        sraiw   a1, s0, 1               # a1 = b / 2; a0 still holds a
        call    power                   # a0 = power(a, b / 2)
        mv      a1, a0
        call    multiply                # a0 = square of the half power

        andi    a1, s0, -2
        subw    a1, s1, a1              # a1 = b % 2
        li      a2, 1
        bne     a1, a2, .Lpower_exit    # even b: the square is the answer

        # Odd b: return multiply(square, a) via tail call.
        mv      a1, s2
        ld      ra, 24(sp)
        ld      s0, 16(sp)
        ld      s1, 8(sp)
        ld      s2, 0(sp)
        addi    sp, sp, 32
        tail    multiply

.Lpower_unit:
        li      a0, 1                   # b == 0
.Lpower_exit:
        ld      ra, 24(sp)
        ld      s0, 16(sp)
        ld      s1, 8(sp)
        ld      s2, 0(sp)
        addi    sp, sp, 32
        ret
.Lfunc_end1:
        .size   power, .Lfunc_end1-power
# -----------------------------------------------------------------------
# int memmove(int dst[], int dst_pos, int src[], int len)
#   In:   a0 = dst, a1 = dst_pos (element index), a2 = src, a3 = len
#   Out:  a0 = len when len > 0, otherwise 0
#
# Copies len 32-bit words from src[0..] to dst[dst_pos..], walking both
# arrays upward. Leaf routine: no stack frame, only a0-a4 are written.
#
# NOTE(review): this global symbol has the same name as the C library's
# memmove but a different contract (word index and word count, returns
# the count) - confirm that the link setup cannot confuse the two.
# -----------------------------------------------------------------------
        .globl  memmove
        .p2align 1
        .type   memmove,@function
memmove:
        blez    a3, .Lmemmove_nothing
        slli    a4, a3, 32
        srli    a4, a4, 32              # a4 = len zero-extended: words left
        slli    a1, a1, 2
        add     a0, a0, a1              # a0 = &dst[dst_pos]
.Lmemmove_word:
        lw      a1, 0(a2)
        sw      a1, 0(a0)
        addi    a2, a2, 4
        addi    a0, a0, 4
        addi    a4, a4, -1
        bnez    a4, .Lmemmove_word
        mv      a0, a3                  # return len
        ret
.Lmemmove_nothing:
        li      a0, 0                   # len <= 0: nothing copied
        ret
.Lfunc_end2:
        .size   memmove, .Lfunc_end2-memmove
# -----------------------------------------------------------------------
# int fft(int arr[], int begin_pos, int n, int w)
#   In:   a0 = arr, a1 = begin_pos, a2 = n, a3 = w
#   Out:  a0 = 1 when n == 1, otherwise 0
#
# Recursive radix-2 transform over arr[begin_pos .. begin_pos + n - 1],
# with all arithmetic taken modulo 998244353:
#   1. split  - even-indexed words go to temp[0 ..], odd-indexed words to
#               temp[n/2 ..]; temp is then copied back over the slice
#   2. recurse on both halves with w2 = multiply(w, w)
#   3. combine - for i in [0, n/2), with wn starting at 1:
#               t = multiply(wn, hi[i])
#               lo[i] = (lo[i] + t) % 998244353
#               hi[i] = (lo[i] - t + 998244353) % 998244353  (old lo[i])
#               wn = multiply(wn, w)
#
# Saved-register roles (several are reused between phases):
#   s7 = w           s4 = n, later the combine loop counter
#   s3 = arr, later the reciprocal constant 288737297
#   s2 = begin_pos, later a copy of wn kept across calls
#   s6 = begin_pos * 4, later the current lo[i] value
#   s1 = scratch in the split loop, then n / 2, then pointer into hi[]
#   s5 = w2, later the modulus 998244353
#   s0 = pointer into lo[]
#
# NOTE(review): only n == 1 returns early, so n <= 0 keeps recursing with
# n / 2 == 0 and never terminates - confirm callers always pass n >= 1.
# -----------------------------------------------------------------------
        .globl  fft
        .p2align 1
        .type   fft,@function
fft:
        addi    sp, sp, -80
        sd      ra, 72(sp)
        sd      s0, 64(sp)
        sd      s1, 56(sp)
        sd      s2, 48(sp)
        sd      s3, 40(sp)
        sd      s4, 32(sp)
        sd      s5, 24(sp)
        sd      s6, 16(sp)
        sd      s7, 8(sp)

        li      a4, 1
        beq     a2, a4, .Lfft_return    # n == 1: return the 1 held in a4
        mv      s7, a3                  # s7 = w
        mv      s4, a2                  # s4 = n
        mv      s2, a1                  # s2 = begin_pos
        mv      s3, a0                  # s3 = arr
        slli    s6, a1, 2               # s6 = begin_pos in bytes
        blez    a2, .Lfft_recurse

        # ---- split: even words to temp[0..], odd words to temp[n/2..] ----
        li      a1, 0                   # a1 = i
        srliw   a2, s4, 1               # a2 = n / 2
        slli    a0, s4, 32
        srli    a0, a0, 32              # a0 = n zero-extended (loop bound)
        add     a3, s3, s6              # a3 = &arr[begin_pos]
        lui     a4, %hi(temp)
        addi    a4, a4, %lo(temp)       # a4 = temp
.Lfft_split_loop:
        srliw   a5, a1, 1               # a5 = i / 2
        andi    a6, a1, 1
        beqz    a6, .Lfft_split_store
        add     a5, a5, a2              # odd i: slot n/2 + i/2
.Lfft_split_store:
        lw      s1, 0(a3)
        slli    a5, a5, 2
        add     a5, a5, a4
        sw      s1, 0(a5)               # temp[slot] = arr[begin_pos + i]
        addi    a1, a1, 1
        addi    a3, a3, 4
        bne     a0, a1, .Lfft_split_loop

        # ---- copy the reordered words from temp back over the slice ----
        blez    s4, .Lfft_recurse
        add     a1, s3, s6              # a1 = &arr[begin_pos]
        lui     a2, %hi(temp)
        addi    a2, a2, %lo(temp)
.Lfft_copy_loop:
        lw      a3, 0(a2)
        sw      a3, 0(a1)
        addi    a2, a2, 4
        addi    a1, a1, 4
        addi    a0, a0, -1              # a0 still counts the n words
        bnez    a0, .Lfft_copy_loop

        # ---- recurse on both halves with the squared root ----
.Lfft_recurse:
        srliw   a0, s4, 31
        addw    a0, s4, a0
        sraiw   s1, a0, 1               # s1 = n / 2
        mv      a0, s7
        mv      a1, s7
        call    multiply
        mv      s5, a0                  # s5 = multiply(w, w)

        mv      a0, s3
        mv      a1, s2
        mv      a2, s1
        mv      a3, s5
        call    fft                     # lower half

        addw    a1, s1, s2              # begin_pos + n / 2
        mv      a0, s3
        mv      a2, s1
        mv      a3, s5
        call    fft                     # upper half

        # ---- combine the two halves ----
        li      a0, 2
        blt     s4, a0, .Lfft_done
        slli    a0, s1, 32
        srli    s4, a0, 32              # s4 = n / 2 iterations left
        add     a0, s1, s2
        slli    a0, a0, 2
        add     s1, s3, a0              # s1 = &arr[begin_pos + n / 2]
        add     s0, s3, s6              # s0 = &arr[begin_pos]
        li      a0, 1                   # wn = 1
        li      s3, 288737297           # reciprocal for the "% modulus" steps
        li      s5, 998244353           # modulus
.Lfft_butterfly:
        lw      s6, 0(s0)               # s6 = lo[i]
        lw      a1, 0(s1)               # a1 = hi[i]
        sext.w  s2, a0                  # keep wn across the call
        mv      a0, s2
        call    multiply                # a0 = t = multiply(wn, hi[i])

        addw    a1, a0, s6              # lo[i] + t
        mul     a2, a1, s3
        srli    a3, a2, 63
        srai    a2, a2, 58
        addw    a2, a2, a3
        mulw    a2, a2, s5
        subw    a1, a1, a2
        sw      a1, 0(s0)               # lo[i] = (lo[i] + t) % modulus

        subw    a0, s6, a0
        addw    a0, a0, s5              # lo[i] - t + modulus
        mul     a1, a0, s3
        srli    a2, a1, 63
        srai    a1, a1, 58
        addw    a1, a1, a2
        mulw    a1, a1, s5
        subw    a0, a0, a1
        sw      a0, 0(s1)               # hi[i] = (lo[i] - t + modulus) % modulus

        mv      a0, s2
        mv      a1, s7
        call    multiply                # a0 = wn = multiply(wn, w)
        addi    s0, s0, 4
        addi    s1, s1, 4
        addi    s4, s4, -1
        bnez    s4, .Lfft_butterfly

.Lfft_done:
        li      a4, 0
.Lfft_return:
        mv      a0, a4
        ld      ra, 72(sp)
        ld      s0, 64(sp)
        ld      s1, 56(sp)
        ld      s2, 48(sp)
        ld      s3, 40(sp)
        ld      s4, 32(sp)
        ld      s5, 24(sp)
        ld      s6, 16(sp)
        ld      s7, 8(sp)
        addi    sp, sp, 80
        ret
.Lfunc_end3:
        .size   fft, .Lfunc_end3-fft
# -----------------------------------------------------------------------
# int main(void)
#   Out:  a0 = 0
#
# Flow, as visible in the code:
#   1. n = getarray(a); m = getarray(b); _sysy_starttime(61)
#   2. d = smallest value in 1, 2, 4, ... that is >= n + m - 1; stored to
#      the global d
#   3. fft(a, 0, d, power(3, 998244352 / d)), then the same for b
#   4. a[i] = multiply(a[i], b[i]) for i in [0, d)
#   5. fft(a, 0, d, power(3, 998244352 - 998244352 / d))
#   6. a[i] = multiply(a[i], power(d, 998244351)) for i in [0, d)
#   7. _sysy_stoptime(80); putarray(n + m - 1, a)
# getarray, putarray, _sysy_starttime and _sysy_stoptime are external; the
# arguments 61 and 80 are presumably source line numbers - TODO confirm.
#
# Saved-register roles:
#   s6 = n + m - 1        s5 = 998244352        s4 = %hi(d)
#   s2 = &a, later power(d, 998244351)          s7 = cursor into b
#   s3 = d reloaded from memory                 s0, s1 = counters/cursors
# -----------------------------------------------------------------------
        .globl  main
        .p2align 1
        .type   main,@function
main:
        addi    sp, sp, -80
        sd      ra, 72(sp)
        sd      s0, 64(sp)
        sd      s1, 56(sp)
        sd      s2, 48(sp)
        sd      s3, 40(sp)
        sd      s4, 32(sp)
        sd      s5, 24(sp)
        sd      s6, 16(sp)
        sd      s7, 8(sp)

        # ---- read both inputs and start the timer ----
        lui     a0, %hi(a)
        addi    a0, a0, %lo(a)
        call    getarray
        mv      s0, a0                  # s0 = n
        lui     a0, %hi(b)
        addi    a0, a0, %lo(b)
        call    getarray
        mv      s1, a0                  # s1 = m
        li      a0, 61
        call    _sysy_starttime

        # ---- d = first power of two that is >= n + m - 1 ----
        addw    a0, s0, s1
        addiw   s6, a0, -1              # s6 = n + m - 1
        li      a0, 1
.Lmain_grow_d:
        mv      s1, a0                  # s1 = candidate d
        slliw   a0, a0, 1
        blt     s1, s6, .Lmain_grow_d
        lui     s4, %hi(d)
        sw      s1, %lo(d)(s4)          # publish d

        # ---- forward transform of a ----
        li      s5, 998244352
        divuw   a1, s5, s1              # exponent = 998244352 / d
        li      a0, 3
        call    power
        mv      a3, a0
        lui     a0, %hi(a)
        addi    s2, a0, %lo(a)          # s2 = &a
        mv      a0, s2
        li      a1, 0
        mv      a2, s1
        call    fft

        # ---- forward transform of b ----
        lw      s3, %lo(d)(s4)          # d is reloaded after the call
        divw    a1, s5, s3
        li      a0, 3
        call    power
        mv      a3, a0
        lui     a0, %hi(b)
        addi    s7, a0, %lo(b)          # s7 = &b
        mv      a0, s7
        li      a1, 0
        mv      a2, s3
        call    fft

        # ---- pointwise product: a[i] = multiply(a[i], b[i]) ----
        lwu     s0, %lo(d)(s4)          # s0 = d as an unsigned counter
        sext.w  s3, s0                  # s3 = d as a signed int
        blez    s3, .Lmain_inverse
        mv      s1, s2                  # s1 walks a, s7 walks b
.Lmain_pointwise:
        lw      a0, 0(s1)
        lw      a1, 0(s7)
        call    multiply
        sw      a0, 0(s1)
        addi    s1, s1, 4
        addi    s7, s7, 4
        addi    s0, s0, -1
        bnez    s0, .Lmain_pointwise

        # ---- second transform of a with the complementary exponent ----
.Lmain_inverse:
        divw    a0, s5, s3
        subw    a1, s5, a0              # 998244352 - 998244352 / d
        li      a0, 3
        call    power
        mv      a3, a0
        mv      a0, s2
        li      a1, 0
        mv      a2, s3
        call    fft

        # ---- scale every word by power(d, 998244351) ----
        lwu     s0, %lo(d)(s4)
        sext.w  a0, s0                  # a0 = d, first argument of power
        blez    a0, .Lmain_report
        li      a1, 998244351
        call    power
        mv      s2, a0                  # s2 = scale factor, computed once
        lui     a0, %hi(a)
        addi    s1, a0, %lo(a)
.Lmain_scale:
        lw      a0, 0(s1)
        mv      a1, s2
        call    multiply
        sw      a0, 0(s1)
        addi    s1, s1, 4
        addi    s0, s0, -1
        bnez    s0, .Lmain_scale

        # ---- stop the timer and print n + m - 1 words of a ----
.Lmain_report:
        li      a0, 80
        call    _sysy_stoptime
        lui     a0, %hi(a)
        addi    a1, a0, %lo(a)
        mv      a0, s6
        call    putarray

        li      a0, 0
        ld      ra, 72(sp)
        ld      s0, 64(sp)
        ld      s1, 56(sp)
        ld      s2, 48(sp)
        ld      s3, 40(sp)
        ld      s4, 32(sp)
        ld      s5, 24(sp)
        ld      s6, 16(sp)
        ld      s7, 8(sp)
        addi    sp, sp, 80
        ret
.Lfunc_end4:
        .size   main, .Lfunc_end4-main
# -----------------------------------------------------------------------
# Zero-initialised global data.
# -----------------------------------------------------------------------

# temp: 8388608-byte scratch buffer. fft stores the even/odd-reordered
# words of a slice here and then copies them back.
.type temp,@object
.bss
.globl temp
.p2align 2
temp:
.zero 8388608
.size temp, 8388608
# a: 8388608-byte buffer. main fills it through getarray, transforms it in
# place with fft and prints it with putarray.
.type a,@object
.globl a
.p2align 2
a:
.zero 8388608
.size a, 8388608
# b: 8388608-byte buffer. main fills it through getarray and transforms it
# in place with fft.
.type b,@object
.globl b
.p2align 2
b:
.zero 8388608
.size b, 8388608
# d: 4-byte word in .sbss. main stores the chosen transform length here
# and reloads it after calls.
.type d,@object
.section .sbss,"aw",@nobits
.globl d
.p2align 2
d:
.word 0
.size d, 4
# The _sysy_* objects below are not referenced by any code visible in this
# file; presumably they back the _sysy_starttime/_sysy_stoptime runtime -
# TODO confirm against the support library.
.type _sysy_start,@object
.bss
.globl _sysy_start
.p2align 3
_sysy_start:
.zero 16
.size _sysy_start, 16
.type _sysy_end,@object
.globl _sysy_end
.p2align 3
_sysy_end:
.zero 16
.size _sysy_end, 16
.type _sysy_l1,@object
.globl _sysy_l1
.p2align 2
_sysy_l1:
.zero 4096
.size _sysy_l1, 4096
.type _sysy_l2,@object
.globl _sysy_l2
.p2align 2
_sysy_l2:
.zero 4096
.size _sysy_l2, 4096
.type _sysy_h,@object
.globl _sysy_h
.p2align 2
_sysy_h:
.zero 4096
.size _sysy_h, 4096
.type _sysy_m,@object
.globl _sysy_m
.p2align 2
_sysy_m:
.zero 4096
.size _sysy_m, 4096
.type _sysy_s,@object
.globl _sysy_s
.p2align 2
_sysy_s:
.zero 4096
.size _sysy_s, 4096
.type _sysy_us,@object
.globl _sysy_us
.p2align 2
_sysy_us:
.zero 4096
.size _sysy_us, 4096
.type _sysy_idx,@object
.section .sbss,"aw",@nobits
.globl _sysy_idx
.p2align 2
_sysy_idx:
.word 0
.size _sysy_idx, 4
# c: 8388608-byte buffer that no code visible in this file reads or writes.
.type c,@object
.bss
.globl c
.p2align 2
c:
.zero 8388608
.size c, 8388608
# Toolchain metadata: producer string, GNU-stack note section, and the
# address-significance table entries for a and b.
.ident "Debian clang version 14.0.6"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym a
.addrsig_sym b