Skip to content

Commit

Permalink
Merge pull request #71 from mbertuletti/cfft_merge
Browse files Browse the repository at this point in the history
Cfft merge
  • Loading branch information
xiaywang authored Jul 21, 2022
2 parents d6e144f + 7aa9f87 commit c50f6e3
Show file tree
Hide file tree
Showing 73 changed files with 8,885 additions and 32,272 deletions.
23 changes: 13 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,13 @@ FC_SRCS = \
src/TransformFunctions/plp_cfft_q16.c src/TransformFunctions/kernels/plp_cfft_q16s_rv32im.c \
src/TransformFunctions/plp_cfft_q16_parallel.c \
src/TransformFunctions/plp_cfft_q32.c src/TransformFunctions/kernels/plp_cfft_q32s_rv32im.c \
src/TransformFunctions/plp_cfft_q32_parallel.c \
src/TransformFunctions/plp_rfft_f32.c \
src/TransformFunctions/plp_rfft_f32_parallel.c \
src/TransformFunctions/plp_rfftfast_f32.c \
src/TransformFunctions/plp_rfftfast_f32_parallel.c \
src/TransformFunctions/plp_cfft_f32.c \
src/TransformFunctions/plp_cfft_f32_parallel.c \
src/TransformFunctions/plp_cfft_f32_parallel.c \
src/TransformFunctions/plp_dct2_f32.c \
src/TransformFunctions/plp_dct2_f32_parallel.c \
src/TransformFunctions/plp_mfcc_f32.c \
Expand Down Expand Up @@ -583,18 +586,17 @@ CL_SRCS = \
src/MatrixFunctions/mat_mult_trans_cmplx/kernels/plp_mat_mult_trans_cmplx_q8p_xpulpv2.c \
src/MatrixFunctions/mat_mult_trans_cmplx/kernels/plp_mat_mult_trans_cmplx_f32s_xpulpv2.c \
src/MatrixFunctions/mat_mult_trans_cmplx/kernels/plp_mat_mult_trans_cmplx_f32p_xpulpv2.c \
src/TransformFunctions/kernels/plp_rfft_f32_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_f32_xpulpv2.c \
src/TransformFunctions/kernels/plp_rfftfast_f32s_xpulpv2.c \
src/TransformFunctions/kernels/plp_rfftfast_f32p_xpulpv2.c \
src/TransformFunctions/kernels/plp_rfft_f32s_xpulpv2.c \
src/TransformFunctions/kernels/plp_rfft_f32p_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_f32s_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_f32p_xpulpv2.c \
src/TransformFunctions/kernels/plp_bitreversal_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_q16s_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_q16p_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_q32p_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_q32s_xpulpv2.c \
src/TransformFunctions/kernels/plp_rfft_f32_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_f32_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_q16s_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_q16p_xpulpv2.c \
src/TransformFunctions/kernels/plp_cfft_q32s_xpulpv2.c \
src/TransformFunctions/kernels/plp_rfft_f32_xpulpv2.c \
src/TransformFunctions/kernels/plp_dwt_f32s_xpulpv2.c \
src/TransformFunctions/kernels/plp_dwt_q32s_xpulpv2.c \
src/TransformFunctions/kernels/plp_dwt_q16s_xpulpv2.c \
Expand Down Expand Up @@ -811,10 +813,11 @@ CL_SRCS = \

IDIR=$(CURDIR)/include
PULP_CFLAGS += -I$(IDIR) -O3 -g
#PULP_LDFLAGS += -lplpdsp -lm

ifeq ($(PULP_RTOS), pmsis)
# PMSIS rules
PULP_STATIC_LIB = plpdsp
PULP_STATIC_LIB = plpdsp -lm
PULP_CFLAGS += -DRTOS_PMSIS
PULP_STATIC_LIB_SRCS = $(FC_SRCS) $(CL_SRCS)
PULP_STATIC_LIB_HEADERS += $(shell find include -name *.h)
Expand Down
91 changes: 91 additions & 0 deletions genBitsReversal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@

import math
import argparse
from sympy.combinatorics import Permutation

def bits_for_value(value):
    """Return the base-2 logarithm of ``value`` truncated to an integer.

    For a power of two this is the number of bits needed to index
    ``value`` distinct entries (e.g. 8 -> 3).
    """
    exponent = math.log2(value)
    return int(exponent)

def decompose(N, R):
    """Split a transform length into the bit widths of its radix-R digits.

    Args:
        N: Transform length.
        R: Radix used to peel digits off ``N``; must be at least 2.

    Returns:
        A tuple ``(logN2, logR2)``. ``logN2`` is ``bits_for_value(N)``.
        ``logR2`` holds ``bits_for_value(R)`` once for every time ``N`` can
        be integer-divided by ``R`` while staying >= ``R``, followed by the
        bit width of the leftover factor when that factor is greater than 1.

    Raises:
        ValueError: If ``R`` is smaller than 2. A radix of 1 would never
            shrink ``N`` and the loop below would not terminate.
    """
    if R < 2:
        raise ValueError('radix must be at least 2, got {}'.format(R))

    logN2 = bits_for_value(N)
    # The radix width does not change between iterations, compute it once.
    radix_bits = bits_for_value(R)

    logR2 = []
    while N >= R:
        logR2.append(radix_bits)
        N //= R

    # A leftover factor smaller than the radix forms one final, narrower digit.
    if N > 1:
        logR2.append(bits_for_value(N))

    return (logN2, logR2)

def reverse_bits(x, n, bits_list):
    """Reverse the order of the variable-width digits of ``x``.

    Digits are taken from the least significant end of ``x``, one per entry
    of ``bits_list`` (each entry is that digit's width in bits), and appended
    to the result so that the first digit read ends up most significant.

    Args:
        x: Value whose digits are reversed.
        n: Accepted for call compatibility; not used by the computation.
        bits_list: Widths, in bits, of the successive digits.

    Returns:
        The integer formed by the digits of ``x`` in reversed order.
    """
    reversed_value = 0
    remaining = x
    for width in bits_list:
        # Keep only the lowest ``width`` bits of what is left of the input.
        low_digit = remaining & (0xffffffff >> (32 - width))
        reversed_value = (reversed_value << width) | low_digit
        remaining >>= width
    return reversed_value

def create_transpositions(N, R):
    """Build the list of swaps realising the digit-reversal permutation.

    Args:
        N: Transform length.
        R: Radix passed to ``decompose`` to obtain the digit widths.

    Returns:
        A list of two-element lists. Every index is multiplied by 8
        (presumably a per-element offset expected by the consumer of the
        table -- confirm against the C side).
    """
    total_bits, digit_widths = decompose(N, R)
    permuted = [reverse_bits(pos, total_bits, digit_widths) for pos in range(N)]

    # Express each cycle of the permutation as swaps of its members against
    # the cycle's last element.
    swaps = []
    for cycle in Permutation.from_sequence(permuted).cyclic_form:
        anchor = cycle[-1] * 8
        swaps.extend([member * 8, anchor] for member in cycle[:-1])

    return swaps

def transpositions_stringify(N, R, tps):
    """Render a transposition table as C source text.

    Args:
        N: Transform length; used in the macro and array names and to pick
            the element type (``uint16_t`` up to 8192, ``uint32_t`` above).
        R: Accepted for call compatibility; not used by the rendering.
        tps: Sequence of index pairs; each pair contributes two table entries.

    Returns:
        A string holding a ``#define`` with the table length followed by the
        ``const`` array definition, with entries wrapped across lines.
    """
    max_line_len = 79
    max_fft_in_u16 = 8192

    c_type = 'uint32_t' if N > max_fft_in_u16 else 'uint16_t'
    pieces = [
        '#define ARMBITREVINDEXTABLE_{}_TABLE_LENGTH {}\n'.format(N, len(tps) * 2),
        'const {} armBitRevIndexTable{}[ARMBITREVINDEXTABLE_{}_TABLE_LENGTH] = {{\n'.format(c_type, N, N),
    ]

    current = ''
    for pair in tps:
        entry = '{},{}'.format(pair[0], pair[1])

        if not current:
            # First entry of the table opens the first row.
            current = ' ' + entry
            continue

        # Budget for the entry plus its separators before extending the row.
        if len(current) + len(entry) + len(', ,') < max_line_len:
            current += ', ' + entry
        else:
            pieces.append(current + ',\n')
            current = ' ' + entry

    pieces.append(current + '\n};')
    return ''.join(pieces)

# Command-line entry: build the table for the requested size/radix and either
# print it or write it to the given file.
parser = argparse.ArgumentParser(description='Generate bits reversal tables',
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument('filename', metavar='out.c', nargs='?', help='output file name')
parser.add_argument('--size', type=int, default=8192, help='size')
parser.add_argument('--radix', type=int, default=8, choices=[2, 8],
                    help='radix | use 2 for Radix 4 and 4x2 | use 8 for Radix 8, 8x4, 8x2')

args = parser.parse_args()

tps = create_transpositions(args.size, args.radix)
out = transpositions_stringify(args.size, args.radix, tps)

# No positional file name means the table goes to stdout.
if args.filename is None:
    print(out)
else:
    # The context manager closes the file even if the write raises.
    with open(args.filename, 'w') as f:
        f.write(out)
106 changes: 106 additions & 0 deletions gen_twiddles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
## TWIDDLES ##

## RADIX 2
#import cmath as C
#def rfft_twiddles(FFTLength: int):
# N = FFTLength//2
# twiddles = [C.exp(-1*C.pi*1j*k/N) for k in range(N)]
# for i in range(N):
# if not i%3:
# print("\n ", end='')
# print("{"+" {: .8f}f, ".format(twiddles[i].real), end='')
# print("{: .8f}f".format(twiddles[i].imag)+" },", end='')
# print(" ", end='')
# print("")
#rfft_twiddles(128)

# RADIX 4
#import cmath as C
#def cfft_twiddles(FFTLength: int):
# N = FFTLength
# twiddles = [C.exp(-2*C.pi*1j*k/N) for k in range(N)]
# for i in range((int)(N)):
# if not i%3:
# print("\n ", end='')
# print("{"+" {: .8f}f, ".format(twiddles[i].real), end='')
# print("{: .8f}f".format(twiddles[i].imag)+" },", end='')
# print(" ", end='')
# print("")
#cfft_twiddles(16)

# RADIX 8
#import cmath as C
#def cfft_twiddles(FFTLength: int):
# N = FFTLength
# twiddles = [C.exp(-2*C.pi*1j*k/N) for k in range(N)]
# for i in range((int)(N)):
# if not i%3:
# print("\n ", end='')
# print("{"+" {: .8f}f, ".format(twiddles[i].real), end='')
# print("{: .8f}f".format(twiddles[i].imag)+" },", end='')
# print(" ", end='')
# print("")
#cfft_twiddles(64)


## q16
#import math as M
#N = 65536
#PI = 3.14159265358979
#for i in range(0, (int)(3*N/4)):
# twiddleCoefq15_cos = M.cos(i * 2*PI/N)
# twiddleCoefq15_sin = M.sin(i * 2*PI/N)
# if not i%5:
# print("\n ", end='')
# print("(int16_t) 0x{:04X}, " .format(int(round(twiddleCoefq15_cos*(2**15)))&0xffff), end='')
# print("(int16_t) 0x{:04X}, " .format(int(round(twiddleCoefq15_sin*(2**15)))&0xffff), end='')
#print("\n")

# RADIX 4
# 16, 256, 1024
#N = 16
#for i in range(1, N):
# n1 = int(i/4)
# n2 = int((i%4)/2)
# n3 = i - 4*n1 - 2*n2
# reversal = (4*n3 +2*n2 + n1)
# if not i%16:
# print("\n ", end='')
# print("{:2}".format(int(reversal)), end=', ')
#print("\n")



## BIT REVERSE ##

# RADIX 2
# 32, 128, 2048
# python script for generating these LUTs
# replace 7 with log2(FFTLength) (both occurrences!)
# after pasting, use gqq in vim to separate onto different lines
#for i in range(2**7):
# if not i%16:
# print("\n ", end='')
# print(int("{:0>7}".format(bin(i)[2:])[::-1],2), end=', ')
#print("\n ", end='')

# RADIX 4
# 16, 256, 1024
N = 2048
for i in range(N):
    # Reverse the base-4 digits of i: the least significant digit gets the
    # largest weight, the remaining high part gets weight 1.
    # NOTE(review): the weights cover five base-4 digits (4**5 = 1024), but
    # with N = 2048 the last term i // 256 ranges over 0..7 -- confirm this
    # is the intended table for 2048 points.
    reversal = (256 * (i % 4)
                + 64 * ((i % 16) // 4)
                + 16 * ((i % 64) // 16)
                + 4 * ((i % 256) // 64)
                + i // 256)
    # Four-digit variant (N = 256):
    # reversal = 64*(i%4)+ 16*(int)((i%16)/4)+ 4*(int)((i%64)/16)+ 1*(int)(i/64)
    # Two-digit variant (N = 16):
    # reversal = 4*(i%4)+ 1*(int)(i/4)
    if i % 16 == 0:
        # Start a new output row every 16 entries.
        print("\n ", end='')
    print("{:2}".format(int(reversal)), end=', ')

# RADIX 8
# 512, 64
#N = 64
#for i in range(N):
# #reversal = 64*(i%8)+ 8*(int)((i%64)/8)+ 1*(int)(i/64)
# reversal = 8*(i%8)+ 1*(int)(i/8)
# if not i%16:
# print("\n ", end='')
# print("{:3}".format(int(reversal)), end=', ')
57 changes: 53 additions & 4 deletions include/plp_common_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

#include "plp_math.h"

/* Fixed-point cfft */

extern const int16_t twiddleCoef_16_q16[24];
extern const int16_t twiddleCoef_32_q16[48];
extern const int16_t twiddleCoef_64_q16[96];
Expand Down Expand Up @@ -67,19 +69,66 @@ extern const uint16_t plpBitRevIndexTable_fixed_1024[PLPBITREVINDEXTABLE_FIXED_1
extern const uint16_t plpBitRevIndexTable_fixed_2048[PLPBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH];
extern const uint16_t plpBitRevIndexTable_fixed_4096[PLPBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH];

extern const float32_t sinTable_f32[FAST_MATH_TABLE_SIZE + 1];
extern const int32_t sinTable_q32[FAST_MATH_TABLE_SIZE + 1];
extern const int16_t sinTable_q16[FAST_MATH_TABLE_SIZE + 1];
/* Floating-point cfft */

extern const Complex_type_f32 twiddleCoef_cfft_16[16];
extern const Complex_type_f32 twiddleCoef_cfft_32[32];
extern const Complex_type_f32 twiddleCoef_cfft_64[64];
extern const Complex_type_f32 twiddleCoef_cfft_128[128];
extern const Complex_type_f32 twiddleCoef_cfft_256[256];
extern const Complex_type_f32 twiddleCoef_cfft_512[512];
extern const Complex_type_f32 twiddleCoef_cfft_1024[1024];
extern const Complex_type_f32 twiddleCoef_cfft_2048[2048];

/* Floating-point rfft */

extern const uint16_t plpBitRevIndexTable_rfftfast_16[20];
extern const uint16_t plpBitRevIndexTable_rfftfast_32[48];
extern const uint16_t plpBitRevIndexTable_rfftfast_64[56];
extern const uint16_t plpBitRevIndexTable_rfftfast_128[208];
extern const uint16_t plpBitRevIndexTable_rfftfast_256[440];
extern const uint16_t plpBitRevIndexTable_rfftfast_512[448];
extern const uint16_t plpBitRevIndexTable_rfftfast_1024[1800];

extern const Complex_type_f32 twiddleCoef_rfftfast_32[16];
extern const Complex_type_f32 twiddleCoef_rfftfast_64[32];
extern const Complex_type_f32 twiddleCoef_rfftfast_128[64];
extern const Complex_type_f32 twiddleCoef_rfftfast_256[128];
extern const Complex_type_f32 twiddleCoef_rfftfast_512[256];
extern const Complex_type_f32 twiddleCoef_rfftfast_1024[512];
extern const Complex_type_f32 twiddleCoef_rfftfast_2048[1024];

extern const Complex_type_f32 twiddleCoef_rfft_32[16];
extern const Complex_type_f32 twiddleCoef_rfft_64[32];
extern const Complex_type_f32 twiddleCoef_rfft_128[64];
extern const Complex_type_f32 twiddleCoef_rfft_256[128];
extern const Complex_type_f32 twiddleCoef_rfft_512[256];
extern const Complex_type_f32 twiddleCoef_rfft_1024[512];
extern const Complex_type_f32 twiddleCoef_rfft_2048[1024];


extern short bit_rev_radix2_LUT_32[32];
extern short bit_rev_radix2_LUT_64[64];
extern short bit_rev_radix2_LUT_128[128];
extern short bit_rev_radix2_LUT_256[256];
extern short bit_rev_radix2_LUT_512[512];
extern short bit_rev_radix2_LUT[2048];
extern short bit_rev_radix2_LUT_1024[1024];
extern short bit_rev_radix2_LUT_2048[2048];

extern short bit_rev_radix4_LUT_256[256];
extern short bit_rev_radix4_LUT_1024[1024];

extern short bit_rev_radix8_LUT_16[16];
extern short bit_rev_radix8_LUT_64[64];
extern short bit_rev_radix8_LUT_512[512];

extern short bit_rev_radix_2by4_LUT_32[32];
extern short bit_rev_radix_2by4_LUT_128[128];
extern short bit_rev_radix_2by4_LUT_2048[2048];

extern const float32_t sinTable_f32[FAST_MATH_TABLE_SIZE + 1];
extern const int32_t sinTable_q32[FAST_MATH_TABLE_SIZE + 1];
extern const int16_t sinTable_q16[FAST_MATH_TABLE_SIZE + 1];

extern const float32_t melFbTable_f32_23[25];
extern const float32_t melFbTable_f32_26[28];
Expand Down
33 changes: 32 additions & 1 deletion include/plp_const_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include "plp_common_tables.h"
#include "plp_math.h"

/* Fixed-point cfft */

extern const plp_cfft_instance_q16 plp_cfft_sR_q16_len16;
extern const plp_cfft_instance_q16 plp_cfft_sR_q16_len32;
extern const plp_cfft_instance_q16 plp_cfft_sR_q16_len64;
Expand All @@ -48,14 +50,43 @@ extern const plp_cfft_instance_q32 plp_cfft_sR_q32_len1024;
extern const plp_cfft_instance_q32 plp_cfft_sR_q32_len2048;
extern const plp_cfft_instance_q32 plp_cfft_sR_q32_len4096;

/* Floating-point cfft */

extern const plp_cfft_instance_f32 plp_cfft_sR_f32_len16;
extern const plp_cfft_instance_f32 plp_cfft_sR_f32_len32;
extern const plp_cfft_instance_f32 plp_cfft_sR_f32_len64;
extern const plp_cfft_instance_f32 plp_cfft_sR_f32_len128;
extern const plp_cfft_instance_f32 plp_cfft_sR_f32_len256;
extern const plp_cfft_instance_f32 plp_cfft_sR_f32_len512;
extern const plp_cfft_instance_f32 plp_cfft_sR_f32_len1024;
extern const plp_cfft_instance_f32 plp_cfft_sR_f32_len2048;

/* Floating-point rfft */

extern const plp_cfft_instance_f32 plp_cfft_fast_sR_f32_len16;
extern const plp_cfft_instance_f32 plp_cfft_fast_sR_f32_len64;
extern const plp_cfft_instance_f32 plp_cfft_fast_sR_f32_len128;
extern const plp_cfft_instance_f32 plp_cfft_fast_sR_f32_len256;
extern const plp_cfft_instance_f32 plp_cfft_fast_sR_f32_len512;
extern const plp_cfft_instance_f32 plp_cfft_fast_sR_f32_len1024;
extern const plp_fft_fast_instance_f32 plp_rfft_fast_sR_f32_len32;
extern const plp_fft_fast_instance_f32 plp_rfft_fast_sR_f32_len64;
extern const plp_fft_fast_instance_f32 plp_rfft_fast_sR_f32_len128;
extern const plp_fft_fast_instance_f32 plp_rfft_fast_sR_f32_len256;
extern const plp_fft_fast_instance_f32 plp_rfft_fast_sR_f32_len512;
extern const plp_fft_fast_instance_f32 plp_rfft_fast_sR_f32_len1024;
extern const plp_fft_fast_instance_f32 plp_rfft_fast_sR_f32_len2048;

extern const plp_fft_instance_f32 plp_rfft_sR_f32_len32;
extern const plp_fft_instance_f32 plp_rfft_sR_f32_len64;
extern const plp_fft_instance_f32 plp_rfft_sR_f32_len128;
extern const plp_fft_instance_f32 plp_rfft_sR_f32_len256;
extern const plp_fft_instance_f32 plp_rfft_sR_f32_len512;
extern const plp_fft_instance_f32 plp_rfft_sR_f32_len1024;
extern const plp_fft_instance_f32 plp_rfft_sR_f32_len2048;

extern const plp_triangular_filter_f32 plp_triangular_filter_f32_32;


extern const plp_dwt_wavelet_f32 PLP_DWT_COIF1_f32;
extern const plp_dwt_wavelet_f32 PLP_DWT_COIF2_f32;
extern const plp_dwt_wavelet_f32 PLP_DWT_COIF3_f32;
Expand Down
Loading

0 comments on commit c50f6e3

Please sign in to comment.