From 7f5736063d900c3962d62f08f42a81a351ec89d3 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Tue, 12 Mar 2019 16:05:23 +1100 Subject: [PATCH 1/3] [rnnoise] alter library to accept streamed samples --- src/rnnoise/include/rnnoise/rnnoise.h | 7 ++++++- src/rnnoise/src/denoise.c | 30 +++++++++++++++++++++------ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/rnnoise/include/rnnoise/rnnoise.h b/src/rnnoise/include/rnnoise/rnnoise.h index 8e41cc51..002c87c2 100644 --- a/src/rnnoise/include/rnnoise/rnnoise.h +++ b/src/rnnoise/include/rnnoise/rnnoise.h @@ -52,7 +52,12 @@ RNNOISE_EXPORT DenoiseState *rnnoise_create(); RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st); -RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in); +RNNOISE_EXPORT int rnnoise_get_needed(DenoiseState *st); + +RNNOISE_EXPORT int rnnoise_add_samples(DenoiseState *st, const float *in, int in_len); + +RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out); + #ifdef __cplusplus } diff --git a/src/rnnoise/src/denoise.c b/src/rnnoise/src/denoise.c index caf9a899..57e3e1d0 100644 --- a/src/rnnoise/src/denoise.c +++ b/src/rnnoise/src/denoise.c @@ -85,6 +85,9 @@ typedef struct { } CommonState; struct DenoiseState { + float input[FRAME_SIZE]; + int input_pos; + float analysis_mem[FRAME_SIZE]; float cepstral_mem[CEPS_MEM][NB_BANDS]; int memid; @@ -469,11 +472,27 @@ void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const } } -float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) { +int rnnoise_get_needed(DenoiseState *st) { + return FRAME_SIZE - st->input_pos; +} + +int rnnoise_add_samples(DenoiseState *st, const float *in, int in_len) { + static const float a_hp[2] = {-1.99599, 0.99600}; + static const float b_hp[2] = {-2, 1}; + + const int needed = FRAME_SIZE - st->input_pos; + const int take = needed > in_len ? 
in_len : needed; + + biquad(st->input + st->input_pos, st->mem_hp_x, in, b_hp, a_hp, take); + st->input_pos += take; + + return take; +} + +float rnnoise_process_frame(DenoiseState *st, float *out) { int i; kiss_fft_cpx X[FREQ_SIZE]; kiss_fft_cpx P[WINDOW_SIZE]; - float x[FRAME_SIZE]; float Ex[NB_BANDS], Ep[NB_BANDS]; float Exp[NB_BANDS]; float features[NB_FEATURES]; @@ -481,10 +500,9 @@ float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) { float gf[FREQ_SIZE]={1}; float vad_prob = 0; int silence; - static const float a_hp[2] = {-1.99599, 0.99600}; - static const float b_hp[2] = {-2, 1}; - biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE); - silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, x); + + silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, st->input); + st->input_pos = 0; if (!silence) { compute_rnn(&st->rnn, g, &vad_prob, features); From 28c767a68a0196253b99b66fffef1a2c8df2cbc4 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Tue, 12 Mar 2019 16:06:50 +1100 Subject: [PATCH 2/3] [common] added resample support While this on the surface may not make much sense to do, according to Xiph the RnNoise neural net is computed specifically for 48KHz and as such would require a re-train simply to operate at a different sample rate. This patch also implements a ring buffer to avoid excessive memory operations. 
--- src/common/CMakeLists.txt | 6 + .../include/common/RnNoiseCommonPlugin.h | 33 ++- src/common/src/RnNoiseCommonPlugin.cpp | 228 +++++++++++++----- 3 files changed, 198 insertions(+), 69 deletions(-) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 3535cfbe..5b5704dd 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -11,6 +11,12 @@ set(COMMON_SRC add_library(RnNoisePluginCommon STATIC ${COMMON_SRC}) +find_package(PkgConfig REQUIRED) +pkg_check_modules(SAMPLERATE REQUIRED samplerate) +target_link_libraries(RnNoisePluginCommon ${SAMPLERATE_LIBRARIES}) +target_include_directories(RnNoisePluginCommon PUBLIC ${SAMPLERATE_INCLUDE_DIRS}) +target_compile_options(RnNoisePluginCommon PUBLIC ${SAMPLERATE_CFLAGS_OTHER}) + target_link_libraries(RnNoisePluginCommon RnNoise) target_include_directories(RnNoisePluginCommon PUBLIC diff --git a/src/common/include/common/RnNoiseCommonPlugin.h b/src/common/include/common/RnNoiseCommonPlugin.h index 46c95fa7..cd52867e 100644 --- a/src/common/include/common/RnNoiseCommonPlugin.h +++ b/src/common/include/common/RnNoiseCommonPlugin.h @@ -3,30 +3,45 @@ #include #include +#include + struct DenoiseState; class RnNoiseCommonPlugin { public: + RnNoiseCommonPlugin(); + + void setSampleRate(unsigned long sampleRate); - void init(); + bool init(); void deinit(); + const char * getError() + { + return m_errorStr; + } + void process(const float *in, float *out, int32_t sampleFrames); private: + const char * m_errorStr; - void createDenoiseState(); + bool m_initialized; + bool m_resample; -private: static const int k_denoiseFrameSize = 480; static const int k_denoiseSampleRate = 48000; + std::shared_ptr m_srcIn; + std::shared_ptr m_srcOut; + double m_downRatio; + double m_upRatio; std::shared_ptr m_denoiseState; - std::vector m_inputBuffer; - std::vector m_outputBuffer; -}; - - - + std::vector m_inBuffer; + std::vector m_outBuffer; + size_t m_outBufferR; + size_t m_outBufferW; + size_t m_outBufferA; 
+}; \ No newline at end of file diff --git a/src/common/src/RnNoiseCommonPlugin.cpp b/src/common/src/RnNoiseCommonPlugin.cpp index 9f9af9b0..8ac21a6f 100644 --- a/src/common/src/RnNoiseCommonPlugin.cpp +++ b/src/common/src/RnNoiseCommonPlugin.cpp @@ -7,83 +7,191 @@ #include -void RnNoiseCommonPlugin::init() { - deinit(); - createDenoiseState(); +RnNoiseCommonPlugin::RnNoiseCommonPlugin() : + m_errorStr (NULL), + m_initialized(false), + m_resample (false), m_downRatio(1.0), m_upRatio(1.0) // 1:1 until setSampleRate() runs, so init() never hands src_set_ratio() an indeterminate value +{ } -void RnNoiseCommonPlugin::deinit() { - m_denoiseState.reset(); +void RnNoiseCommonPlugin::setSampleRate(unsigned long sampleRate) +{ + m_downRatio = (double)k_denoiseSampleRate / (double)sampleRate; + m_upRatio = (double)sampleRate / (double)k_denoiseSampleRate; + m_resample = sampleRate != (unsigned long)k_denoiseSampleRate; } -void RnNoiseCommonPlugin::process(const float *in, float *out, int32_t sampleFrames) { - if (sampleFrames == 0) { - return; - } - - if (!m_denoiseState) { - createDenoiseState(); - } +bool RnNoiseCommonPlugin::init() { + int err; - // Good case, we can copy less data around and rnnoise lib is built for it - if (sampleFrames == k_denoiseFrameSize) { - m_inputBuffer.resize(sampleFrames); + if (m_initialized) + deinit(); - for (size_t i = 0; i < sampleFrames; i++) { - m_inputBuffer[i] = in[i] * std::numeric_limits::max(); - } + m_srcIn = std::shared_ptr( + src_new(SRC_SINC_BEST_QUALITY, 1, &err), + [](SRC_STATE *st) + { + src_delete(st); + } + ); + + if (err) + { + m_errorStr = src_strerror(err); + return false; + } + + m_srcOut = std::shared_ptr( + src_new(SRC_SINC_BEST_QUALITY, 1, &err), + [](SRC_STATE *st) + { + src_delete(st); + } + ); + + if (err) + { + m_srcIn.reset(); + m_errorStr = src_strerror(err); + return false; + } + + m_denoiseState = std::shared_ptr( + rnnoise_create(), + [](DenoiseState *st) + { + rnnoise_destroy(st); + } + ); - rnnoise_process_frame(m_denoiseState.get(), out, &m_inputBuffer[0]); + src_set_ratio(m_srcIn.get(), m_downRatio); + src_set_ratio(m_srcOut .get(), m_upRatio ); - for (size_t 
i = 0; i < sampleFrames; i++) { - out[i] /= std::numeric_limits::max(); - } - } else { - m_inputBuffer.resize(m_inputBuffer.size() + sampleFrames); + m_inBuffer .resize(k_denoiseFrameSize); + m_outBuffer.resize(k_denoiseFrameSize * 2); + m_outBufferR = 0; + m_outBufferW = 0; + m_outBufferA = 0; - // From [-1.f,1.f] range to [min short, max short] range which rnnoise lib will understand - { - float *inputBufferWriteStart = (m_inputBuffer.end() - sampleFrames).base(); - for (size_t i = 0; i < sampleFrames; i++) { - inputBufferWriteStart[i] = in[i] * std::numeric_limits::max(); - } - } + m_initialized = true; + m_errorStr = NULL; + return true; +} - const size_t samplesToProcess = m_inputBuffer.size() / k_denoiseFrameSize; - const size_t framesToProcess = samplesToProcess * k_denoiseFrameSize; +void RnNoiseCommonPlugin::deinit() { + m_denoiseState.reset(); + m_srcIn .reset(); + m_srcOut .reset(); + m_initialized = false; +} - m_outputBuffer.resize(m_outputBuffer.size() + framesToProcess); +void RnNoiseCommonPlugin::process(const float *in, float *out, int32_t sampleFrames) +{ + const float mul = 1.0f / std::numeric_limits::max(); + if (sampleFrames <= 0) + return; + + if (!m_initialized && !init()) + { memset(out, 0, sampleFrames * sizeof(float)); return; } // init() failed (reason in getError()): emit silence instead of using the unset handles and buffers + + SRC_DATA srcIn; + srcIn.data_in = in; + srcIn.input_frames = sampleFrames; + srcIn.end_of_input = 0; + srcIn.src_ratio = m_downRatio; + srcIn.data_out = &m_inBuffer[0]; + srcIn.output_frames = m_inBuffer.size(); + + SRC_DATA srcOut; + srcOut.data_out = out; + srcOut.output_frames = sampleFrames; + srcOut.end_of_input = 0; + srcOut.src_ratio = m_upRatio; + + long frames = 0; + while(srcIn.input_frames) + { + if (m_resample) + { + // resample the samples and then scale them + src_process(m_srcIn.get(), &srcIn); + for(long i = 0; i < srcIn.output_frames_gen; ++i) + m_inBuffer[i] *= std::numeric_limits::max(); + } + else + { + // just copy the data and scale it + srcIn.input_frames_used = srcIn.input_frames; + if (srcIn.input_frames_used > srcIn.output_frames) + 
srcIn.input_frames_used = srcIn.output_frames; + srcIn.output_frames_gen = srcIn.input_frames_used; + + for(long i = 0; i < srcIn.output_frames_gen; ++i) + m_inBuffer[i] = srcIn.data_in[i] * std::numeric_limits::max(); // data_in advances each pass; indexing in[] re-read the first chunk on every later pass + } - // Process input buffer by chunks of k_denoiseFrameSize, put result into out buffer to return into range [-1.f,1.f] + srcIn.data_in += srcIn.input_frames_used; + srcIn.input_frames -= srcIn.input_frames_used; + + float *denoise_in = &m_inBuffer[0]; + while(srcIn.output_frames_gen) + { + const int wrote = rnnoise_add_samples(m_denoiseState.get(), denoise_in, srcIn.output_frames_gen); + denoise_in += wrote; + srcIn.output_frames_gen -= wrote; + + if (rnnoise_get_needed(m_denoiseState.get()) == 0) + { + rnnoise_process_frame(m_denoiseState.get(), &m_outBuffer[m_outBufferW]); + + // scale the levels back to normal + for(int32_t i = 0; i < k_denoiseFrameSize; ++i) + m_outBuffer[m_outBufferW + i] *= mul; + + m_outBufferW += k_denoiseFrameSize; + m_outBufferA += k_denoiseFrameSize; + if (m_outBufferW == m_outBuffer.size()) + m_outBufferW = 0; + } + + // resample what we can to the output + while(m_outBufferA && srcOut.output_frames) + { + srcOut.data_in = &m_outBuffer[m_outBufferR]; // m_outBufferA != 0 here, so W == R means the ring is full, not empty + srcOut.input_frames = m_outBufferW <= m_outBufferR ? 
m_outBuffer.size() - m_outBufferR : m_outBufferW - m_outBufferR; + + if (m_resample) + src_process(m_srcOut.get(), &srcOut); + else { - float *outBufferWriteStart = (m_outputBuffer.end() - framesToProcess).base(); - - for (size_t i = 0; i < samplesToProcess; i++) { - float *currentOutBuffer = &outBufferWriteStart[i * k_denoiseFrameSize]; - float *currentInBuffer = &m_inputBuffer[i * k_denoiseFrameSize]; - rnnoise_process_frame(m_denoiseState.get(), currentOutBuffer, currentInBuffer); - - for (size_t j = 0; j < k_denoiseFrameSize; j++) { - currentOutBuffer[j] /= std::numeric_limits::max(); - } - } + // simply copy the buffer if we are not resampling + srcOut.input_frames_used = srcOut.input_frames < srcOut.output_frames ? srcOut.input_frames : srcOut.output_frames; + // src_process() is skipped on this path, so report the produced count ourselves + srcOut.output_frames_gen = srcOut.input_frames_used; + memcpy(srcOut.data_out, srcOut.data_in, srcOut.input_frames_used * sizeof(float)); } - const size_t toCopyIntoOutput = std::min(m_outputBuffer.size(), static_cast(sampleFrames)); + if (!srcOut.input_frames_used && !srcOut.output_frames_gen) + break; - std::memcpy(out, &m_outputBuffer[0], toCopyIntoOutput * sizeof(float)); + m_outBufferR += srcOut.input_frames_used; + m_outBufferA -= srcOut.input_frames_used; - m_inputBuffer.erase(m_inputBuffer.begin(), m_inputBuffer.begin() + framesToProcess); - m_outputBuffer.erase(m_outputBuffer.begin(), m_outputBuffer.begin() + toCopyIntoOutput); + srcOut.data_out += srcOut.output_frames_gen; + srcOut.output_frames -= srcOut.output_frames_gen; + frames += srcOut.output_frames_gen; - if (toCopyIntoOutput < sampleFrames) { - std::fill(out + toCopyIntoOutput, out + sampleFrames, 0.f); - } + if (m_outBufferR == m_outBuffer.size()) + m_outBufferR = 0; + } } -} - -void RnNoiseCommonPlugin::createDenoiseState() { - m_denoiseState = std::shared_ptr(rnnoise_create(), [](DenoiseState *st) { - rnnoise_destroy(st); - }); + } + + // if we generated fewer frames than wanted (possibly none), pad them across to the right + if (frames < sampleFrames) + 
{ + const size_t pad = sampleFrames - frames; // counted in frames; the mem* calls below take bytes + memmove(out + pad, out, frames * sizeof(float)); + memset(out, 0, pad * sizeof(float)); + } } \ No newline at end of file From bed6d78a192ac16c9db09b523cb6a3c1802fc1aa Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Tue, 12 Mar 2019 16:32:44 +1100 Subject: [PATCH 3/3] [ladspa/lv2] add sample rate support --- src/ladspa_plugin/RnNoiseLadspaPlugin.h | 7 +++++-- src/lv2_plugin/RnNoiseLv2Plugin.cpp | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/ladspa_plugin/RnNoiseLadspaPlugin.h b/src/ladspa_plugin/RnNoiseLadspaPlugin.h index 28a8e8d8..f0b6fb8a 100644 --- a/src/ladspa_plugin/RnNoiseLadspaPlugin.h +++ b/src/ladspa_plugin/RnNoiseLadspaPlugin.h @@ -32,8 +32,9 @@ struct RnNoiseMono { nullptr // implementation data }; - RnNoiseMono() { + RnNoiseMono(sample_rate_t sr) { // set the rate first: init() feeds the ratios to src_set_ratio() + m_rnNoisePlugin.setSampleRate(sr); m_rnNoisePlugin.init(); } ~RnNoiseMono() { @@ -82,9 +83,11 @@ struct RnNoiseStereo { nullptr // implementation data }; - RnNoiseStereo() { + RnNoiseStereo(sample_rate_t sr) { // same ordering requirement as RnNoiseMono + m_rnNoisePluginL.setSampleRate(sr); + m_rnNoisePluginR.setSampleRate(sr); m_rnNoisePluginL.init(); m_rnNoisePluginR.init(); } ~RnNoiseStereo() { diff --git a/src/lv2_plugin/RnNoiseLv2Plugin.cpp b/src/lv2_plugin/RnNoiseLv2Plugin.cpp index 7588652c..3ee04b7c 100644 --- a/src/lv2_plugin/RnNoiseLv2Plugin.cpp +++ b/src/lv2_plugin/RnNoiseLv2Plugin.cpp @@ -7,6 +7,7 @@ RnNoiseLv2Plugin::RnNoiseLv2Plugin(double sample_rate, const char *bundle_path, (*valid) = true; m_rnNoisePlugin = std::make_unique(); + m_rnNoisePlugin->setSampleRate(sample_rate); } @@ -47,4 +48,4 @@ void RnNoiseLv2Plugin::deactivate() { PluginBase::deactivate(); m_rnNoisePlugin->deinit(); -} +} \ No newline at end of file