From 1bd56b1ce2096f1d02afd34d3211aa5cd0aedab5 Mon Sep 17 00:00:00 2001
From: Davide Beatrici
Date: Thu, 18 Jul 2024 01:40:05 +0200
Subject: [PATCH] REFAC(client): Introduce AudioPreprocessor

---
 src/mumble/AudioPreprocessor.cpp | 189 +++++++++++++++++++++++++
 src/mumble/AudioPreprocessor.h   | 215 +++++++++++++++++++++++++++++++
 src/mumble/CMakeLists.txt        |   2 +
 3 files changed, 406 insertions(+)
 create mode 100644 src/mumble/AudioPreprocessor.cpp
 create mode 100644 src/mumble/AudioPreprocessor.h

diff --git a/src/mumble/AudioPreprocessor.cpp b/src/mumble/AudioPreprocessor.cpp
new file mode 100644
index 00000000000..d5a256ef075
--- /dev/null
+++ b/src/mumble/AudioPreprocessor.cpp
@@ -0,0 +1,189 @@
+// Copyright 2024 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#include "AudioPreprocessor.h"
+
+#include <utility>
+
+#include <speex/speex_preprocess.h>
+
+AudioPreprocessor::AudioPreprocessor(AudioPreprocessor &&other) : m_handle(std::exchange(other.m_handle, nullptr)) {
+}
+
+AudioPreprocessor::~AudioPreprocessor() {
+	deinit();
+}
+
+AudioPreprocessor &AudioPreprocessor::operator=(AudioPreprocessor &&other) {
+	if (this != &other) {
+		// Release the state we currently own before taking over the other one, otherwise it would leak.
+		deinit();
+		m_handle = std::exchange(other.m_handle, nullptr);
+	}
+
+	return *this;
+}
+
+bool AudioPreprocessor::init(const std::uint32_t sampleRate, const std::uint32_t quantum) {
+	deinit();
+
+	m_handle = speex_preprocess_state_init(static_cast< int >(quantum), static_cast< int >(sampleRate));
+	return m_handle != nullptr;
+}
+
+void AudioPreprocessor::deinit() {
+	if (m_handle) {
+		speex_preprocess_state_destroy(m_handle);
+		// Forget the handle: operator bool() must report the uninitialized state and a later
+		// deinit() (e.g. from the destructor) must not destroy the same state a second time.
+		m_handle = nullptr;
+	}
+}
+
+bool AudioPreprocessor::run(std::int16_t &samples) {
+	return speex_preprocess_run(m_handle, &samples) != 0;
+}
+
+SpeexEchoState_ *AudioPreprocessor::getEchoState() {
+	SpeexEchoState_ *handle = nullptr;
+	return speex_preprocess_ctl(m_handle, SPEEX_PREPROCESS_GET_ECHO_STATE, &handle) == 0 ? handle : nullptr;
+}
+
+bool AudioPreprocessor::setEchoState(SpeexEchoState_ *handle) {
+	return speex_preprocess_ctl(m_handle, SPEEX_PREPROCESS_SET_ECHO_STATE, handle) == 0;
+}
+
+bool AudioPreprocessor::usesAGC() const {
+	return getBool(SPEEX_PREPROCESS_GET_AGC);
+}
+
+bool AudioPreprocessor::toggleAGC(const bool enable) {
+	return setBool(SPEEX_PREPROCESS_SET_AGC, enable);
+}
+
+std::int32_t AudioPreprocessor::getAGCDecrement() const {
+	return getInt32(SPEEX_PREPROCESS_GET_AGC_DECREMENT);
+}
+
+bool AudioPreprocessor::setAGCDecrement(const std::int32_t value) {
+	return setInt32(SPEEX_PREPROCESS_SET_AGC_DECREMENT, value);
+}
+
+std::int32_t AudioPreprocessor::getAGCGain() const {
+	return getInt32(SPEEX_PREPROCESS_GET_AGC_GAIN);
+}
+
+std::int32_t AudioPreprocessor::getAGCIncrement() const {
+	return getInt32(SPEEX_PREPROCESS_GET_AGC_INCREMENT);
+}
+
+bool AudioPreprocessor::setAGCIncrement(const std::int32_t value) {
+	return setInt32(SPEEX_PREPROCESS_SET_AGC_INCREMENT, value);
+}
+
+std::int32_t AudioPreprocessor::getAGCMaxGain() const {
+	return getInt32(SPEEX_PREPROCESS_GET_AGC_MAX_GAIN);
+}
+
+bool AudioPreprocessor::setAGCMaxGain(const std::int32_t value) {
+	return setInt32(SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, value);
+}
+
+std::int32_t AudioPreprocessor::getAGCTarget() const {
+	return getInt32(SPEEX_PREPROCESS_GET_AGC_TARGET);
+}
+
+bool AudioPreprocessor::setAGCTarget(const std::int32_t value) {
+	return setInt32(SPEEX_PREPROCESS_SET_AGC_TARGET, value);
+}
+
+bool AudioPreprocessor::usesDenoise() const {
+	return getBool(SPEEX_PREPROCESS_GET_DENOISE);
+}
+
+bool AudioPreprocessor::toggleDenoise(const bool enable) {
+	return setBool(SPEEX_PREPROCESS_SET_DENOISE, enable);
+}
+
+bool AudioPreprocessor::usesDereverb() const {
+	return getBool(SPEEX_PREPROCESS_GET_DEREVERB);
+}
+
+bool AudioPreprocessor::toggleDereverb(const bool enable) {
+	return setBool(SPEEX_PREPROCESS_SET_DEREVERB, enable);
+}
+
+std::int32_t AudioPreprocessor::getNoiseSuppress() const {
+	return getInt32(SPEEX_PREPROCESS_GET_NOISE_SUPPRESS);
+}
+
+bool AudioPreprocessor::setNoiseSuppress(const std::int32_t value) {
+	return setInt32(SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, value);
+}
+
+AudioPreprocessor::psd_t AudioPreprocessor::getPSD() const {
+	const auto size = getInt32(SPEEX_PREPROCESS_GET_PSD_SIZE);
+	// A non-positive size means the query failed; never let it turn into a huge unsigned allocation.
+	if (size <= 0) {
+		return {};
+	}
+
+	psd_t ret(static_cast< size_t >(size));
+	if (speex_preprocess_ctl(m_handle, SPEEX_PREPROCESS_GET_PSD, ret.data()) != 0) {
+		return {};
+	}
+
+	return ret;
+}
+
+AudioPreprocessor::psd_t AudioPreprocessor::getNoisePSD() const {
+	const auto size = getInt32(SPEEX_PREPROCESS_GET_PSD_SIZE);
+	// A non-positive size means the query failed; never let it turn into a huge unsigned allocation.
+	if (size <= 0) {
+		return {};
+	}
+
+	psd_t ret(static_cast< size_t >(size));
+	if (speex_preprocess_ctl(m_handle, SPEEX_PREPROCESS_GET_NOISE_PSD, ret.data()) != 0) {
+		return {};
+	}
+
+	return ret;
+}
+
+std::int32_t AudioPreprocessor::getSpeechProb() const {
+	return getInt32(SPEEX_PREPROCESS_GET_PROB);
+}
+
+bool AudioPreprocessor::usesVAD() const {
+	return getBool(SPEEX_PREPROCESS_GET_VAD);
+}
+
+bool AudioPreprocessor::toggleVAD(const bool enable) {
+	return setBool(SPEEX_PREPROCESS_SET_VAD, enable);
+}
+
+bool AudioPreprocessor::getBool(const int op) const {
+	const auto val = getInt32(op);
+	return static_cast< bool >(val);
+}
+
+bool AudioPreprocessor::setBool(const int op, const bool value) {
+	return setInt32(op, value ? 1 : 0);
+}
+
+std::int32_t AudioPreprocessor::getInt32(const int op) const {
+	spx_int32_t value = 0;
+	if (speex_preprocess_ctl(m_handle, op, &value) != 0) {
+		// Failure is reported as 0, which callers cannot tell apart from a genuine 0.
+		return 0;
+	}
+
+	return value;
+}
+
+bool AudioPreprocessor::setInt32(const int op, std::int32_t value) {
+	return speex_preprocess_ctl(m_handle, op, &value) == 0;
+}
diff --git a/src/mumble/AudioPreprocessor.h b/src/mumble/AudioPreprocessor.h
new file mode 100644
index 00000000000..b173ef301b0
--- /dev/null
+++ b/src/mumble/AudioPreprocessor.h
@@ -0,0 +1,215 @@
+// Copyright 2024 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#ifndef MUMBLE_MUMBLE_AUDIOPREPROCESSOR_H_
+#define MUMBLE_MUMBLE_AUDIOPREPROCESSOR_H_
+
+#include <cstdint>
+#include <vector>
+
+struct SpeexEchoState_;
+struct SpeexPreprocessState_;
+
+class AudioPreprocessor {
+public:
+	using psd_t = std::vector< std::int32_t >;
+
+	/**
+	 * \return Whether the object is initialized and all its methods can be safely called.
+	 */
+	constexpr explicit operator bool() const { return m_handle != nullptr; }
+
+	AudioPreprocessor() = default;
+	AudioPreprocessor(AudioPreprocessor &&other);
+	~AudioPreprocessor();
+
+	AudioPreprocessor &operator=(AudioPreprocessor &&other);
+
+	/**
+	 * Initializes the object. This must be called at least once before any other function!
+	 *
+	 * \param sampleRate The sample rate in Hz.
+	 *
+	 * \param quantum The number of samples that are expected for each preprocess cycle.
+	 *                Should correspond to 10-20 ms (480-960 with a sample rate of 48000).
+	 *
+	 * \return Whether initialization succeeded.
+	 */
+	bool init(std::uint32_t sampleRate, std::uint32_t quantum);
+	/**
+	 * Deinitializes the object, doesn't do anything if it's not initialized.
+	 * Once that is done no function other than init() can be called.
+	 */
+	void deinit();
+
+	/**
+	 * Runs the preprocessor, which analyzes and modifies the samples.
+	 *
+	 * \param samples Reference to the first sample of the buffer to process (modified in-place, see init()'s quantum).
+	 *
+	 * \return Whether speech was detected, if VAD is enabled. Always 'true' otherwise.
+	 */
+	bool run(std::int16_t &samples);
+
+	/**
+	 * \return Handle to the echo state object.
+	 */
+	SpeexEchoState_ *getEchoState();
+	/**
+	 * Set the echo state object, to toggle the echo canceller.
+	 *
+	 * \param handle Handle to the echo state. Passing 'nullptr' disables the feature.
+	 *
+	 * \return Whether the operation succeeded.
+	 */
+	bool setEchoState(SpeexEchoState_ *handle);
+
+	/**
+	 * \return Whether automatic gain control is enabled.
+	 */
+	bool usesAGC() const;
+	/**
+	 * Toggles automatic gain control.
+	 *
+	 * \param enable 'true' to enable the feature, otherwise 'false'.
+	 *
+	 * \return Whether the operation succeeded.
+	 */
+	bool toggleAGC(bool enable);
+
+	/**
+	 * \return The maximal gain decrease in dB/second.
+	 */
+	std::int32_t getAGCDecrement() const;
+	/**
+	 * Set the maximal gain decrease in dB/second.
+	 *
+	 * \param value Maximal gain decrease in dB/second.
+	 *
+	 * \return Whether the operation succeeded.
+	 */
+	bool setAGCDecrement(std::int32_t value);
+	/**
+	 * \return The current gain in percentual scale (0-100).
+	 */
+	std::int32_t getAGCGain() const;
+
+	/**
+	 * \return The maximal gain increase in dB/second.
+	 */
+	std::int32_t getAGCIncrement() const;
+	/**
+	 * Set the maximal gain increase in dB/second.
+	 *
+	 * \param value Maximal gain increase in dB/second.
+	 *
+	 * \return Whether the operation succeeded.
+	 */
+	bool setAGCIncrement(std::int32_t value);
+	/**
+	 * \return The maximal gain in dB.
+	 */
+	std::int32_t getAGCMaxGain() const;
+	/**
+	 * Set the maximal gain in dB.
+	 *
+	 * \param value Maximal gain in dB.
+	 *
+	 * \return Whether the operation succeeded.
+	 */
+	bool setAGCMaxGain(std::int32_t value);
+	/**
+	 * \return The automatic gain control level, in a scale from 1 to 32768.
+	 */
+	std::int32_t getAGCTarget() const;
+	/**
+	 * Set the automatic gain control level.
+	 *
+	 * \param value Automatic gain control level, in a scale from 1 to 32768.
+	 *
+	 * \return Whether the operation succeeded.
+	 */
+	bool setAGCTarget(std::int32_t value);
+
+	/**
+	 * \return Whether denoise is enabled.
+	 */
+	bool usesDenoise() const;
+	/**
+	 * Toggles denoise.
+	 *
+	 * \param enable 'true' to enable the feature, otherwise 'false'.
+	 *
+	 * \return Whether the operation succeeded.
+	 */
+	bool toggleDenoise(bool enable);
+
+	/**
+	 * \return Whether dereverb is enabled.
+	 */
+	bool usesDereverb() const;
+	/**
+	 * Toggles dereverb.
+	 *
+	 * \param enable 'true' to enable the feature, otherwise 'false'.
+	 *
+	 * \return Whether the operation succeeded.
+	 */
+	bool toggleDereverb(bool enable);
+
+	/**
+	 * \return The maximum attenuation of the noise in dB (negative number).
+	 */
+	std::int32_t getNoiseSuppress() const;
+	/**
+	 * Set the maximum attenuation of the noise in dB.
+	 *
+	 * \param value Maximum attenuation of the noise in dB (negative number).
+	 *
+	 * \return Whether the operation succeeded.
+	 */
+	bool setNoiseSuppress(std::int32_t value);
+
+	/**
+	 * \return The power spectrum (vector of squared values).
+	 */
+	psd_t getPSD() const;
+	/**
+	 * \return The noise estimate (vector of squared values).
+	 */
+	psd_t getNoisePSD() const;
+
+	/**
+	 * \return The probability that there was speech in the last processed frame, in percentual scale (0-100).
+	 */
+	std::int32_t getSpeechProb() const;
+
+	/**
+	 * \return Whether voice activity detection is enabled.
+	 */
+	bool usesVAD() const;
+	/**
+	 * Toggles voice activity detection.
+	 *
+	 * \param enable 'true' to enable the feature, otherwise 'false'.
+	 *
+	 * \return Whether the operation succeeded.
+	 */
+	bool toggleVAD(bool enable);
+
+private:
+	AudioPreprocessor(const AudioPreprocessor &) = delete;
+	AudioPreprocessor &operator=(const AudioPreprocessor &) = delete;
+
+	bool getBool(int op) const;
+	bool setBool(int op, bool value);
+
+	std::int32_t getInt32(int op) const;
+	bool setInt32(int op, std::int32_t value);
+
+	SpeexPreprocessState_ *m_handle = nullptr;
+};
+
+#endif
diff --git a/src/mumble/CMakeLists.txt b/src/mumble/CMakeLists.txt
index fa41283bcca..efde56b9611 100644
--- a/src/mumble/CMakeLists.txt
+++ b/src/mumble/CMakeLists.txt
@@ -111,6 +111,8 @@ set(MUMBLE_SOURCES
 	"AudioOutputBuffer.cpp"
 	"AudioOutputBuffer.h"
 	"AudioOutputToken.h"
+	"AudioPreprocessor.cpp"
+	"AudioPreprocessor.h"
 	"AudioStats.cpp"
 	"AudioStats.h"
 	"AudioStats.ui"