REFAC(client): Introduce AudioPreprocessor

mumble-voip · Jul 17, 2024 · 1bd56b1 · 1bd56b1
1 parent b75fe54
commit 1bd56b1
Show file tree

Hide file tree

Showing 3 changed files with 395 additions and 0 deletions.
diff --git a/src/mumble/AudioPreprocessor.cpp b/src/mumble/AudioPreprocessor.cpp
@@ -0,0 +1,178 @@
+// Copyright 2024 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#include "AudioPreprocessor.h"
+
+#include <utility>
+
+#include <speex/speex_preprocess.h>
+
+/**
+ * Move constructor: adopts the state handle held by \p other and leaves \p other without one.
+ */
+AudioPreprocessor::AudioPreprocessor(AudioPreprocessor &&other) : m_handle(other.m_handle) {
+	// Detach the handle from the source object, so that only this instance refers to it from now on.
+	other.m_handle = nullptr;
+}
+
+/**
+ * Destructor: cleanup is handed over to deinit().
+ */
+AudioPreprocessor::~AudioPreprocessor() {
+	this->deinit();
+}
+
+/**
+ * Move assignment: takes over the state handle held by \p other.
+ *
+ * The state currently held by this object (if any) is destroyed first,
+ * so that it is not leaked when its handle gets overwritten.
+ * Assigning an object to itself leaves it untouched.
+ *
+ * \param other The object to take the state from. It holds no state afterwards.
+ *
+ * \return Reference to this object.
+ */
+AudioPreprocessor &AudioPreprocessor::operator=(AudioPreprocessor &&other) {
+	if (this != &other) {
+		// Release our own state before the handle is replaced.
+		deinit();
+		m_handle = std::exchange(other.m_handle, nullptr);
+	}
+
+	return *this;
+}
+
+/**
+ * (Re)creates the underlying Speex preprocessor state.
+ * deinit() is invoked first, before the new state is requested.
+ *
+ * \param sampleRate The sample rate in Hz.
+ *
+ * \param quantum The number of samples that are expected for each preprocess cycle.
+ *
+ * \return Whether a state handle was obtained.
+ */
+bool AudioPreprocessor::init(const std::uint32_t sampleRate, const std::uint32_t quantum) {
+	deinit();
+
+	// Speex is given the frame size first and the sample rate second, both as plain 'int'.
+	const auto frameSize = static_cast< int >(quantum);
+	const auto rate      = static_cast< int >(sampleRate);
+
+	m_handle = speex_preprocess_state_init(frameSize, rate);
+	if (!m_handle) {
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * Destroys the underlying Speex preprocessor state, if there is one.
+ *
+ * The handle is reset afterwards, so that the object reports itself as uninitialized
+ * and a later call (e.g. from the destructor) cannot destroy the same state twice.
+ */
+void AudioPreprocessor::deinit() {
+	if (!m_handle) {
+		return;
+	}
+
+	speex_preprocess_state_destroy(m_handle);
+	m_handle = nullptr;
+}
+
+/**
+ * Hands the samples to the Speex preprocessor, which modifies them in-place.
+ *
+ * \param samples The samples to process. The address of this reference is what Speex receives.
+ *
+ * \return Whether the value returned by Speex is non-zero.
+ */
+bool AudioPreprocessor::run(std::int16_t &samples) {
+	const int result = speex_preprocess_run(m_handle, &samples);
+	return result != 0;
+}
+
+/**
+ * Queries the echo state object that is attached to the preprocessor.
+ *
+ * \return Handle to the echo state object, 'nullptr' if the request failed.
+ */
+SpeexEchoState_ *AudioPreprocessor::getEchoState() {
+	// Start from 'nullptr' so that an indeterminate pointer can never be returned,
+	// even if the request is reported as successful without anything being written.
+	SpeexEchoState_ *handle = nullptr;
+	if (speex_preprocess_ctl(m_handle, SPEEX_PREPROCESS_GET_ECHO_STATE, &handle) != 0) {
+		return nullptr;
+	}
+
+	return handle;
+}
+
+/**
+ * Attaches an echo state object to the preprocessor.
+ *
+ * \param handle Handle to the echo state, forwarded to Speex as-is.
+ *
+ * \return Whether Speex reported success (status code 0).
+ */
+bool AudioPreprocessor::setEchoState(SpeexEchoState_ *handle) {
+	const int status = speex_preprocess_ctl(m_handle, SPEEX_PREPROCESS_SET_ECHO_STATE, handle);
+	return status == 0;
+}
+
+/**
+ * \return Whether automatic gain control is enabled ('false' as well if the query failed).
+ */
+bool AudioPreprocessor::usesAGC() const {
+	const bool enabled = getBool(SPEEX_PREPROCESS_GET_AGC);
+	return enabled;
+}
+
+/**
+ * Switches automatic gain control on or off.
+ *
+ * \param enable 'true' to enable the feature, otherwise 'false'.
+ *
+ * \return Whether the operation succeeded.
+ */
+bool AudioPreprocessor::toggleAGC(const bool enable) {
+	const bool succeeded = setBool(SPEEX_PREPROCESS_SET_AGC, enable);
+	return succeeded;
+}
+
+/**
+ * \return The maximal gain decrease in dB/second (0 if the query failed).
+ */
+std::int32_t AudioPreprocessor::getAGCDecrement() const {
+	const std::int32_t decrement = getInt32(SPEEX_PREPROCESS_GET_AGC_DECREMENT);
+	return decrement;
+}
+
+/**
+ * Sets the maximal gain decrease.
+ *
+ * \param value Maximal gain decrease in dB/second.
+ *
+ * \return Whether the operation succeeded.
+ */
+bool AudioPreprocessor::setAGCDecrement(const std::int32_t value) {
+	const bool succeeded = setInt32(SPEEX_PREPROCESS_SET_AGC_DECREMENT, value);
+	return succeeded;
+}
+
+/**
+ * \return The current gain as reported by Speex (0 if the query failed).
+ */
+std::int32_t AudioPreprocessor::getAGCGain() const {
+	const std::int32_t gain = getInt32(SPEEX_PREPROCESS_GET_AGC_GAIN);
+	return gain;
+}
+
+/**
+ * \return The maximal gain increase in dB/second (0 if the query failed).
+ */
+std::int32_t AudioPreprocessor::getAGCIncrement() const {
+	const std::int32_t increment = getInt32(SPEEX_PREPROCESS_GET_AGC_INCREMENT);
+	return increment;
+}
+
+/**
+ * Sets the maximal gain increase.
+ *
+ * \param value Maximal gain increase in dB/second.
+ *
+ * \return Whether the operation succeeded.
+ */
+bool AudioPreprocessor::setAGCIncrement(const std::int32_t value) {
+	const bool succeeded = setInt32(SPEEX_PREPROCESS_SET_AGC_INCREMENT, value);
+	return succeeded;
+}
+
+/**
+ * \return The maximal gain in dB (0 if the query failed).
+ */
+std::int32_t AudioPreprocessor::getAGCMaxGain() const {
+	const std::int32_t maxGain = getInt32(SPEEX_PREPROCESS_GET_AGC_MAX_GAIN);
+	return maxGain;
+}
+
+/**
+ * Sets the maximal gain.
+ *
+ * \param value Maximal gain in dB.
+ *
+ * \return Whether the operation succeeded.
+ */
+bool AudioPreprocessor::setAGCMaxGain(const std::int32_t value) {
+	const bool succeeded = setInt32(SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, value);
+	return succeeded;
+}
+
+/**
+ * \return The automatic gain control level (0 if the query failed).
+ */
+std::int32_t AudioPreprocessor::getAGCTarget() const {
+	const std::int32_t target = getInt32(SPEEX_PREPROCESS_GET_AGC_TARGET);
+	return target;
+}
+
+/**
+ * Sets the automatic gain control level.
+ *
+ * \param value Automatic gain control level.
+ *
+ * \return Whether the operation succeeded.
+ */
+bool AudioPreprocessor::setAGCTarget(const std::int32_t value) {
+	const bool succeeded = setInt32(SPEEX_PREPROCESS_SET_AGC_TARGET, value);
+	return succeeded;
+}
+
+/**
+ * \return Whether denoise is enabled ('false' as well if the query failed).
+ */
+bool AudioPreprocessor::usesDenoise() const {
+	const bool enabled = getBool(SPEEX_PREPROCESS_GET_DENOISE);
+	return enabled;
+}
+
+/**
+ * Switches denoise on or off.
+ *
+ * \param enable 'true' to enable the feature, otherwise 'false'.
+ *
+ * \return Whether the operation succeeded.
+ */
+bool AudioPreprocessor::toggleDenoise(const bool enable) {
+	const bool succeeded = setBool(SPEEX_PREPROCESS_SET_DENOISE, enable);
+	return succeeded;
+}
+
+/**
+ * \return Whether dereverb is enabled ('false' as well if the query failed).
+ */
+bool AudioPreprocessor::usesDereverb() const {
+	const bool enabled = getBool(SPEEX_PREPROCESS_GET_DEREVERB);
+	return enabled;
+}
+
+/**
+ * Switches dereverb on or off.
+ *
+ * \param enable 'true' to enable the feature, otherwise 'false'.
+ *
+ * \return Whether the operation succeeded.
+ */
+bool AudioPreprocessor::toggleDereverb(const bool enable) {
+	const bool succeeded = setBool(SPEEX_PREPROCESS_SET_DEREVERB, enable);
+	return succeeded;
+}
+
+/**
+ * \return The maximum attenuation of the noise in dB (0 if the query failed).
+ */
+std::int32_t AudioPreprocessor::getNoiseSuppress() const {
+	const std::int32_t attenuation = getInt32(SPEEX_PREPROCESS_GET_NOISE_SUPPRESS);
+	return attenuation;
+}
+
+/**
+ * Sets the maximum attenuation of the noise.
+ *
+ * \param value Maximum attenuation of the noise in dB (negative number).
+ *
+ * \return Whether the operation succeeded.
+ */
+bool AudioPreprocessor::setNoiseSuppress(const std::int32_t value) {
+	const bool succeeded = setInt32(SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, value);
+	return succeeded;
+}
+
+/**
+ * Retrieves the power spectrum.
+ *
+ * The number of elements is queried from Speex first, then a buffer of that size is filled.
+ *
+ * \return The power spectrum. Empty if the size is not positive or if the request failed.
+ */
+AudioPreprocessor::psd_t AudioPreprocessor::getPSD() const {
+	const auto size = getInt32(SPEEX_PREPROCESS_GET_PSD_SIZE);
+	// A negative value must not reach the cast below, it would wrap around to a huge element count.
+	if (size <= 0) {
+		return {};
+	}
+
+	psd_t ret(static_cast< size_t >(size));
+	if (speex_preprocess_ctl(m_handle, SPEEX_PREPROCESS_GET_PSD, ret.data()) != 0) {
+		return {};
+	}
+
+	return ret;
+}
+
+/**
+ * Retrieves the noise estimate.
+ *
+ * The number of elements is queried from Speex first, then a buffer of that size is filled.
+ *
+ * \return The noise estimate. Empty if the size is not positive or if the request failed.
+ */
+AudioPreprocessor::psd_t AudioPreprocessor::getNoisePSD() const {
+	const auto size = getInt32(SPEEX_PREPROCESS_GET_PSD_SIZE);
+	// A negative value must not reach the cast below, it would wrap around to a huge element count.
+	if (size <= 0) {
+		return {};
+	}
+
+	psd_t ret(static_cast< size_t >(size));
+	if (speex_preprocess_ctl(m_handle, SPEEX_PREPROCESS_GET_NOISE_PSD, ret.data()) != 0) {
+		return {};
+	}
+
+	return ret;
+}
+
+/**
+ * \return The speech probability reported by Speex for the last processed frame (0 if the query failed).
+ */
+std::int32_t AudioPreprocessor::getSpeechProb() const {
+	const std::int32_t probability = getInt32(SPEEX_PREPROCESS_GET_PROB);
+	return probability;
+}
+
+/**
+ * \return Whether voice activity detection is enabled ('false' as well if the query failed).
+ */
+bool AudioPreprocessor::usesVAD() const {
+	const bool enabled = getBool(SPEEX_PREPROCESS_GET_VAD);
+	return enabled;
+}
+
+/**
+ * Switches voice activity detection on or off.
+ *
+ * \param enable 'true' to enable the feature, otherwise 'false'.
+ *
+ * \return Whether the operation succeeded.
+ */
+bool AudioPreprocessor::toggleVAD(const bool enable) {
+	const bool succeeded = setBool(SPEEX_PREPROCESS_SET_VAD, enable);
+	return succeeded;
+}
+
+/**
+ * Reads an integer option and interprets it as a boolean.
+ *
+ * \param op The request code that is passed on to getInt32().
+ *
+ * \return 'true' if the value is non-zero. Since getInt32() yields 0 on failure, the result is 'false' in that case.
+ */
+bool AudioPreprocessor::getBool(const int op) const {
+	return getInt32(op) != 0;
+}
+
+/**
+ * Writes a boolean option as an integer (1 for 'true', 0 for 'false').
+ *
+ * \param op The request code that is passed on to setInt32().
+ *
+ * \param value The boolean to write.
+ *
+ * \return Whether the operation succeeded.
+ */
+bool AudioPreprocessor::setBool(const int op, const bool value) {
+	const std::int32_t raw = value ? 1 : 0;
+	return setInt32(op, raw);
+}
+
+/**
+ * Reads a 32 bit integer option from the preprocessor.
+ *
+ * \param op The request code that is passed to speex_preprocess_ctl().
+ *
+ * \return The retrieved value, 0 if the request failed.
+ */
+std::int32_t AudioPreprocessor::getInt32(const int op) const {
+	// Zero-initialized so that the result is well-defined
+	// even if the request is reported as successful without anything being written.
+	spx_int32_t value = 0;
+	if (speex_preprocess_ctl(m_handle, op, &value) != 0) {
+		return 0;
+	}
+
+	return value;
+}
+
+/**
+ * Writes a 32 bit integer option to the preprocessor.
+ *
+ * \param op The request code that is passed to speex_preprocess_ctl().
+ *
+ * \param value The value to write. It is taken by value, Speex receives the address of this local copy.
+ *
+ * \return Whether Speex reported success (status code 0).
+ */
+bool AudioPreprocessor::setInt32(const int op, std::int32_t value) {
+	const int status = speex_preprocess_ctl(m_handle, op, &value);
+	return status == 0;
+}
diff --git a/src/mumble/AudioPreprocessor.h b/src/mumble/AudioPreprocessor.h
@@ -0,0 +1,215 @@
+// Copyright 2024 The Mumble Developers. All rights reserved.
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file at the root of the
+// Mumble source tree or at <https://www.mumble.info/LICENSE>.
+
+#ifndef MUMBLE_MUMBLE_AUDIOPREPROCESSOR_H_
+#define MUMBLE_MUMBLE_AUDIOPREPROCESSOR_H_
+
+#include <cstdint>
+#include <vector>
+
+struct SpeexEchoState_;
+struct SpeexPreprocessState_;
+
+/**
+ * Move-only wrapper around a Speex preprocessor state.
+ *
+ * A default-constructed object holds no state, init() has to succeed before anything else is used.
+ */
+class AudioPreprocessor {
+public:
+	/** Container used for the spectrum data returned by getPSD() and getNoisePSD(). */
+	using psd_t = std::vector< std::int32_t >;
+
+	/**
+	 * \return 'true' if the object holds a state handle, meaning that it is initialized
+	 *         and that its methods can be called safely.
+	 */
+	constexpr explicit operator bool() const { return m_handle != nullptr; }
+
+	/** Creates an object without state. */
+	AudioPreprocessor() = default;
+	/** Takes over the state handle of \p other. */
+	AudioPreprocessor(AudioPreprocessor &&other);
+	/** Calls deinit(). */
+	~AudioPreprocessor();
+
+	/** Takes over the state handle of \p other. */
+	AudioPreprocessor &operator=(AudioPreprocessor &&other);
+
+	/**
+	 * Sets the object up for use. Every other function requires this to have been called at least once!
+	 *
+	 * \param sampleRate Sample rate, in Hz.
+	 *
+	 * \param quantum How many samples each preprocess cycle is going to receive.
+	 *                10-20 ms worth of audio is recommended (480-960 samples at 48000 Hz).
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool init(std::uint32_t sampleRate, std::uint32_t quantum);
+	/**
+	 * Tears the object down. Nothing happens if it is not initialized.
+	 * Afterwards init() is the only function that is allowed to be called.
+	 */
+	void deinit();
+
+	/**
+	 * Executes the preprocessor on the given samples: they are analyzed and altered.
+	 *
+	 * \param samples The samples to process, they are overwritten with the result.
+	 *
+	 * \return With VAD enabled: whether speech was detected. Without VAD: always 'true'.
+	 */
+	bool run(std::int16_t &samples);
+
+	/**
+	 * \return The echo state object's handle.
+	 */
+	SpeexEchoState_ *getEchoState();
+	/**
+	 * Assigns the echo state object, which controls whether the echo canceller is active.
+	 *
+	 * \param handle The echo state's handle. The feature is disabled by passing 'nullptr'.
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool setEchoState(SpeexEchoState_ *handle);
+
+	/**
+	 * \return 'true' if automatic gain control is active.
+	 */
+	bool usesAGC() const;
+	/**
+	 * Enables or disables automatic gain control.
+	 *
+	 * \param enable Whether the feature should be active.
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool toggleAGC(bool enable);
+
+	/**
+	 * \return How fast the gain may decrease at most, in dB/second.
+	 */
+	std::int32_t getAGCDecrement() const;
+	/**
+	 * Configures how fast the gain may decrease at most.
+	 *
+	 * \param value The limit, in dB/second.
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool setAGCDecrement(std::int32_t value);
+	/**
+	 * \return The gain that is currently applied, as a percentage.
+	 */
+	std::int32_t getAGCGain() const;
+
+	/**
+	 * \return How fast the gain may increase at most, in dB/second.
+	 */
+	std::int32_t getAGCIncrement() const;
+	/**
+	 * Configures how fast the gain may increase at most.
+	 *
+	 * \param value The limit, in dB/second.
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool setAGCIncrement(std::int32_t value);
+	/**
+	 * \return The upper limit for the gain, in dB.
+	 */
+	std::int32_t getAGCMaxGain() const;
+	/**
+	 * Configures the upper limit for the gain.
+	 *
+	 * \param value The limit, in dB.
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool setAGCMaxGain(std::int32_t value);
+	/**
+	 * \return The level automatic gain control aims for, between 1 and 32768.
+	 */
+	std::int32_t getAGCTarget() const;
+	/**
+	 * Configures the level automatic gain control aims for.
+	 *
+	 * \param value The level, between 1 and 32768.
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool setAGCTarget(std::int32_t value);
+
+	/**
+	 * \return 'true' if denoise is active.
+	 */
+	bool usesDenoise() const;
+	/**
+	 * Enables or disables denoise.
+	 *
+	 * \param enable Whether the feature should be active.
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool toggleDenoise(bool enable);
+
+	/**
+	 * \return 'true' if dereverb is active.
+	 */
+	bool usesDereverb() const;
+	/**
+	 * Enables or disables dereverb.
+	 *
+	 * \param enable Whether the feature should be active.
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool toggleDereverb(bool enable);
+
+	/**
+	 * \return By how much the noise is attenuated at most, in dB (negative number).
+	 */
+	std::int32_t getNoiseSuppress() const;
+	/**
+	 * Configures by how much the noise is attenuated at most.
+	 *
+	 * \param value The limit, in dB (negative number).
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool setNoiseSuppress(std::int32_t value);
+
+	/**
+	 * \return Vector of squared values representing the power spectrum.
+	 */
+	psd_t getPSD() const;
+	/**
+	 * \return Vector of squared values representing the noise estimate.
+	 */
+	psd_t getNoisePSD() const;
+
+	/**
+	 * \return How likely it is that the last processed frame contained speech, as a percentage (0-100).
+	 */
+	std::int32_t getSpeechProb() const;
+
+	/**
+	 * \return 'true' if voice activity detection is active.
+	 */
+	bool usesVAD() const;
+	/**
+	 * Enables or disables voice activity detection.
+	 *
+	 * \param enable Whether the feature should be active.
+	 *
+	 * \return 'true' on success, 'false' otherwise.
+	 */
+	bool toggleVAD(bool enable);
+
+private:
+	// Copying is not supported, the object can only be moved.
+	AudioPreprocessor(const AudioPreprocessor &)            = delete;
+	AudioPreprocessor &operator=(const AudioPreprocessor &) = delete;
+
+	// Helpers for options that are exposed as booleans.
+	bool getBool(int op) const;
+	bool setBool(int op, bool value);
+
+	// Helpers for options that are exposed as 32 bit integers.
+	std::int32_t getInt32(int op) const;
+	bool setInt32(int op, std::int32_t value);
+
+	// Handle to the Speex preprocessor state, 'nullptr' while the object holds none.
+	SpeexPreprocessState_ *m_handle = nullptr;
+};
+
+#endif