Skip to content

Commit

Permalink
Added TTS and ASR module
Browse files Browse the repository at this point in the history
  • Loading branch information
dialogflowchatbot committed Sep 8, 2024
1 parent a4ebea7 commit da7e18e
Show file tree
Hide file tree
Showing 10 changed files with 697 additions and 17 deletions.
10 changes: 6 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "dialogflow"
version = "1.16.1"
version = "1.17.0"
edition = "2021"
homepage = "https://dialogflowchatbot.github.io/"
authors = ["dialogflowchatbot <[email protected]>"]
Expand All @@ -18,10 +18,12 @@ anyhow = "1.0"
axum = {version = "0.7", features = ["query", "tokio", "macros"]}
bigdecimal = "0.4"
# candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.5.1" }
candle = { version = "0.6", package = "candle-core", default-features = false }
candle-nn = "0.6"
# candle = { version = "0.6", package = "candle-core", default-features = false }
candle = { git = "https://github.com/huggingface/candle.git", package = "candle-core", default-features = false }
candle-nn = { git = "https://github.com/huggingface/candle.git" }
# candle-onnx = "0.6"
candle-transformers = { version = "0.6" }
candle-transformers = { git = "https://github.com/huggingface/candle.git" }
# candle-transformers = { version = "0.6" }
# candle-transformers = { version = "0.6", features = ["flash-attn"] }
# crossbeam-channel = "0.5"
frand = "0.10"
Expand Down
9 changes: 9 additions & 0 deletions src/ai/asr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use serde::{Deserialize, Serialize};

use super::huggingface::{load_bert_model_files, HuggingFaceModel, HuggingFaceModelInfo};

/// Selects the backend that supplies the automatic speech recognition (ASR) model.
///
/// Serialized with serde's adjacently tagged layout: the variant name is stored
/// under the `id` key and the variant's payload under the `model` key.
#[derive(Clone, Deserialize, Serialize)]
#[serde(tag = "id", content = "model")]
pub(crate) enum AsrProvider {
/// A model described by a `HuggingFaceModel` value.
HuggingFace(HuggingFaceModel),
}
31 changes: 31 additions & 0 deletions src/ai/audio.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use candle::Tensor;

use crate::result::Result;

/// Scales `wav` so that its gated loudness lands on -14 LKFS.
///
/// * `wav` - audio samples; read back with `to_vec1::<f32>()`, so presumably a
///   1-D (mono) `f32` tensor -- confirm against callers.
/// * `sample_rate` - sample rate handed to the BS.1770 channel loudness meter.
/// * `loudness_compressor` - when `true`, the scaled signal is additionally
///   passed through `tanh`.
///
/// The input is returned unchanged (as a clone) when its RMS energy is below
/// `2e-3`, or when the gated mean over the 100 ms windows yields no value.
///
/// # Errors
///
/// Propagates any error raised by the underlying tensor operations.
pub fn normalize_loudness(
    wav: &Tensor,
    sample_rate: u32,
    loudness_compressor: bool,
) -> Result<Tensor> {
    // Root-mean-square of the whole signal; near-silent input is left alone.
    let rms = wav.sqr()?.mean_all()?.sqrt()?.to_vec0::<f32>()?;
    if rms < 2e-3 {
        return Ok(wav.clone());
    }

    // Feed every sample through the loudness meter.
    let samples = wav.to_vec1::<f32>()?;
    let mut meter = super::bs1770::ChannelLoudnessMeter::new(sample_rate);
    meter.push(samples.into_iter());

    // Without a gated mean there is no loudness estimate to normalize against.
    let Some(gated_power) = super::bs1770::gated_mean(meter.as_100ms_windows()) else {
        return Ok(wav.clone());
    };
    let measured_lkfs = gated_power.loudness_lkfs() as f64;

    // Difference to the -14 LKFS target, converted from decibels to a linear
    // amplitude factor.
    let gain_db = -14. - measured_lkfs;
    let linear_gain = 10f64.powf(gain_db / 20.);
    let scaled = (wav * linear_gain)?;

    Ok(if loudness_compressor {
        scaled.tanh()?
    } else {
        scaled
    })
}
Loading

0 comments on commit da7e18e

Please sign in to comment.