Skip to content

Commit

Permalink
CとJavaのブロッキングAPIを実装 (#705)
Browse files Browse the repository at this point in the history
  • Loading branch information
qryxip authored Dec 10, 2023
1 parent 00a1c53 commit 0788c2e
Show file tree
Hide file tree
Showing 14 changed files with 212 additions and 236 deletions.
2 changes: 0 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/voicevox_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true, features = ["preserve_order"] }
tempfile.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["rt"] }
tokio = { workspace = true, features = ["rt"] } # FIXME: feature-gateする
tracing.workspace = true
uuid = { workspace = true, features = ["v4", "serde"] }
voicevox_core_macros = { path = "../voicevox_core_macros" }
Expand Down
54 changes: 9 additions & 45 deletions crates/voicevox_core/src/synthesizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,8 @@ impl<O> self::blocking::Synthesizer<O> {
self.status.is_loaded_model(voice_model_id)
}

fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool {
#[doc(hidden)]
pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool {
self.status.is_loaded_model_by_style_id(style_id)
}

Expand Down Expand Up @@ -1039,47 +1040,6 @@ pub trait PerformInference {
) -> Result<Vec<f32>>;
}

/// Implements `PerformInference` for the `tokio` flavour of `Synthesizer` by
/// forwarding every method to the value wrapped in `self.0`.
///
/// No argument is transformed and no result is post-processed here; each
/// method returns exactly what the wrapped value returns.
impl<O> PerformInference for self::tokio::Synthesizer<O> {
// Forwards to `self.0.predict_duration` with the arguments unchanged.
fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result<Vec<f32>> {
self.0.predict_duration(phoneme_vector, style_id)
}

// Forwards to `self.0.predict_intonation`; the arguments are passed in the
// same order in which they are declared.
fn predict_intonation(
&self,
length: usize,
vowel_phoneme_vector: &[i64],
consonant_phoneme_vector: &[i64],
start_accent_vector: &[i64],
end_accent_vector: &[i64],
start_accent_phrase_vector: &[i64],
end_accent_phrase_vector: &[i64],
style_id: StyleId,
) -> Result<Vec<f32>> {
self.0.predict_intonation(
length,
vowel_phoneme_vector,
consonant_phoneme_vector,
start_accent_vector,
end_accent_vector,
start_accent_phrase_vector,
end_accent_phrase_vector,
style_id,
)
}

// Forwards to `self.0.decode` with the arguments unchanged.
fn decode(
&self,
length: usize,
phoneme_size: usize,
f0: &[f32],
phoneme_vector: &[f32],
style_id: StyleId,
) -> Result<Vec<f32>> {
self.0
.decode(length, phoneme_size, f0, phoneme_vector, style_id)
}
}

impl<O> PerformInference for self::blocking::Synthesizer<O> {
fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result<Vec<f32>> {
// FIXME: `Status::ids_for`があるため、ここは不要なはず
Expand Down Expand Up @@ -1516,7 +1476,9 @@ mod tests {
30, 35, 14, 23, 7, 21, 14, 43, 30, 30, 23, 30, 35, 30, 0,
];

let result = syntesizer.predict_duration(&phoneme_vector, StyleId::new(1));
let result = syntesizer
.0
.predict_duration(&phoneme_vector, StyleId::new(1));

assert!(result.is_ok(), "{result:?}");
assert_eq!(result.unwrap().len(), phoneme_vector.len());
Expand Down Expand Up @@ -1546,7 +1508,7 @@ mod tests {
let start_accent_phrase_vector = [0, 1, 0, 0, 0];
let end_accent_phrase_vector = [0, 0, 0, 1, 0];

let result = syntesizer.predict_intonation(
let result = syntesizer.0.predict_intonation(
vowel_phoneme_vector.len(),
&vowel_phoneme_vector,
&consonant_phoneme_vector,
Expand Down Expand Up @@ -1599,7 +1561,9 @@ mod tests {
set_one(30, 45..60);
set_one(0, 60..69);

let result = syntesizer.decode(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, StyleId::new(1));
let result = syntesizer
.0
.decode(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, StyleId::new(1));

assert!(result.is_ok(), "{result:?}");
assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
Expand Down
1 change: 0 additions & 1 deletion crates/voicevox_core_c_api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ once_cell.workspace = true
process_path.workspace = true
serde_json = { workspace = true, features = ["preserve_order"] }
thiserror.workspace = true
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tracing.workspace = true
tracing-subscriber = { workspace = true, features = ["env-filter"] }
uuid.workspace = true
Expand Down
20 changes: 8 additions & 12 deletions crates/voicevox_core_c_api/src/c_impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,25 @@ use voicevox_core::{InitializeOptions, Result, VoiceModelId};
use crate::{CApiResult, OpenJtalkRc, VoicevoxSynthesizer, VoicevoxVoiceModel};

impl OpenJtalkRc {
pub(crate) async fn new(open_jtalk_dic_dir: impl AsRef<Path>) -> Result<Self> {
pub(crate) fn new(open_jtalk_dic_dir: impl AsRef<Path>) -> Result<Self> {
Ok(Self {
open_jtalk: voicevox_core::tokio::OpenJtalk::new(open_jtalk_dic_dir).await?,
open_jtalk: voicevox_core::blocking::OpenJtalk::new(open_jtalk_dic_dir)?,
})
}
}

impl VoicevoxSynthesizer {
pub(crate) fn new(open_jtalk: &OpenJtalkRc, options: &InitializeOptions) -> Result<Self> {
// ロガーを起動
// FIXME: `into_result_code_with_error`を`run`とかに改名し、`init_logger`をその中に移動
let _ = *crate::RUNTIME;

let synthesizer =
voicevox_core::tokio::Synthesizer::new(open_jtalk.open_jtalk.clone(), options)?;
voicevox_core::blocking::Synthesizer::new(open_jtalk.open_jtalk.clone(), options)?;
Ok(Self { synthesizer })
}

pub(crate) async fn load_voice_model(
pub(crate) fn load_voice_model(
&self,
model: &voicevox_core::tokio::VoiceModel,
model: &voicevox_core::blocking::VoiceModel,
) -> CApiResult<()> {
self.synthesizer.load_voice_model(model).await?;
self.synthesizer.load_voice_model(model)?;
Ok(())
}

Expand All @@ -43,8 +39,8 @@ impl VoicevoxSynthesizer {
}

impl VoicevoxVoiceModel {
pub(crate) async fn from_path(path: impl AsRef<Path>) -> Result<Self> {
let model = voicevox_core::tokio::VoiceModel::from_path(path).await?;
pub(crate) fn from_path(path: impl AsRef<Path>) -> Result<Self> {
let model = voicevox_core::blocking::VoiceModel::from_path(path)?;
let id = CString::new(model.id().raw_voice_model_id().as_str()).unwrap();
let metas = CString::new(serde_json::to_string(model.metas()).unwrap()).unwrap();
Ok(Self { model, id, metas })
Expand Down
42 changes: 23 additions & 19 deletions crates/voicevox_core_c_api/src/compatible_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ macro_rules! ensure_initialized {
static ERROR_MESSAGE: Lazy<Mutex<String>> = Lazy::new(|| Mutex::new(String::new()));

struct VoiceModelSet {
all_vvms: Vec<voicevox_core::tokio::VoiceModel>,
all_vvms: Vec<voicevox_core::blocking::VoiceModel>,
all_metas_json: CString,
style_model_map: BTreeMap<StyleId, VoiceModelId>,
model_map: BTreeMap<VoiceModelId, voicevox_core::tokio::VoiceModel>,
model_map: BTreeMap<VoiceModelId, voicevox_core::blocking::VoiceModel>,
}

static VOICE_MODEL_SET: Lazy<VoiceModelSet> = Lazy::new(|| {
let all_vvms = RUNTIME.block_on(get_all_models());
let all_vvms = get_all_models();
let model_map: BTreeMap<_, _> = all_vvms
.iter()
.map(|vvm| (vvm.id().clone(), vvm.clone()))
Expand All @@ -52,7 +52,7 @@ static VOICE_MODEL_SET: Lazy<VoiceModelSet> = Lazy::new(|| {
/// # Panics
///
/// 失敗したらパニックする
async fn get_all_models() -> Vec<voicevox_core::tokio::VoiceModel> {
fn get_all_models() -> Vec<voicevox_core::blocking::VoiceModel> {
let root_dir = if let Some(root_dir) = env::var_os(ROOT_DIR_ENV_NAME) {
root_dir.into()
} else {
Expand All @@ -64,17 +64,13 @@ static VOICE_MODEL_SET: Lazy<VoiceModelSet> = Lazy::new(|| {
.join("model")
};

let vvm_paths = root_dir
root_dir
.read_dir()
.and_then(|entries| entries.collect::<std::result::Result<Vec<_>, _>>())
.unwrap_or_else(|e| panic!("{}が読めませんでした: {e}", root_dir.display()))
.into_iter()
.filter(|entry| entry.path().extension().map_or(false, |ext| ext == "vvm"))
.map(|entry| voicevox_core::tokio::VoiceModel::from_path(entry.path()));

futures::future::join_all(vvm_paths)
.await
.into_iter()
.map(|entry| voicevox_core::blocking::VoiceModel::from_path(entry.path()))
.collect::<std::result::Result<_, _>>()
.unwrap()
}
Expand All @@ -88,10 +84,10 @@ fn voice_model_set() -> &'static VoiceModelSet {
&VOICE_MODEL_SET
}

static SYNTHESIZER: Lazy<Mutex<Option<voicevox_core::tokio::Synthesizer<()>>>> =
static SYNTHESIZER: Lazy<Mutex<Option<voicevox_core::blocking::Synthesizer<()>>>> =
Lazy::new(|| Mutex::new(None));

fn lock_synthesizer() -> MutexGuard<'static, Option<voicevox_core::tokio::Synthesizer<()>>> {
fn lock_synthesizer() -> MutexGuard<'static, Option<voicevox_core::blocking::Synthesizer<()>>> {
SYNTHESIZER.lock().unwrap()
}

Expand All @@ -104,10 +100,9 @@ fn set_message(message: &str) {

#[no_mangle]
pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_models: bool) -> bool {
// FIXME: ここはもう`RUNTIME.block_on`で包む必要は無くなっているのだが、ロガーの設定を`RUNTIME`
// で行っているという構造になってしまっているので、外すとロガーの初期化が遅れてしまう
let result = RUNTIME.block_on(async {
let synthesizer = voicevox_core::tokio::Synthesizer::new(
init_logger_once();
let result = (|| {
let synthesizer = voicevox_core::blocking::Synthesizer::new(
(),
&voicevox_core::InitializeOptions {
acceleration_mode: if use_gpu {
Expand All @@ -121,12 +116,12 @@ pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_mod

if load_all_models {
for model in &voice_model_set().all_vvms {
synthesizer.load_voice_model(model).await?;
synthesizer.load_voice_model(model)?;
}
}

Ok::<_, voicevox_core::Error>(synthesizer)
});
})();

match result {
Ok(synthesizer) => {
Expand All @@ -142,12 +137,13 @@ pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_mod

#[no_mangle]
pub extern "C" fn load_model(style_id: i64) -> bool {
init_logger_once();
let style_id = StyleId::new(style_id as u32);
let model_set = voice_model_set();
if let Some(model_id) = model_set.style_model_map.get(&style_id) {
let vvm = model_set.model_map.get(model_id).unwrap();
let synthesizer = &mut *lock_synthesizer();
let result = RUNTIME.block_on(ensure_initialized!(synthesizer).load_voice_model(vvm));
let result = ensure_initialized!(synthesizer).load_voice_model(vvm);
if let Some(err) = result.err() {
set_message(&format!("{err}"));
false
Expand All @@ -162,28 +158,33 @@ pub extern "C" fn load_model(style_id: i64) -> bool {

#[no_mangle]
pub extern "C" fn is_model_loaded(speaker_id: i64) -> bool {
init_logger_once();
ensure_initialized!(&*lock_synthesizer())
.is_loaded_model_by_style_id(StyleId::new(speaker_id as u32))
}

/// Discards the global synthesizer, leaving the shared slot empty (`None`).
///
/// Any synthesizer that was stored is dropped while the lock is still held.
/// Calling this when nothing is stored simply leaves the slot as `None`.
#[no_mangle]
pub extern "C" fn finalize() {
    init_logger_once();
    // `take` swaps `None` into the slot; the previous value is dropped right away.
    drop(lock_synthesizer().take());
}

/// Returns a pointer to the metadata JSON of the process-wide voice model set.
///
/// The pointer refers to the `all_metas_json` `CString` owned by the set that
/// `voice_model_set()` hands out as a `&'static` reference, so the caller must
/// not free or modify it.
#[no_mangle]
pub extern "C" fn metas() -> *const c_char {
    init_logger_once();
    voice_model_set().all_metas_json.as_ptr()
}

/// Returns a pointer to the bytes of the shared `ERROR_MESSAGE` string.
///
/// The mutex guard is released when this function returns, so the pointer is
/// not protected by the lock afterwards.
///
/// NOTE(review): a `String` carries no implicit NUL terminator; presumably
/// `set_message` appends one so that C callers can read this as a C string —
/// confirm against `set_message`.
///
/// # Panics
///
/// Panics if the `ERROR_MESSAGE` mutex is poisoned.
#[no_mangle]
pub extern "C" fn last_error_message() -> *const c_char {
    init_logger_once();
    let message = ERROR_MESSAGE.lock().unwrap();
    message.as_ptr().cast::<c_char>()
}

#[no_mangle]
pub extern "C" fn supported_devices() -> *const c_char {
init_logger_once();
return SUPPORTED_DEVICES.as_ptr();

static SUPPORTED_DEVICES: Lazy<CString> = Lazy::new(|| {
Expand All @@ -198,6 +199,7 @@ pub extern "C" fn yukarin_s_forward(
speaker_id: *mut i64,
output: *mut f32,
) -> bool {
init_logger_once();
let synthesizer = &*lock_synthesizer();
let result = ensure_initialized!(synthesizer).predict_duration(
unsafe { std::slice::from_raw_parts_mut(phoneme_list, length as usize) },
Expand Down Expand Up @@ -228,6 +230,7 @@ pub extern "C" fn yukarin_sa_forward(
speaker_id: *mut i64,
output: *mut f32,
) -> bool {
init_logger_once();
let synthesizer = &*lock_synthesizer();
let result = ensure_initialized!(synthesizer).predict_intonation(
length as usize,
Expand Down Expand Up @@ -261,6 +264,7 @@ pub extern "C" fn decode_forward(
speaker_id: *mut i64,
output: *mut f32,
) -> bool {
init_logger_once();
let length = length as usize;
let phoneme_size = phoneme_size as usize;
let synthesizer = &*lock_synthesizer();
Expand Down
Loading

0 comments on commit 0788c2e

Please sign in to comment.