CとJavaのブロッキングAPIを実装 (#705)

VOICEVOX · Dec 10, 2023 · 0788c2e
1 parent 00a1c53
commit 0788c2e
Show file tree

Hide file tree

Showing 14 changed files with 212 additions and 236 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/voicevox_core/Cargo.toml b/crates/voicevox_core/Cargo.toml
@@ -35,7 +35,7 @@ serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true, features = ["preserve_order"] }
 tempfile.workspace = true
 thiserror.workspace = true
-tokio = { workspace = true, features = ["rt"] }
+tokio = { workspace = true, features = ["rt"] } # FIXME: feature-gateする
 tracing.workspace = true
 uuid = { workspace = true, features = ["v4", "serde"] }
 voicevox_core_macros = { path = "../voicevox_core_macros" }

diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs
@@ -357,7 +357,8 @@ impl<O> self::blocking::Synthesizer<O> {
         self.status.is_loaded_model(voice_model_id)
     }
 
-    fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool {
+    #[doc(hidden)]
+    pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool {
         self.status.is_loaded_model_by_style_id(style_id)
     }
 
@@ -1039,47 +1040,6 @@ pub trait PerformInference {
     ) -> Result<Vec<f32>>;
 }
 
-impl<O> PerformInference for self::tokio::Synthesizer<O> {
-    fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result<Vec<f32>> {
-        self.0.predict_duration(phoneme_vector, style_id)
-    }
-
-    fn predict_intonation(
-        &self,
-        length: usize,
-        vowel_phoneme_vector: &[i64],
-        consonant_phoneme_vector: &[i64],
-        start_accent_vector: &[i64],
-        end_accent_vector: &[i64],
-        start_accent_phrase_vector: &[i64],
-        end_accent_phrase_vector: &[i64],
-        style_id: StyleId,
-    ) -> Result<Vec<f32>> {
-        self.0.predict_intonation(
-            length,
-            vowel_phoneme_vector,
-            consonant_phoneme_vector,
-            start_accent_vector,
-            end_accent_vector,
-            start_accent_phrase_vector,
-            end_accent_phrase_vector,
-            style_id,
-        )
-    }
-
-    fn decode(
-        &self,
-        length: usize,
-        phoneme_size: usize,
-        f0: &[f32],
-        phoneme_vector: &[f32],
-        style_id: StyleId,
-    ) -> Result<Vec<f32>> {
-        self.0
-            .decode(length, phoneme_size, f0, phoneme_vector, style_id)
-    }
-}
-
 impl<O> PerformInference for self::blocking::Synthesizer<O> {
     fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result<Vec<f32>> {
         // FIXME: `Status::ids_for`があるため、ここは不要なはず
@@ -1516,7 +1476,9 @@ mod tests {
             30, 35, 14, 23, 7, 21, 14, 43, 30, 30, 23, 30, 35, 30, 0,
         ];
 
-        let result = syntesizer.predict_duration(&phoneme_vector, StyleId::new(1));
+        let result = syntesizer
+            .0
+            .predict_duration(&phoneme_vector, StyleId::new(1));
 
         assert!(result.is_ok(), "{result:?}");
         assert_eq!(result.unwrap().len(), phoneme_vector.len());
@@ -1546,7 +1508,7 @@ mod tests {
         let start_accent_phrase_vector = [0, 1, 0, 0, 0];
         let end_accent_phrase_vector = [0, 0, 0, 1, 0];
 
-        let result = syntesizer.predict_intonation(
+        let result = syntesizer.0.predict_intonation(
             vowel_phoneme_vector.len(),
             &vowel_phoneme_vector,
             &consonant_phoneme_vector,
@@ -1599,7 +1561,9 @@ mod tests {
         set_one(30, 45..60);
         set_one(0, 60..69);
 
-        let result = syntesizer.decode(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, StyleId::new(1));
+        let result = syntesizer
+            .0
+            .decode(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, StyleId::new(1));
 
         assert!(result.is_ok(), "{result:?}");
         assert_eq!(result.unwrap().len(), F0_LENGTH * 256);

diff --git a/crates/voicevox_core_c_api/Cargo.toml b/crates/voicevox_core_c_api/Cargo.toml
@@ -29,7 +29,6 @@ once_cell.workspace = true
 process_path.workspace = true
 serde_json = { workspace = true, features = ["preserve_order"] }
 thiserror.workspace = true
-tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tracing.workspace = true
 tracing-subscriber = { workspace = true, features = ["env-filter"] }
 uuid.workspace = true

diff --git a/crates/voicevox_core_c_api/src/c_impls.rs b/crates/voicevox_core_c_api/src/c_impls.rs
@@ -5,29 +5,25 @@ use voicevox_core::{InitializeOptions, Result, VoiceModelId};
 use crate::{CApiResult, OpenJtalkRc, VoicevoxSynthesizer, VoicevoxVoiceModel};
 
 impl OpenJtalkRc {
-    pub(crate) async fn new(open_jtalk_dic_dir: impl AsRef<Path>) -> Result<Self> {
+    pub(crate) fn new(open_jtalk_dic_dir: impl AsRef<Path>) -> Result<Self> {
         Ok(Self {
-            open_jtalk: voicevox_core::tokio::OpenJtalk::new(open_jtalk_dic_dir).await?,
+            open_jtalk: voicevox_core::blocking::OpenJtalk::new(open_jtalk_dic_dir)?,
         })
     }
 }
 
 impl VoicevoxSynthesizer {
     pub(crate) fn new(open_jtalk: &OpenJtalkRc, options: &InitializeOptions) -> Result<Self> {
-        // ロガーを起動
-        // FIXME: `into_result_code_with_error`を`run`とかに改名し、`init_logger`をその中に移動
-        let _ = *crate::RUNTIME;
-
         let synthesizer =
-            voicevox_core::tokio::Synthesizer::new(open_jtalk.open_jtalk.clone(), options)?;
+            voicevox_core::blocking::Synthesizer::new(open_jtalk.open_jtalk.clone(), options)?;
         Ok(Self { synthesizer })
     }
 
-    pub(crate) async fn load_voice_model(
+    pub(crate) fn load_voice_model(
         &self,
-        model: &voicevox_core::tokio::VoiceModel,
+        model: &voicevox_core::blocking::VoiceModel,
     ) -> CApiResult<()> {
-        self.synthesizer.load_voice_model(model).await?;
+        self.synthesizer.load_voice_model(model)?;
         Ok(())
     }
 
@@ -43,8 +39,8 @@ impl VoicevoxSynthesizer {
 }
 
 impl VoicevoxVoiceModel {
-    pub(crate) async fn from_path(path: impl AsRef<Path>) -> Result<Self> {
-        let model = voicevox_core::tokio::VoiceModel::from_path(path).await?;
+    pub(crate) fn from_path(path: impl AsRef<Path>) -> Result<Self> {
+        let model = voicevox_core::blocking::VoiceModel::from_path(path)?;
         let id = CString::new(model.id().raw_voice_model_id().as_str()).unwrap();
         let metas = CString::new(serde_json::to_string(model.metas()).unwrap()).unwrap();
         Ok(Self { model, id, metas })

diff --git a/crates/voicevox_core_c_api/src/compatible_engine.rs b/crates/voicevox_core_c_api/src/compatible_engine.rs
@@ -20,14 +20,14 @@ macro_rules! ensure_initialized {
 static ERROR_MESSAGE: Lazy<Mutex<String>> = Lazy::new(|| Mutex::new(String::new()));
 
 struct VoiceModelSet {
-    all_vvms: Vec<voicevox_core::tokio::VoiceModel>,
+    all_vvms: Vec<voicevox_core::blocking::VoiceModel>,
     all_metas_json: CString,
     style_model_map: BTreeMap<StyleId, VoiceModelId>,
-    model_map: BTreeMap<VoiceModelId, voicevox_core::tokio::VoiceModel>,
+    model_map: BTreeMap<VoiceModelId, voicevox_core::blocking::VoiceModel>,
 }
 
 static VOICE_MODEL_SET: Lazy<VoiceModelSet> = Lazy::new(|| {
-    let all_vvms = RUNTIME.block_on(get_all_models());
+    let all_vvms = get_all_models();
     let model_map: BTreeMap<_, _> = all_vvms
         .iter()
         .map(|vvm| (vvm.id().clone(), vvm.clone()))
@@ -52,7 +52,7 @@ static VOICE_MODEL_SET: Lazy<VoiceModelSet> = Lazy::new(|| {
     /// # Panics
     ///
     /// 失敗したらパニックする
-    async fn get_all_models() -> Vec<voicevox_core::tokio::VoiceModel> {
+    fn get_all_models() -> Vec<voicevox_core::blocking::VoiceModel> {
         let root_dir = if let Some(root_dir) = env::var_os(ROOT_DIR_ENV_NAME) {
             root_dir.into()
         } else {
@@ -64,17 +64,13 @@ static VOICE_MODEL_SET: Lazy<VoiceModelSet> = Lazy::new(|| {
                 .join("model")
         };
 
-        let vvm_paths = root_dir
+        root_dir
             .read_dir()
             .and_then(|entries| entries.collect::<std::result::Result<Vec<_>, _>>())
             .unwrap_or_else(|e| panic!("{}が読めませんでした: {e}", root_dir.display()))
             .into_iter()
             .filter(|entry| entry.path().extension().map_or(false, |ext| ext == "vvm"))
-            .map(|entry| voicevox_core::tokio::VoiceModel::from_path(entry.path()));
-
-        futures::future::join_all(vvm_paths)
-            .await
-            .into_iter()
+            .map(|entry| voicevox_core::blocking::VoiceModel::from_path(entry.path()))
             .collect::<std::result::Result<_, _>>()
             .unwrap()
     }
@@ -88,10 +84,10 @@ fn voice_model_set() -> &'static VoiceModelSet {
     &VOICE_MODEL_SET
 }
 
-static SYNTHESIZER: Lazy<Mutex<Option<voicevox_core::tokio::Synthesizer<()>>>> =
+static SYNTHESIZER: Lazy<Mutex<Option<voicevox_core::blocking::Synthesizer<()>>>> =
     Lazy::new(|| Mutex::new(None));
 
-fn lock_synthesizer() -> MutexGuard<'static, Option<voicevox_core::tokio::Synthesizer<()>>> {
+fn lock_synthesizer() -> MutexGuard<'static, Option<voicevox_core::blocking::Synthesizer<()>>> {
     SYNTHESIZER.lock().unwrap()
 }
 
@@ -104,10 +100,9 @@ fn set_message(message: &str) {
 
 #[no_mangle]
 pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_models: bool) -> bool {
-    // FIXME: ここはもう`RUNTIME.block_on`で包む必要は無くなっているのだが、ロガーの設定を`RUNTIME`
-    // で行っているという構造になってしまっているので、外すとロガーの初期化が遅れてしまでう
-    let result = RUNTIME.block_on(async {
-        let synthesizer = voicevox_core::tokio::Synthesizer::new(
+    init_logger_once();
+    let result = (|| {
+        let synthesizer = voicevox_core::blocking::Synthesizer::new(
             (),
             &voicevox_core::InitializeOptions {
                 acceleration_mode: if use_gpu {
@@ -121,12 +116,12 @@ pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_mod
 
         if load_all_models {
             for model in &voice_model_set().all_vvms {
-                synthesizer.load_voice_model(model).await?;
+                synthesizer.load_voice_model(model)?;
             }
         }
 
         Ok::<_, voicevox_core::Error>(synthesizer)
-    });
+    })();
 
     match result {
         Ok(synthesizer) => {
@@ -142,12 +137,13 @@ pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_mod
 
 #[no_mangle]
 pub extern "C" fn load_model(style_id: i64) -> bool {
+    init_logger_once();
     let style_id = StyleId::new(style_id as u32);
     let model_set = voice_model_set();
     if let Some(model_id) = model_set.style_model_map.get(&style_id) {
         let vvm = model_set.model_map.get(model_id).unwrap();
         let synthesizer = &mut *lock_synthesizer();
-        let result = RUNTIME.block_on(ensure_initialized!(synthesizer).load_voice_model(vvm));
+        let result = ensure_initialized!(synthesizer).load_voice_model(vvm);
         if let Some(err) = result.err() {
             set_message(&format!("{err}"));
             false
@@ -162,28 +158,33 @@ pub extern "C" fn load_model(style_id: i64) -> bool {
 
 #[no_mangle]
 pub extern "C" fn is_model_loaded(speaker_id: i64) -> bool {
+    init_logger_once();
     ensure_initialized!(&*lock_synthesizer())
         .is_loaded_model_by_style_id(StyleId::new(speaker_id as u32))
 }
 
 #[no_mangle]
 pub extern "C" fn finalize() {
+    init_logger_once();
     *lock_synthesizer() = None;
 }
 
 #[no_mangle]
 pub extern "C" fn metas() -> *const c_char {
+    init_logger_once();
     let model_set = voice_model_set();
     model_set.all_metas_json.as_ptr()
 }
 
 #[no_mangle]
 pub extern "C" fn last_error_message() -> *const c_char {
+    init_logger_once();
     ERROR_MESSAGE.lock().unwrap().as_ptr() as *const c_char
 }
 
 #[no_mangle]
 pub extern "C" fn supported_devices() -> *const c_char {
+    init_logger_once();
     return SUPPORTED_DEVICES.as_ptr();
 
     static SUPPORTED_DEVICES: Lazy<CString> = Lazy::new(|| {
@@ -198,6 +199,7 @@ pub extern "C" fn yukarin_s_forward(
     speaker_id: *mut i64,
     output: *mut f32,
 ) -> bool {
+    init_logger_once();
     let synthesizer = &*lock_synthesizer();
     let result = ensure_initialized!(synthesizer).predict_duration(
         unsafe { std::slice::from_raw_parts_mut(phoneme_list, length as usize) },
@@ -228,6 +230,7 @@ pub extern "C" fn yukarin_sa_forward(
     speaker_id: *mut i64,
     output: *mut f32,
 ) -> bool {
+    init_logger_once();
     let synthesizer = &*lock_synthesizer();
     let result = ensure_initialized!(synthesizer).predict_intonation(
         length as usize,
@@ -261,6 +264,7 @@ pub extern "C" fn decode_forward(
     speaker_id: *mut i64,
     output: *mut f32,
 ) -> bool {
+    init_logger_once();
     let length = length as usize;
     let phoneme_size = phoneme_size as usize;
     let synthesizer = &*lock_synthesizer();