diff --git a/.github/workflows/model_test.yaml b/.github/workflows/model_test.yaml
index eeb6f5e..b8e2566 100644
--- a/.github/workflows/model_test.yaml
+++ b/.github/workflows/model_test.yaml
@@ -1,6 +1,12 @@
 name: Model test
 
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
 
 jobs:
   check_skip:
diff --git a/.github/workflows/unittest.yaml b/.github/workflows/unittest.yaml
index 2044e60..e7caaa4 100644
--- a/.github/workflows/unittest.yaml
+++ b/.github/workflows/unittest.yaml
@@ -1,6 +1,12 @@
 name: Unitest
 
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
 
 jobs:
   check_skip:
diff --git a/ci/test_model.py b/ci/test_model.py
index 7600a25..45d1715 100755
--- a/ci/test_model.py
+++ b/ci/test_model.py
@@ -1,3 +1,6 @@
+import os
+import shutil
+
 import numpy as np
 
 from espnet2.bin.asr_inference import Speech2Text
@@ -6,8 +9,8 @@
 
 
 def _asr(model_name):
-    d = ModelDownloader()
-    speech2text = Speech2Text(**d.download_and_unpack(model_name))
+    d = ModelDownloader("downloads")
+    speech2text = Speech2Text(**d.download_and_unpack(model_name, quiet=True))
     speech = np.zeros((10000,), dtype=np.float32)
     nbests = speech2text(speech)
     text, *_ = nbests[0]
@@ -15,13 +18,14 @@
 
 
 def _tts(model_name):
-    d = ModelDownloader()
-    text2speech = Text2Speech(**d.download_and_unpack(model_name))
-    speech = np.zeros((10000,), dtype=np.float32)
+    d = ModelDownloader("downloads")
+    text2speech = Text2Speech(**d.download_and_unpack(model_name, quiet=True))
+    inputs = {"text": "foo"}
     if text2speech.use_speech:
-        text2speech("foo", speech=speech)
-    else:
-        text2speech("foo")
+        inputs["speech"] = np.zeros((10000,), dtype=np.float32)
+    if text2speech.tts.spk_embed_dim is not None:
+        inputs["spembs"] = np.zeros((text2speech.tts.spk_embed_dim,), dtype=np.float32)
+    text2speech(**inputs)
 
 
 def test_model():
@@ -29,14 +33,19 @@
 
     tasks = ["asr", "tts"]
     for task in tasks:
-        for model_name in d.query(task=task):
-            if d.query("valid", name=model_name)[0] == "false":
-                continue
-            print(f"#### Test {model_name} ####")
-
-            if task == "asr":
-                _asr(model_name)
-            elif task == "tts":
-                _tts(model_name)
-            else:
-                raise NotImplementedError(f"task={task}")
+        for corpus in list(set(d.query("corpus", task=task))):
+            for model_name in d.query(task=task, corpus=corpus):
+                if d.query("valid", name=model_name)[0] == "false":
+                    continue
+                print(f"#### Test {model_name} ####")
+
+                if task == "asr":
+                    _asr(model_name)
+                elif task == "tts":
+                    _tts(model_name)
+                else:
+                    raise NotImplementedError(f"task={task}")
+
+            # NOTE(kan-bayashi): remove and recreate cache dir to reduce the disk usage.
+            shutil.rmtree("downloads")
+            os.makedirs("downloads")
diff --git a/espnet_model_zoo/downloader.py b/espnet_model_zoo/downloader.py
index 0936951..99924c7 100644
--- a/espnet_model_zoo/downloader.py
+++ b/espnet_model_zoo/downloader.py
@@ -46,7 +46,7 @@ def str_to_hash(string: Union[str, Path]) -> str:
     return hashlib.md5(str(string).encode("utf-8")).hexdigest()
 
 
-def download(url, output_path, retry: int = 3, chunk_size: int = 8192):
+def download(url, output_path, retry: int = 3, chunk_size: int = 8192, quiet=False):
     # Set retry
     session = requests.Session()
     session.mount("http://", requests.adapters.HTTPAdapter(max_retries=retry))
@@ -62,17 +62,22 @@ def download(url, output_path, retry: int = 3, chunk_size: int = 8192):
     # Write in temporary file
     with tempfile.TemporaryDirectory() as d:
         with (Path(d) / "tmp").open("wb") as f:
-            with tqdm(
-                desc=url,
-                total=file_size,
-                unit="B",
-                unit_scale=True,
-                unit_divisor=1024,
-            ) as pbar:
+            if quiet:
                 for chunk in response.iter_content(chunk_size=chunk_size):
                     if chunk:
                         f.write(chunk)
-                        pbar.update(len(chunk))
+            else:
+                with tqdm(
+                    desc=url,
+                    total=file_size,
+                    unit="B",
+                    unit_scale=True,
+                    unit_divisor=1024,
+                ) as pbar:
+                    for chunk in response.iter_content(chunk_size=chunk_size):
+                        if chunk:
+                            f.write(chunk)
+                            pbar.update(len(chunk))
 
         Path(output_path).parent.mkdir(parents=True, exist_ok=True)
         shutil.move(Path(d) / "tmp", output_path)
@@ -224,7 +229,9 @@ def unpack_local_file(self, name: str = None) -> Dict[str, Union[str, List[str]]
         # Extract files from archived file
         return unpack(filename, outdir)
 
-    def download(self, name: str = None, version: int = -1, **kwargs: str) -> str:
+    def download(
+        self, name: str = None, version: int = -1, quiet: bool = False, **kwargs: str
+    ) -> str:
         url = self.get_url(name=name, version=version, **kwargs)
         if not is_url(url) and Path(url).exists():
             return url
@@ -233,7 +240,7 @@ def download(self, name: str = None, version: int = -1, **kwargs: str) -> str:
         filename = self._get_file_name(url)
         # Download the model file if not existing
         if not (outdir / filename).exists():
-            download(url, outdir / filename)
+            download(url, outdir / filename, quiet=quiet)
 
         # Write the url for debugging
         with (outdir / "url").open("w", encoding="utf-8") as f:
@@ -261,7 +268,7 @@ def download(self, name: str = None, version: int = -1, **kwargs: str) -> str:
         return str(outdir / filename)
 
     def download_and_unpack(
-        self, name: str = None, version: int = -1, **kwargs: str
+        self, name: str = None, version: int = -1, quiet: bool = False, **kwargs: str
     ) -> Dict[str, Union[str, List[str]]]:
         url = self.get_url(name=name, version=version, **kwargs)
         if not is_url(url) and Path(url).exists():
@@ -278,7 +285,7 @@ def download_and_unpack(
             return info
 
         # Download the file to an unique path
-        filename = self.download(url)
+        filename = self.download(url, quiet=quiet)
 
         # Extract files from archived file
         return unpack(filename, outdir)
diff --git a/espnet_model_zoo/table.csv b/espnet_model_zoo/table.csv
index cd593a4..52c4c0c 100644
--- a/espnet_model_zoo/table.csv
+++ b/espnet_model_zoo/table.csv
@@ -58,3 +58,23 @@ jsut,tts,kan-bayashi/jsut_tts_train_fastspeech2_transformer_teacher_raw_phn_jaco
 jsut,tts,kan-bayashi/jsut_tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave,https://zenodo.org/record/4391409/files/tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave.zip?download=1,24000,jp,female,1.5.1,0.9.6,acd6957,true
 chime4,asr,kamo-naoyuki/chime4_asr_train_asr_transformer3_raw_en_char_sp_valid.acc.ave,https://zenodo.org/record/4414883/files/asr_train_asr_transformer3_raw_en_char_sp_valid.acc.ave.zip?download=1,16000,en,,1.4.0,0.9.6,d5ddd5e,true
 dirha_wsj,asr,kamo-naoyuki/dirha_wsj_asr_train_asr_transformer_cmvn_raw_char_rir_scpdatadirha_irwav.scp_noise_db_range10_17_noise_scpdatadirha_noisewav.scp_speech_volume_normalize1.0_num_workers2_rir_apply_prob1._sp_valid.acc.ave,https://zenodo.org/record/4415021/files/asr_train_asr_transformer_cmvn_raw_char_rir_scpdatadirha_irwav.scp_noise_db_range10_17_noise_scpdatadirha_noisewav.scp_speech_volume_normalize1.0_num_workers2_rir_apply_prob1._sp_valid.acc.ave.zip?download=1,16000,en,,1.5.1,0.9.6,c30ce88,true
+vctk,tts,kan-bayashi/vctk_tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4393279/files/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+vctk,tts,kan-bayashi/vctk_tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4393277/files/tts_train_gst%2Bxvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+vctk,tts,kan-bayashi/vctk_tts_train_xvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4394600/files/tts_train_xvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+vctk,tts,kan-bayashi/vctk_tts_train_gst+xvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4394598/files/tts_train_gst%2Bxvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+vctk,tts,kan-bayashi/vctk_tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4394602/files/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+vctk,tts,kan-bayashi/vctk_tts_train_gst+xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4394608/files/tts_train_gst%2Bxvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+libritts,tts,kan-bayashi/libritts_tts_train_xvector_trasnformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4409704/files/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
+libritts,tts,kan-bayashi/libritts_tts_train_gst+xvector_trasnformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4409702/files/tts_train_gst%2Bxvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
+libritts,tts,kan-bayashi/libritts_tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss,https://zenodo.org/record/4418754/files/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
+libritts,tts,kan-bayashi/libritts_tts_train_gst+xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss,https://zenodo.org/record/4418774/files/tts_train_gst%2Bxvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
+vctk,tts,kan-bayashi/vctk_xvector_transformer,https://zenodo.org/record/4393279/files/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+vctk,tts,kan-bayashi/vctk_gst+xvector_transformer,https://zenodo.org/record/4393277/files/tts_train_gst%2Bxvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+vctk,tts,kan-bayashi/vctk_xvector_tacotron2,https://zenodo.org/record/4394600/files/tts_train_xvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+vctk,tts,kan-bayashi/vctk_gst+xvector_tacotron2,https://zenodo.org/record/4394598/files/tts_train_gst%2Bxvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+vctk,tts,kan-bayashi/vctk_xvector_conformer_fastspeech2,https://zenodo.org/record/4394602/files/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+vctk,tts,kan-bayashi/vctk_gst+xvector_conformer_fastspeech2,https://zenodo.org/record/4394608/files/tts_train_gst%2Bxvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
+libritts,tts,kan-bayashi/libritts_xvector_trasnformer,https://zenodo.org/record/4409704/files/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
+libritts,tts,kan-bayashi/libritts_gst+xvector_trasnformer,https://zenodo.org/record/4409702/files/tts_train_gst%2Bxvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
+libritts,tts,kan-bayashi/libritts_xvector_conformer_fastspeech2,https://zenodo.org/record/4418754/files/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
+libritts,tts,kan-bayashi/libritts_gst+xvector_conformer_fastspeech2,https://zenodo.org/record/4418774/files/tts_train_gst%2Bxvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
diff --git a/setup.py b/setup.py
index af77f8f..c7b1833 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
 dirname = os.path.dirname(__file__)
 setup(
     name="espnet_model_zoo",
-    version="0.0.0a21",
+    version="0.0.0a22",
     url="http://github.com/espnet/espnet_model_zoo",
     description="ESPnet Model Zoo",
     long_description=open(os.path.join(dirname, "README.md"), encoding="utf-8").read(),
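For reference, a minimal usage sketch (not part of the patch) of the quiet download mode and cache-directory argument exercised by ci/test_model.py above; the model alias and the zero-filled placeholder inputs are taken from the rows and test logic added in this diff.

# Usage sketch: download a model quietly into a local cache directory and run
# TTS inference, mirroring the updated ci/test_model.py (assumes espnet and
# espnet_model_zoo are installed).
import numpy as np

from espnet2.bin.tts_inference import Text2Speech
from espnet_model_zoo.downloader import ModelDownloader

d = ModelDownloader("downloads")  # cache models under ./downloads
text2speech = Text2Speech(
    **d.download_and_unpack("kan-bayashi/vctk_gst+xvector_tacotron2", quiet=True)
)

inputs = {"text": "Hello world"}
if text2speech.use_speech:
    # GST-based models expect reference speech; zeros are only a placeholder.
    inputs["speech"] = np.zeros((10000,), dtype=np.float32)
if text2speech.tts.spk_embed_dim is not None:
    # X-vector models expect a speaker embedding; zeros are only a placeholder.
    inputs["spembs"] = np.zeros((text2speech.tts.spk_embed_dim,), dtype=np.float32)
text2speech(**inputs)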