Skip to content

Commit

Permalink
Merge pull request #16 from espnet/kan-bayashi-patch-1
Browse files Browse the repository at this point in the history
Add new multi-speaker pretrained models
  • Loading branch information
kamo-naoyuki authored Jan 6, 2021
2 parents a3c76e4 + f69ab15 commit 80b053a
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 35 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/model_test.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
name: Model test

on: [push, pull_request]
on:
push:
branches:
- master
pull_request:
branches:
- master

jobs:
check_skip:
Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/unittest.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
name: Unitest

on: [push, pull_request]
on:
push:
branches:
- master
pull_request:
branches:
- master

jobs:
check_skip:
Expand Down
47 changes: 28 additions & 19 deletions ci/test_model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os
import shutil

import numpy as np

from espnet2.bin.asr_inference import Speech2Text
Expand All @@ -6,37 +9,43 @@


def _asr(model_name):
# Smoke test for a single ASR model: fetch and unpack `model_name` with
# ModelDownloader, build a Speech2Text from the unpacked result, run it on
# 10000 zero-valued float32 samples, and check that the first element of
# the first n-best entry is a str.
#
# NOTE(review): this listing is a diff rendered without +/- markers or
# indentation. The downloader/Speech2Text setup appears twice; presumably
# the first pair of lines is the removed version and the second pair is the
# added one -- confirm against the repository before editing.
d = ModelDownloader()
speech2text = Speech2Text(**d.download_and_unpack(model_name))
# Second variant: passes "downloads" to ModelDownloader (presumably the cache
# directory -- verify) and quiet=True, which download_and_unpack forwards to
# the download step where it skips the tqdm progress bar.
d = ModelDownloader("downloads")
speech2text = Speech2Text(**d.download_and_unpack(model_name, quiet=True))
# Dummy all-zero input; only the type of the decoded output is checked.
speech = np.zeros((10000,), dtype=np.float32)
nbests = speech2text(speech)
# Keep the first field of the top entry and ignore the remaining fields.
text, *_ = nbests[0]
assert isinstance(text, str)


def _tts(model_name):
# Smoke test for a single TTS model: fetch and unpack `model_name` with
# ModelDownloader, build a Text2Speech from the unpacked result, and call it
# once on the text "foo". Nothing is asserted; the call only has to finish
# without raising.
#
# NOTE(review): this listing is a diff rendered without +/- markers or
# indentation, so removed and added lines are interleaved below. Presumably
# the removed version called text2speech("foo", ...) directly inside an
# if/else, and the added version collects keyword arguments in `inputs` and
# makes a single call at the end -- confirm against the repository.
d = ModelDownloader()
text2speech = Text2Speech(**d.download_and_unpack(model_name))
speech = np.zeros((10000,), dtype=np.float32)
# Second variant of the setup: "downloads" is passed to ModelDownloader
# (presumably the cache directory -- verify) and quiet=True is forwarded to
# the download step, where it skips the tqdm progress bar.
d = ModelDownloader("downloads")
text2speech = Text2Speech(**d.download_and_unpack(model_name, quiet=True))
# Keyword arguments for the final call; "text" is always present.
inputs = {"text": "foo"}
if text2speech.use_speech:
text2speech("foo", speech=speech)
else:
text2speech("foo")
# Presumably guarded by the `use_speech` check above in the added version:
# supplies 10000 zero-valued float32 samples under the "speech" key.
inputs["speech"] = np.zeros((10000,), dtype=np.float32)
# When the model reports a spk_embed_dim, pass an all-zero float32 vector of
# that length under the "spembs" key.
if text2speech.tts.spk_embed_dim is not None:
inputs["spembs"] = np.zeros((text2speech.tts.spk_embed_dim,), dtype=np.float32)
text2speech(**inputs)


def test_model():
# Entry point of the model test: for the "asr" and "tts" tasks, list the
# model names known to ModelDownloader, skip those whose "valid" field is
# the string "false", and run _asr or _tts on each remaining name.
#
# NOTE(review): this listing is a diff rendered without +/- markers or
# indentation. The first inner loop (over d.query(task=task)) is presumably
# the removed version, and the second (grouped by corpus, followed by the
# cache cleanup) is presumably the added one -- confirm against the
# repository before editing.
d = ModelDownloader()
tasks = ["asr", "tts"]

for task in tasks:
# First variant: iterate over every model name registered for the task.
for model_name in d.query(task=task):
if d.query("valid", name=model_name)[0] == "false":
continue
print(f"#### Test {model_name} ####")

if task == "asr":
_asr(model_name)
elif task == "tts":
_tts(model_name)
else:
raise NotImplementedError(f"task={task}")
# Second variant: de-duplicate the corpus names for the task with set(), then
# iterate over the models of one corpus at a time.
for corpus in list(set(d.query("corpus", task=task))):
for model_name in d.query(task=task, corpus=corpus):
if d.query("valid", name=model_name)[0] == "false":
continue
print(f"#### Test {model_name} ####")

if task == "asr":
_asr(model_name)
elif task == "tts":
_tts(model_name)
else:
raise NotImplementedError(f"task={task}")

# NOTE(kan-bayashi): remove and recreate cache dir to reduce the disk usage.
# NOTE(review): the loop level at which this cleanup runs cannot be told from
# the flattened text; presumably once per corpus -- confirm.
shutil.rmtree("downloads")
os.makedirs("downloads")
33 changes: 20 additions & 13 deletions espnet_model_zoo/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def str_to_hash(string: Union[str, Path]) -> str:
return hashlib.md5(str(string).encode("utf-8")).hexdigest()


def download(url, output_path, retry: int = 3, chunk_size: int = 8192):
def download(url, output_path, retry: int = 3, chunk_size: int = 8192, quiet=False):
# Set retry
session = requests.Session()
session.mount("http://", requests.adapters.HTTPAdapter(max_retries=retry))
Expand All @@ -62,17 +62,22 @@ def download(url, output_path, retry: int = 3, chunk_size: int = 8192):
# Write in temporary file
with tempfile.TemporaryDirectory() as d:
with (Path(d) / "tmp").open("wb") as f:
with tqdm(
desc=url,
total=file_size,
unit="B",
unit_scale=True,
unit_divisor=1024,
) as pbar:
if quiet:
for chunk in response.iter_content(chunk_size=chunk_size):
if chunk:
f.write(chunk)
pbar.update(len(chunk))
else:
with tqdm(
desc=url,
total=file_size,
unit="B",
unit_scale=True,
unit_divisor=1024,
) as pbar:
for chunk in response.iter_content(chunk_size=chunk_size):
if chunk:
f.write(chunk)
pbar.update(len(chunk))

Path(output_path).parent.mkdir(parents=True, exist_ok=True)
shutil.move(Path(d) / "tmp", output_path)
Expand Down Expand Up @@ -224,7 +229,9 @@ def unpack_local_file(self, name: str = None) -> Dict[str, Union[str, List[str]]
# Extract files from archived file
return unpack(filename, outdir)

def download(self, name: str = None, version: int = -1, **kwargs: str) -> str:
def download(
self, name: str = None, version: int = -1, quiet: bool = False, **kwargs: str
) -> str:
url = self.get_url(name=name, version=version, **kwargs)
if not is_url(url) and Path(url).exists():
return url
Expand All @@ -233,7 +240,7 @@ def download(self, name: str = None, version: int = -1, **kwargs: str) -> str:
filename = self._get_file_name(url)
# Download the model file if not existing
if not (outdir / filename).exists():
download(url, outdir / filename)
download(url, outdir / filename, quiet=quiet)

# Write the url for debugging
with (outdir / "url").open("w", encoding="utf-8") as f:
Expand Down Expand Up @@ -261,7 +268,7 @@ def download(self, name: str = None, version: int = -1, **kwargs: str) -> str:
return str(outdir / filename)

def download_and_unpack(
self, name: str = None, version: int = -1, **kwargs: str
self, name: str = None, version: int = -1, quiet: bool = False, **kwargs: str
) -> Dict[str, Union[str, List[str]]]:
url = self.get_url(name=name, version=version, **kwargs)
if not is_url(url) and Path(url).exists():
Expand All @@ -278,7 +285,7 @@ def download_and_unpack(
return info

# Download the file to a unique path
filename = self.download(url)
filename = self.download(url, quiet=quiet)

# Extract files from archived file
return unpack(filename, outdir)
Expand Down
20 changes: 20 additions & 0 deletions espnet_model_zoo/table.csv
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,23 @@ jsut,tts,kan-bayashi/jsut_tts_train_fastspeech2_transformer_teacher_raw_phn_jaco
jsut,tts,kan-bayashi/jsut_tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave,https://zenodo.org/record/4391409/files/tts_train_conformer_fastspeech2_transformer_teacher_raw_phn_jaconv_pyopenjtalk_accent_train.loss.ave.zip?download=1,24000,jp,female,1.5.1,0.9.6,acd6957,true
chime4,asr,kamo-naoyuki/chime4_asr_train_asr_transformer3_raw_en_char_sp_valid.acc.ave,https://zenodo.org/record/4414883/files/asr_train_asr_transformer3_raw_en_char_sp_valid.acc.ave.zip?download=1,16000,en,,1.4.0,0.9.6,d5ddd5e,true
dirha_wsj,asr,kamo-naoyuki/dirha_wsj_asr_train_asr_transformer_cmvn_raw_char_rir_scpdatadirha_irwav.scp_noise_db_range10_17_noise_scpdatadirha_noisewav.scp_speech_volume_normalize1.0_num_workers2_rir_apply_prob1._sp_valid.acc.ave,https://zenodo.org/record/4415021/files/asr_train_asr_transformer_cmvn_raw_char_rir_scpdatadirha_irwav.scp_noise_db_range10_17_noise_scpdatadirha_noisewav.scp_speech_volume_normalize1.0_num_workers2_rir_apply_prob1._sp_valid.acc.ave.zip?download=1,16000,en,,1.5.1,0.9.6,c30ce88,true
vctk,tts,kan-bayashi/vctk_tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4393279/files/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
vctk,tts,kan-bayashi/vctk_tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4393277/files/tts_train_gst%2Bxvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
vctk,tts,kan-bayashi/vctk_tts_train_xvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4394600/files/tts_train_xvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
vctk,tts,kan-bayashi/vctk_tts_train_gst+xvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4394598/files/tts_train_gst%2Bxvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
vctk,tts,kan-bayashi/vctk_tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4394602/files/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
vctk,tts,kan-bayashi/vctk_tts_train_gst+xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4394608/files/tts_train_gst%2Bxvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
libritts,tts,kan-bayashi/libritts_tts_train_xvector_trasnformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4409704/files/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
libritts,tts,kan-bayashi/libritts_tts_train_gst+xvector_trasnformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave,https://zenodo.org/record/4409702/files/tts_train_gst%2Bxvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
libritts,tts,kan-bayashi/libritts_tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss,https://zenodo.org/record/4418754/files/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
libritts,tts,kan-bayashi/libritts_tts_train_gst+xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss,https://zenodo.org/record/4418774/files/tts_train_gst%2Bxvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
vctk,tts,kan-bayashi/vctk_xvector_transformer,https://zenodo.org/record/4393279/files/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
vctk,tts,kan-bayashi/vctk_gst+xvector_transformer,https://zenodo.org/record/4393277/files/tts_train_gst%2Bxvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
vctk,tts,kan-bayashi/vctk_xvector_tacotron2,https://zenodo.org/record/4394600/files/tts_train_xvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
vctk,tts,kan-bayashi/vctk_gst+xvector_tacotron2,https://zenodo.org/record/4394598/files/tts_train_gst%2Bxvector_tacotron2_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
vctk,tts,kan-bayashi/vctk_xvector_conformer_fastspeech2,https://zenodo.org/record/4394602/files/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
vctk,tts,kan-bayashi/vctk_gst+xvector_conformer_fastspeech2,https://zenodo.org/record/4394608/files/tts_train_gst%2Bxvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,96ce09,true
libritts,tts,kan-bayashi/libritts_xvector_trasnformer,https://zenodo.org/record/4409704/files/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
libritts,tts,kan-bayashi/libritts_gst+xvector_trasnformer,https://zenodo.org/record/4409702/files/tts_train_gst%2Bxvector_transformer_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
libritts,tts,kan-bayashi/libritts_xvector_conformer_fastspeech2,https://zenodo.org/record/4418754/files/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
libritts,tts,kan-bayashi/libritts_gst+xvector_conformer_fastspeech2,https://zenodo.org/record/4418774/files/tts_train_gst%2Bxvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space_train.loss.ave.zip?download=1,24000,en,,1.5.1,0.9.6,861431,true
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
dirname = os.path.dirname(__file__)
setup(
name="espnet_model_zoo",
version="0.0.0a21",
version="0.0.0a22",
url="http://github.com/espnet/espnet_model_zoo",
description="ESPnet Model Zoo",
long_description=open(os.path.join(dirname, "README.md"), encoding="utf-8").read(),
Expand Down

0 comments on commit 80b053a

Please sign in to comment.