-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add readalong-html synthesize output format
And rename readalong -> readalong-xml for clarity, as suggested by @roedoejet
- Loading branch information
Showing
4 changed files
with
196 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -214,7 +214,7 @@ def test_writing_readalong(self): | |
tmp_dir = Path(tmp_dir) | ||
with silence_c_stderr(): | ||
writer = get_synthesis_output_callbacks( | ||
[SynthesizeOutputFormats.readalong], | ||
[SynthesizeOutputFormats.readalong_xml], | ||
config=FastSpeech2Config(contact=self.contact), | ||
global_step=77, | ||
output_dir=tmp_dir, | ||
|
@@ -247,6 +247,70 @@ def test_writing_readalong(self): | |
self.assertIn('<w time="0.0" dur=', readalong) | ||
|
||
|
||
class TestWritingOfflineRAS(WritingTestBase):
    """
    Exercise the synthesis output callback for the readalong-html format.

    The test runs the callback on the fixture batch and checks that one
    HTML file per utterance is written and contains read-along markup.
    """

    def test_writing_offline_ras(self):
        # One output file is expected per utterance in the fixture batch.
        expected_names = (
            "short--spk1--lngA--22050-mel-librosa.html",
            "This utterance is way too long--spk2--lngB--22050-mel-librosa.html",
        )
        with TemporaryDirectory() as tmp_name:
            tmp_root = Path(tmp_name)
            vocoder, vocoder_path = get_dummy_vocoder(tmp_root)
            with silence_c_stderr():
                fs2_config = FastSpeech2Config(
                    contact=self.contact,
                    training=FastSpeech2TrainingConfig(vocoder_path=vocoder_path),
                )
                writers = get_synthesis_output_callbacks(
                    [SynthesizeOutputFormats.readalong_html],
                    config=fs2_config,
                    global_step=77,
                    output_dir=tmp_root,
                    output_key=self.output_key,
                    device=torch.device("cpu"),
                    vocoder_model=vocoder,
                    vocoder_config=vocoder.config,
                    vocoder_global_step=10,
                )
            for writer in writers:
                writer.on_predict_batch_end(
                    _trainer=None,
                    _pl_module=None,
                    outputs=self.outputs,
                    batch=self.batch,
                    _batch_idx=0,
                    _dataloader_idx=0,
                )
            # The files are looked up in the save_dir of the last writer.
            output_dir = writer.save_dir

            # When debugging, list what was written with:
            #     print(output_dir, *output_dir.glob("**/*"))
            for file_name in expected_names:
                output_file = output_dir / file_name
                with self.subTest(output_file=output_file):
                    self.assertTrue(output_file.exists())
                    readalong = output_file.read_text(encoding="utf8")
                    self.assertIn("<read-along ", readalong)
                    self.assertIn("<span slot", readalong)
|
||
|
||
def get_dummy_vocoder(tmp_dir: Path) -> tuple[HiFiGAN, Path]:
    """Create a freshly constructed HiFiGAN and checkpoint it under tmp_dir.

    Args:
        tmp_dir: directory used as the trainer's root dir and as the parent
            of the saved checkpoint.

    Returns:
        The vocoder model and the path its checkpoint was saved to
        (``tmp_dir / "vocoder"``).
    """
    # NOTE(review): the e-mail literal below looks like a scraper-redacted
    # placeholder rather than a real address — confirm the intended value.
    dummy_config = HiFiGANConfig(
        contact=ContactInformation(
            contact_name="Test Runner", contact_email="[email protected]"
        )
    )
    vocoder = HiFiGAN(dummy_config)
    with silence_c_stderr():
        trainer = Trainer(default_root_dir=str(tmp_dir), barebones=True)
    # Attach the model to the trainer's strategy before saving the checkpoint.
    trainer.strategy.connect(vocoder)
    checkpoint_path = tmp_dir / "vocoder"
    trainer.save_checkpoint(checkpoint_path)
    return vocoder, checkpoint_path
|
||
|
||
class TestWritingWav(WritingTestBase): | ||
""" | ||
Testing the callback that writes wav files. | ||
|
@@ -260,15 +324,7 @@ def test_filenames_not_truncated(self): | |
""" | ||
with TemporaryDirectory() as tmp_dir: | ||
tmp_dir = Path(tmp_dir) | ||
contact_info = ContactInformation( | ||
contact_name="Test Runner", contact_email="[email protected]" | ||
) | ||
vocoder = HiFiGAN(HiFiGANConfig(contact=contact_info)) | ||
with silence_c_stderr(): | ||
trainer = Trainer(default_root_dir=str(tmp_dir), barebones=True) | ||
trainer.strategy.connect(vocoder) | ||
vocoder_path = Path(tmp_dir) / "vocoder" | ||
trainer.save_checkpoint(vocoder_path) | ||
vocoder, vocoder_path = get_dummy_vocoder(tmp_dir) | ||
|
||
with silence_c_stderr(): | ||
writer = get_synthesis_output_callbacks( | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters