From b8473a9ee5e07ac98e7638abd17ea566d717cea4 Mon Sep 17 00:00:00 2001
From: Lengyue
Date: Wed, 1 May 2024 00:37:57 -0400
Subject: [PATCH] Add i18n engine

---
 .gitignore                         |   1 +
 fish_speech/i18n/__init__.py       |   3 +
 fish_speech/i18n/core.py           |  40 ++++++++++
 fish_speech/i18n/locale/en_US.json |  25 ++++++
 fish_speech/i18n/locale/zh_CN.json |  25 ++++++
 fish_speech/i18n/scan.py           | 122 +++++++++++++++++++++++++++++
 fish_speech/webui/manage.py        |  10 ++-
 tools/webui.py                     |  55 ++++++-------
 8 files changed, 251 insertions(+), 30 deletions(-)
 create mode 100644 fish_speech/i18n/__init__.py
 create mode 100644 fish_speech/i18n/core.py
 create mode 100644 fish_speech/i18n/locale/en_US.json
 create mode 100644 fish_speech/i18n/locale/zh_CN.json
 create mode 100644 fish_speech/i18n/scan.py

diff --git a/.gitignore b/.gitignore
index a97f3eda..ee2826f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ ffmpeg.exe
 asr-label-win-x64.exe
 /.cache
 /fishenv
+/.locale
diff --git a/fish_speech/i18n/__init__.py b/fish_speech/i18n/__init__.py
new file mode 100644
index 00000000..981dbb3b
--- /dev/null
+++ b/fish_speech/i18n/__init__.py
@@ -0,0 +1,3 @@
+from .core import i18n
+
+__all__ = ["i18n"]
diff --git a/fish_speech/i18n/core.py b/fish_speech/i18n/core.py
new file mode 100644
index 00000000..9f793ec9
--- /dev/null
+++ b/fish_speech/i18n/core.py
@@ -0,0 +1,40 @@
+import json
+import locale
+from pathlib import Path
+
+I18N_FILE_PATH = Path(__file__).parent / "locale"
+DEFAULT_LANGUAGE = "en_US"
+
+
+def load_language_list(language):
+    with open(I18N_FILE_PATH / f"{language}.json", "r", encoding="utf-8") as f:
+        language_list = json.load(f)
+
+    return language_list
+
+
+class I18nAuto:
+    def __init__(self):
+        i18n_file = Path(".locale")
+
+        if i18n_file.exists():
+            with open(i18n_file, "r", encoding="utf-8") as f:
+                language = f.read().strip()
+        else:
+            # getlocale can't identify the system's language ((None, None))
+            language = locale.getdefaultlocale()[0]
+
+        if (I18N_FILE_PATH / f"{language}.json").exists() is False:
+            language = DEFAULT_LANGUAGE
+
+        self.language = language
+        self.language_map = load_language_list(language)
+
+    def __call__(self, key):
+        return self.language_map.get(key, key)
+
+    def __repr__(self):
+        return "Use Language: " + self.language
+
+
+i18n = I18nAuto()
diff --git a/fish_speech/i18n/locale/en_US.json b/fish_speech/i18n/locale/en_US.json
new file mode 100644
index 00000000..a5510e62
--- /dev/null
+++ b/fish_speech/i18n/locale/en_US.json
@@ -0,0 +1,25 @@
+{
+    "5 to 10 seconds of reference audio, useful for specifying speaker.": "5 to 10 seconds of reference audio, useful for specifying speaker.",
+    "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).",
+    "Advanced Config": "Advanced Config",
+    "Enable Reference Audio": "Enable Reference Audio",
+    "Error Message": "Error Message",
+    "Generate": "Generate",
+    "Generated Audio": "Generated Audio",
+    "Infer interface is closed": "Infer interface is closed",
+    "Inferring interface is launched at {}": "Inferring interface is launched at {}",
+    "Input Text": "Input Text",
+    "Iterative Prompt Length, 0 means off": "Iterative Prompt Length, 0 means off",
+    "Maximum tokens per batch, 0 means no limit": "Maximum tokens per batch, 0 means no limit",
+    "Opened labeler in browser": "Opened labeler in browser",
+    "Put your text here.": "Put your text here.",
+    "Reference Audio": "Reference Audio",
Text": "Reference Text", + "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.", + "Repetition Penalty": "Repetition Penalty", + "Speaker": "Speaker", + "Text is too long, please keep it under {} characters.": "Text is too long, please keep it under {} characters.", + "Type name of the speaker": "Type name of the speaker", + "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.", + "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1)." +} diff --git a/fish_speech/i18n/locale/zh_CN.json b/fish_speech/i18n/locale/zh_CN.json new file mode 100644 index 00000000..034b45c5 --- /dev/null +++ b/fish_speech/i18n/locale/zh_CN.json @@ -0,0 +1,25 @@ +{ + "5 to 10 seconds of reference audio, useful for specifying speaker.": "5 到 10 秒的参考音频,适用于指定音色。", + "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "由 [Fish Audio](https://fish.audio) 研发的基于 VQ-GAN 和 Llama 的多语种语音合成.", + "Advanced Config": "高级参数", + "Enable Reference Audio": "启用参考音频", + "Error Message": "错误信息", + "Generate": "生成", + "Generated Audio": "音频", + "Infer interface is closed": "推理界面已关闭", + "Inferring interface is launched at {}": "推理界面已在 {} 上启动", + "Input Text": "输入文本", + "Iterative Prompt Length, 0 means off": "迭代提示长度,0 表示关闭", + "Maximum tokens per batch, 0 means no limit": "每批最大令牌数,0 表示无限制", + "Opened labeler in browser": "在浏览器中打开标注工具", + "Put your text here.": "在此处输入文本.", + "Reference Audio": "参考音频", + "Reference Text": "参考文本", + "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "相关代码使用 BSD-3-Clause 许可证发布,权重使用 CC BY-NC-SA 4.0 许可证发布.", + "Repetition Penalty": "重复惩罚", + "Speaker": "说话人", + "Text is too long, please keep it under {} characters.": "文本太长,请保持在 {} 个字符以内.", + "Type name of the speaker": "输入说话人的名称", + "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "我们不对模型的任何滥用负责,请在使用之前考虑您当地的法律法规.", + "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "你可以在 [这里](https://github.com/fishaudio/fish-speech) 找到源代码和 [这里](https://huggingface.co/fishaudio/fish-speech-1) 找到模型." 
+}
diff --git a/fish_speech/i18n/scan.py b/fish_speech/i18n/scan.py
new file mode 100644
index 00000000..d0194c0f
--- /dev/null
+++ b/fish_speech/i18n/scan.py
@@ -0,0 +1,122 @@
+import ast
+import glob
+import json
+from collections import OrderedDict
+from pathlib import Path
+
+from loguru import logger
+
+from .core import DEFAULT_LANGUAGE, I18N_FILE_PATH
+
+
+def extract_i18n_strings(node):
+    i18n_strings = []
+
+    if (
+        isinstance(node, ast.Call)
+        and isinstance(node.func, ast.Name)
+        and node.func.id == "i18n"
+    ):
+        for arg in node.args:
+            if isinstance(arg, ast.Str):
+                i18n_strings.append(arg.s)
+
+    for child_node in ast.iter_child_nodes(node):
+        i18n_strings.extend(extract_i18n_strings(child_node))
+
+    return i18n_strings
+
+
+# scan the directory for all .py files (recursively)
+# for each file, parse the code into an AST
+# for each AST, extract the i18n strings
+
+strings = []
+folders = ["fish_speech", "tools"]
+# for filename in glob.iglob("**/*.py", recursive=True):
+for folder in folders:
+    for f in Path(folder).rglob("*.py"):
+        code = f.read_text(encoding="utf-8")
+        if "i18n(" in code:
+            tree = ast.parse(code)
+            i18n_strings = extract_i18n_strings(tree)
+            logger.info(f"Found {len(i18n_strings)} i18n strings in {f}")
+            strings.extend(i18n_strings)
+
+code_keys = set(strings)
+logger.info(f"Total unique: {len(code_keys)}")
+
+
+standard_file = I18N_FILE_PATH / f"{DEFAULT_LANGUAGE}.json"
+with open(standard_file, "r", encoding="utf-8") as f:
+    standard_data = json.load(f, object_pairs_hook=OrderedDict)
+standard_keys = set(standard_data.keys())
+
+# Define the standard file name
+unused_keys = standard_keys - code_keys
+logger.info(f"Found {len(unused_keys)} unused keys in {standard_file}")
+for unused_key in unused_keys:
+    logger.info(f"\t{unused_key}")
+
+missing_keys = code_keys - standard_keys
+logger.info(f"Found {len(missing_keys)} missing keys in {standard_file}")
+for missing_key in missing_keys:
+    logger.info(f"\t{missing_key}")
+
+code_keys_dict = OrderedDict()
+for s in strings:
+    code_keys_dict[s] = s
+
+# write back
+with open(standard_file, "w", encoding="utf-8") as f:
+    json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True)
+    f.write("\n")
+
+logger.info(f"Updated {standard_file}")
+
+
+# Define the standard file name
+standard_file = I18N_FILE_PATH / f"{DEFAULT_LANGUAGE}.json"
+
+# Find all JSON files in the directory
+dir_path = I18N_FILE_PATH
+languages = [f for f in dir_path.glob("*.json") if f.stem != DEFAULT_LANGUAGE]
+
+# Load the standard file
+with open(standard_file, "r", encoding="utf-8") as f:
+    standard_data = json.load(f, object_pairs_hook=OrderedDict)
+
+# Loop through each language file
+for lang_file in languages:
+    # Load the language file
+    with open(lang_file, "r", encoding="utf-8") as f:
+        lang_data = json.load(f, object_pairs_hook=OrderedDict)
+
+    # Find the difference between the language file and the standard file
+    diff = set(standard_data.keys()) - set(lang_data.keys())
+
+    miss = set(lang_data.keys()) - set(standard_data.keys())
+
+    # Add any missing keys to the language file
+    for key in diff:
+        lang_data[key] = "#!" + key
+        logger.info(f"Added missing key: {key} to {lang_file}")
+
+    # Del any extra keys to the language file
+    for key in miss:
+        del lang_data[key]
+        logger.info(f"Del extra key: {key} from {lang_file}")
+
+    # Sort the keys of the language file to match the order of the standard file
+    lang_data = OrderedDict(
+        sorted(lang_data.items(), key=lambda x: list(standard_data.keys()).index(x[0]))
+    )
+
+    # Save the updated language file
+    with open(lang_file, "w", encoding="utf-8") as f:
+        json.dump(lang_data, f, ensure_ascii=False, indent=4, sort_keys=True)
+        f.write("\n")
+
+    logger.info(f"Updated {lang_file}")
+
+logger.info("Done")
diff --git a/fish_speech/webui/manage.py b/fish_speech/webui/manage.py
index 3fde3a7f..051c393c 100644
--- a/fish_speech/webui/manage.py
+++ b/fish_speech/webui/manage.py
@@ -17,6 +17,7 @@
 from loguru import logger
 from tqdm import tqdm
 
+from fish_speech.i18n import i18n
 from fish_speech.webui.launch_utils import Seafoam, versions_html
 
 PYTHON = os.path.join(os.environ.get("PYTHON_FOLDERPATH", ""), "python")
@@ -97,7 +98,7 @@ def change_label(if_label):
         # 设置要访问的URL
         url = "https://text-labeler.pages.dev/"
         webbrowser.open(url)
-        yield f"已打开网址"
+        yield i18n("Opened labeler in browser")
     elif if_label == False:
         p_label = None
         yield "Nothing"
@@ -119,7 +120,10 @@ def change_infer(
         env["GRADIO_SERVER_NAME"] = host
         env["GRADIO_SERVER_PORT"] = port
         # 启动第二个进程
-        yield build_html_ok_message(f"推理界面已开启, 访问 http://{host}:{port}")
+        url = f"http://{host}:{port}"
+        yield build_html_ok_message(
+            i18n("Inferring interface is launched at {}").format(url)
+        )
         p_infer = subprocess.Popen(
             [
                 PYTHON,
@@ -140,7 +144,7 @@ def change_infer(
     elif if_infer == False and p_infer != None:
         kill_process(p_infer.pid)
         p_infer = None
-        yield build_html_error_message("推理界面已关闭")
+        yield build_html_error_message(i18n("Infer interface is closed"))
 
 
 js = load_data_in_raw("fish_speech/webui/js/animate.js")
diff --git a/tools/webui.py b/tools/webui.py
index 829dd7fd..17b8b4fb 100644
--- a/tools/webui.py
+++ b/tools/webui.py
@@ -12,6 +12,7 @@
 from loguru import logger
 from transformers import AutoTokenizer
 
+from fish_speech.i18n import i18n
 from tools.llama.generate import launch_thread_safe_queue
 from tools.vqgan.inference import load_model as load_vqgan_model
 
@@ -19,22 +20,18 @@
 
 os.environ["EINX_FILTER_TRACEBACK"] = "false"
 
-HEADER_MD = """# Fish Speech
+HEADER_MD = f"""# Fish Speech
 
-A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).
-由 [Fish Audio](https://fish.audio) 研发的基于 VQ-GAN 和 Llama 的多语种语音合成.
+{i18n("A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).")}
 
-You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).
-你可以在 [这里](https://github.com/fishaudio/fish-speech) 找到源代码和 [这里](https://huggingface.co/fishaudio/fish-speech-1) 找到模型.
+{i18n("You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).")}
 
-Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.
-相关代码使用 BSD-3-Clause 许可证发布,权重使用 CC BY-NC-SA 4.0 许可证发布.
+{i18n("Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.")}
 
-We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.
-我们不对模型的任何滥用负责,请在使用之前考虑您当地的法律法规.
+{i18n("We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.")} """ -TEXTBOX_PLACEHOLDER = """Put your text here. 在此处输入文本.""" +TEXTBOX_PLACEHOLDER = i18n("Put your text here.") try: import spaces @@ -76,7 +73,9 @@ def inference( if args.max_gradio_length > 0 and len(text) > args.max_gradio_length: return ( None, - f"Text is too long, please keep it under {args.max_gradio_length} characters.", + i18n("Text is too long, please keep it under {} characters.").format( + args.max_gradio_length + ), ) # Parse reference audio aka prompt @@ -171,13 +170,13 @@ def build_app(): with gr.Row(): with gr.Column(scale=3): text = gr.Textbox( - label="Input Text / 输入文本", placeholder=TEXTBOX_PLACEHOLDER, lines=15 + label=i18n("Input Text"), placeholder=TEXTBOX_PLACEHOLDER, lines=15 ) with gr.Row(): - with gr.Tab(label="Advanced Config / 高级参数"): + with gr.Tab(label=i18n("Advanced Config")): chunk_length = gr.Slider( - label="Iterative Prompt Length, 0 means off / 迭代提示长度,0 表示关闭", + label=i18n("Iterative Prompt Length, 0 means off"), minimum=0, maximum=500, value=30, @@ -185,7 +184,7 @@ def build_app(): ) max_new_tokens = gr.Slider( - label="Maximum tokens per batch, 0 means no limit / 每批最大令牌数,0 表示无限制", + label=i18n("Maximum tokens per batch, 0 means no limit"), minimum=0, maximum=args.max_length, value=0, # 0 means no limit @@ -201,7 +200,7 @@ def build_app(): ) repetition_penalty = gr.Slider( - label="Repetition Penalty", + label=i18n("Repetition Penalty"), minimum=0, maximum=2, value=1.5, @@ -217,40 +216,42 @@ def build_app(): ) speaker = gr.Textbox( - label="Speaker / 说话人", - placeholder="Type name of the speaker / 输入说话人的名称", + label=i18n("Speaker"), + placeholder=i18n("Type name of the speaker"), lines=1, ) - with gr.Tab(label="Reference Audio / 参考音频"): + with gr.Tab(label=i18n("Reference Audio")): gr.Markdown( - "5 to 10 seconds of reference audio, useful for specifying speaker. \n5 到 10 秒的参考音频,适用于指定音色。" + i18n( + "5 to 10 seconds of reference audio, useful for specifying speaker." + ) ) enable_reference_audio = gr.Checkbox( - label="Enable Reference Audio / 启用参考音频", + label=i18n("Enable Reference Audio"), ) reference_audio = gr.Audio( - label="Reference Audio / 参考音频", + label=i18n("Reference Audio"), type="filepath", ) reference_text = gr.Textbox( - label="Reference Text / 参考文本", - placeholder="参考文本", + label=i18n("Reference Text"), + placeholder=i18n("Reference Text"), lines=1, value="在一无所知中,梦里的一天结束了,一个新的「轮回」便会开始。", ) with gr.Column(scale=3): with gr.Row(): - error = gr.HTML(label="Error Message / 错误信息") + error = gr.HTML(label=i18n("Error Message")) with gr.Row(): - audio = gr.Audio(label="Generated Audio / 音频", type="numpy") + audio = gr.Audio(label=i18n("Generated Audio"), type="numpy") with gr.Row(): with gr.Column(scale=3): generate = gr.Button( - value="\U0001F3A7 Generate / 合成", variant="primary" + value="\U0001F3A7 " + i18n("Generate"), variant="primary" ) # # Submit