-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
251 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,3 +21,4 @@ ffmpeg.exe | |
asr-label-win-x64.exe | ||
/.cache | ||
/fishenv | ||
/.locale |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .core import i18n | ||
|
||
__all__ = ["i18n"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import json | ||
import locale | ||
from pathlib import Path | ||
|
||
I18N_FILE_PATH = Path(__file__).parent / "locale" | ||
DEFAULT_LANGUAGE = "en_US" | ||
|
||
|
||
def load_language_list(language):
    """Return the parsed translation table for ``language``.

    The table is read from ``<I18N_FILE_PATH>/<language>.json`` as UTF-8
    encoded JSON and returned exactly as ``json.load`` produces it.
    """
    translation_path = I18N_FILE_PATH / f"{language}.json"
    with translation_path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
|
||
|
||
class I18nAuto:
    """Callable translator that maps source strings to the active language.

    The language name is read from a ``.locale`` file (resolved relative to
    the current working directory) when that file exists; otherwise the first
    element of ``locale.getdefaultlocale()`` is used. When no
    ``<language>.json`` file exists under ``I18N_FILE_PATH`` the translator
    falls back to ``DEFAULT_LANGUAGE``.
    """

    def __init__(self):
        override_file = Path(".locale")

        if override_file.exists():
            language = override_file.read_text(encoding="utf-8").strip()
        else:
            # getlocale can't identify the system's language ((None, None))
            language, _encoding = locale.getdefaultlocale()

        # Unknown (or undetectable) languages use the default translation.
        translation_file = I18N_FILE_PATH / f"{language}.json"
        if not translation_file.exists():
            language = DEFAULT_LANGUAGE

        self.language = language
        self.language_map = load_language_list(language)

    def __call__(self, key):
        """Return the translation of ``key``, or ``key`` itself if absent."""
        return self.language_map.get(key, key)

    def __repr__(self):
        """Describe which language this translator is using."""
        return "Use Language: " + self.language
|
||
|
||
# Module-level translator instance, created when this module is imported.
i18n = I18nAuto()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"5 to 10 seconds of reference audio, useful for specifying speaker.": "5 to 10 seconds of reference audio, useful for specifying speaker.", | ||
"A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).", | ||
"Advanced Config": "Advanced Config", | ||
"Enable Reference Audio": "Enable Reference Audio", | ||
"Error Message": "Error Message", | ||
"Generate": "Generate", | ||
"Generated Audio": "Generated Audio", | ||
"Infer interface is closed": "Infer interface is closed", | ||
"Inferring interface is launched at {}": "Inferring interface is launched at {}", | ||
"Input Text": "Input Text", | ||
"Iterative Prompt Length, 0 means off": "Iterative Prompt Length, 0 means off", | ||
"Maximum tokens per batch, 0 means no limit": "Maximum tokens per batch, 0 means no limit", | ||
"Opened labeler in browser": "Opened labeler in browser", | ||
"Put your text here.": "Put your text here.", | ||
"Reference Audio": "Reference Audio", | ||
"Reference Text": "Reference Text", | ||
"Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.", | ||
"Repetition Penalty": "Repetition Penalty", | ||
"Speaker": "Speaker", | ||
"Text is too long, please keep it under {} characters.": "Text is too long, please keep it under {} characters.", | ||
"Type name of the speaker": "Type name of the speaker", | ||
"We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.", | ||
"You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1)." | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"5 to 10 seconds of reference audio, useful for specifying speaker.": "5 到 10 秒的参考音频,适用于指定音色。", | ||
"A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "由 [Fish Audio](https://fish.audio) 研发的基于 VQ-GAN 和 Llama 的多语种语音合成.", | ||
"Advanced Config": "高级参数", | ||
"Enable Reference Audio": "启用参考音频", | ||
"Error Message": "错误信息", | ||
"Generate": "生成", | ||
"Generated Audio": "音频", | ||
"Infer interface is closed": "推理界面已关闭", | ||
"Inferring interface is launched at {}": "推理界面已在 {} 上启动", | ||
"Input Text": "输入文本", | ||
"Iterative Prompt Length, 0 means off": "迭代提示长度,0 表示关闭", | ||
"Maximum tokens per batch, 0 means no limit": "每批最大令牌数,0 表示无限制", | ||
"Opened labeler in browser": "在浏览器中打开标注工具", | ||
"Put your text here.": "在此处输入文本.", | ||
"Reference Audio": "参考音频", | ||
"Reference Text": "参考文本", | ||
"Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "相关代码使用 BSD-3-Clause 许可证发布,权重使用 CC BY-NC-SA 4.0 许可证发布.", | ||
"Repetition Penalty": "重复惩罚", | ||
"Speaker": "说话人", | ||
"Text is too long, please keep it under {} characters.": "文本太长,请保持在 {} 个字符以内.", | ||
"Type name of the speaker": "输入说话人的名称", | ||
"We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "我们不对模型的任何滥用负责,请在使用之前考虑您当地的法律法规.", | ||
"You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "你可以在 [这里](https://github.com/fishaudio/fish-speech) 找到源代码和 [这里](https://huggingface.co/fishaudio/fish-speech-1) 找到模型." | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
import ast | ||
import glob | ||
import json | ||
from collections import OrderedDict | ||
from pathlib import Path | ||
|
||
from loguru import logger | ||
|
||
from .core import DEFAULT_LANGUAGE, I18N_FILE_PATH | ||
|
||
|
||
def extract_i18n_strings(node):
    """Collect string literals passed positionally to ``i18n(...)`` calls.

    The AST rooted at ``node`` is walked depth-first. Only calls whose callee
    is the bare name ``i18n`` are considered (attribute calls such as
    ``obj.i18n(...)`` are ignored), and only positional arguments that are
    string literals are collected.

    Args:
        node: Any ``ast.AST`` node, typically the result of ``ast.parse``.

    Returns:
        A list of the collected strings in depth-first order. Duplicates are
        kept.
    """
    i18n_strings = []

    is_i18n_call = (
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id == "i18n"
    )
    if is_i18n_call:
        # ast.Str (and its ``.s`` attribute) was deprecated in Python 3.8 and
        # removed in 3.12; string literals are ast.Constant nodes whose
        # ``value`` is a str.
        i18n_strings.extend(
            arg.value
            for arg in node.args
            if isinstance(arg, ast.Constant) and isinstance(arg.value, str)
        )

    # Recurse into every child so nested i18n(...) calls are found as well.
    for child_node in ast.iter_child_nodes(node):
        i18n_strings.extend(extract_i18n_strings(child_node))

    return i18n_strings
|
||
|
||
# Walk every .py file under the scanned folders (recursively), parse each one
# into an AST, and gather the string literals passed to i18n(...).
strings = []
folders = ["fish_speech", "tools"]
for folder in folders:
    for source_path in Path(folder).rglob("*.py"):
        code = source_path.read_text(encoding="utf-8")
        # Cheap textual pre-filter: only parse files that mention "i18n(".
        if "i18n(" not in code:
            continue
        found = extract_i18n_strings(ast.parse(code))
        logger.info(f"Found {len(found)} i18n strings in {source_path}")
        strings.extend(found)

# Distinct keys referenced anywhere in the scanned code.
code_keys = set(strings)
logger.info(f"Total unique: {len(code_keys)}")
|
||
|
||
# Load the reference (default-language) translation file.
standard_file = I18N_FILE_PATH / f"{DEFAULT_LANGUAGE}.json"
with standard_file.open("r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)
standard_keys = set(standard_data.keys())

# Keys present in the reference file that no i18n(...) call uses.
unused_keys = standard_keys - code_keys
logger.info(f"Found {len(unused_keys)} unused keys in {standard_file}")
for unused_key in unused_keys:
    logger.info(f"\t{unused_key}")

# Keys used in code that the reference file does not contain yet.
missing_keys = code_keys - standard_keys
logger.info(f"Found {len(missing_keys)} missing keys in {standard_file}")
for missing_key in missing_keys:
    logger.info(f"\t{missing_key}")

# In the reference file every key maps to itself.
code_keys_dict = OrderedDict((s, s) for s in strings)

# Overwrite the reference file with exactly the keys found in code.
with standard_file.open("w", encoding="utf-8") as f:
    json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True)
    f.write("\n")

logger.info(f"Updated {standard_file}")
|
||
|
||
# Synchronise every non-default language file with the reference file:
# add keys the reference has, drop keys it does not, then rewrite the file.

# Load the reference (default-language) file.
standard_file = I18N_FILE_PATH / f"{DEFAULT_LANGUAGE}.json"
with open(standard_file, "r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)
reference_keys = set(standard_data)

# Every other JSON file in the locale directory is a translation to sync.
dir_path = I18N_FILE_PATH
languages = [f for f in dir_path.glob("*.json") if f.stem != DEFAULT_LANGUAGE]

for lang_file in languages:
    with open(lang_file, "r", encoding="utf-8") as f:
        lang_data = json.load(f, object_pairs_hook=OrderedDict)

    lang_keys = set(lang_data)
    absent_from_lang = reference_keys - lang_keys
    extra_in_lang = lang_keys - reference_keys

    # New keys get a "#!"-prefixed placeholder value. Iterating in sorted
    # order keeps the log output deterministic.
    for key in sorted(absent_from_lang):
        lang_data[key] = "#!" + key
        logger.info(f"Added missing key: {key} to {lang_file}")

    # Keys that are no longer in the reference file are removed.
    for key in sorted(extra_in_lang):
        del lang_data[key]
        logger.info(f"Del extra key: {key} from {lang_file}")

    # Rebuild in the reference file's key order with a single O(n) pass
    # instead of sorting with a list.index() lookup per item. After the
    # add/remove steps lang_data holds exactly the reference keys.
    lang_data = OrderedDict((key, lang_data[key]) for key in standard_data)

    # NOTE: sort_keys=True means the order written to disk is alphabetical,
    # whatever order lang_data has in memory.
    with open(lang_file, "w", encoding="utf-8") as f:
        json.dump(lang_data, f, ensure_ascii=False, indent=4, sort_keys=True)
        f.write("\n")

    logger.info(f"Updated {lang_file}")

logger.info("Done")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters