Skip to content

Commit

Permalink
Add i18n engine
Browse files Browse the repository at this point in the history
  • Loading branch information
leng-yue committed May 1, 2024
1 parent dcbe986 commit b8473a9
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 30 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ ffmpeg.exe
asr-label-win-x64.exe
/.cache
/fishenv
/.locale
3 changes: 3 additions & 0 deletions fish_speech/i18n/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .core import i18n

__all__ = ["i18n"]
40 changes: 40 additions & 0 deletions fish_speech/i18n/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import json
import locale
from pathlib import Path

# Directory that holds one "<language>.json" translation table per language;
# it sits next to this module.
I18N_FILE_PATH = Path(__file__).parent / "locale"
# Language used when the configured/detected language has no translation file.
DEFAULT_LANGUAGE = "en_US"


def load_language_list(language):
    """Read and decode the translation table for ``language``.

    Args:
        language: Stem of a JSON file inside ``I18N_FILE_PATH``
            (for example ``"en_US"`` for ``en_US.json``).

    Returns:
        The parsed JSON document stored in that file.

    Raises:
        FileNotFoundError: If no ``<language>.json`` file exists.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    locale_file = I18N_FILE_PATH / f"{language}.json"
    raw_text = locale_file.read_text(encoding="utf-8")
    return json.loads(raw_text)


class I18nAuto:
    """Callable translator that maps source strings to the active language.

    The language is chosen once, at construction time:

    1. If a ``.locale`` file exists in the current working directory, its
       stripped content is used as the language code.
    2. Otherwise the language is taken from the system locale.
    3. If no language was found, or ``I18N_FILE_PATH`` has no
       ``<language>.json`` for it, ``DEFAULT_LANGUAGE`` is used.

    Attributes set by ``__init__``:
        language: The language code finally selected.
        language_map: The translation table loaded for ``language``.
    """

    def __init__(self):
        i18n_file = Path(".locale")

        if i18n_file.exists():
            with open(i18n_file, "r", encoding="utf-8") as f:
                language = f.read().strip()
        else:
            language = self._detect_system_language()

        # Covers both "nothing detected" (None / empty string) and
        # "detected, but there is no translation file for it".
        if not language or not (I18N_FILE_PATH / f"{language}.json").exists():
            language = DEFAULT_LANGUAGE

        self.language = language
        self.language_map = load_language_list(language)

    @staticmethod
    def _detect_system_language():
        """Return the system language code, or ``None`` if it is unknown.

        Never raises for an unparsable locale: this class is instantiated at
        import time, so a ``ValueError`` here would break every importer.
        """
        import warnings

        # getlocale can't identify the system's language ((None, None)), so
        # getdefaultlocale is preferred while it still exists. It is
        # deprecated and scheduled for removal; fall back to getlocale then.
        detect = getattr(locale, "getdefaultlocale", None) or locale.getlocale

        try:
            with warnings.catch_warnings():
                # Keep the deprecation notice from failing runs that turn
                # warnings into errors.
                warnings.simplefilter("ignore", DeprecationWarning)
                return detect()[0]
        except ValueError:
            # Raised by the locale module for locale names it cannot parse.
            return None

    def __call__(self, key):
        """Translate ``key``; return ``key`` unchanged when it is not mapped."""
        return self.language_map.get(key, key)

    def __repr__(self):
        return "Use Language: " + self.language


# Module-level translator instance. It is constructed when this module is
# imported, so language selection and the translation-file read happen at
# import time.
i18n = I18nAuto()
25 changes: 25 additions & 0 deletions fish_speech/i18n/locale/en_US.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"5 to 10 seconds of reference audio, useful for specifying speaker.": "5 to 10 seconds of reference audio, useful for specifying speaker.",
"A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).",
"Advanced Config": "Advanced Config",
"Enable Reference Audio": "Enable Reference Audio",
"Error Message": "Error Message",
"Generate": "Generate",
"Generated Audio": "Generated Audio",
"Infer interface is closed": "Infer interface is closed",
"Inferring interface is launched at {}": "Inferring interface is launched at {}",
"Input Text": "Input Text",
"Iterative Prompt Length, 0 means off": "Iterative Prompt Length, 0 means off",
"Maximum tokens per batch, 0 means no limit": "Maximum tokens per batch, 0 means no limit",
"Opened labeler in browser": "Opened labeler in browser",
"Put your text here.": "Put your text here.",
"Reference Audio": "Reference Audio",
"Reference Text": "Reference Text",
"Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.",
"Repetition Penalty": "Repetition Penalty",
"Speaker": "Speaker",
"Text is too long, please keep it under {} characters.": "Text is too long, please keep it under {} characters.",
"Type name of the speaker": "Type name of the speaker",
"We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.",
"You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1)."
}
25 changes: 25 additions & 0 deletions fish_speech/i18n/locale/zh_CN.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"5 to 10 seconds of reference audio, useful for specifying speaker.": "5 到 10 秒的参考音频,适用于指定音色。",
"A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "由 [Fish Audio](https://fish.audio) 研发的基于 VQ-GAN 和 Llama 的多语种语音合成.",
"Advanced Config": "高级参数",
"Enable Reference Audio": "启用参考音频",
"Error Message": "错误信息",
"Generate": "生成",
"Generated Audio": "音频",
"Infer interface is closed": "推理界面已关闭",
"Inferring interface is launched at {}": "推理界面已在 {} 上启动",
"Input Text": "输入文本",
"Iterative Prompt Length, 0 means off": "迭代提示长度,0 表示关闭",
"Maximum tokens per batch, 0 means no limit": "每批最大令牌数,0 表示无限制",
"Opened labeler in browser": "在浏览器中打开标注工具",
"Put your text here.": "在此处输入文本.",
"Reference Audio": "参考音频",
"Reference Text": "参考文本",
"Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "相关代码使用 BSD-3-Clause 许可证发布,权重使用 CC BY-NC-SA 4.0 许可证发布.",
"Repetition Penalty": "重复惩罚",
"Speaker": "说话人",
"Text is too long, please keep it under {} characters.": "文本太长,请保持在 {} 个字符以内.",
"Type name of the speaker": "输入说话人的名称",
"We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "我们不对模型的任何滥用负责,请在使用之前考虑您当地的法律法规.",
"You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "你可以在 [这里](https://github.com/fishaudio/fish-speech) 找到源代码和 [这里](https://huggingface.co/fishaudio/fish-speech-1) 找到模型."
}
122 changes: 122 additions & 0 deletions fish_speech/i18n/scan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import ast
import glob
import json
from collections import OrderedDict
from pathlib import Path

from loguru import logger

from .core import DEFAULT_LANGUAGE, I18N_FILE_PATH


def extract_i18n_strings(node):
    """Collect string literals passed positionally to ``i18n(...)`` calls.

    Walks ``node`` and all of its descendants depth-first. Only calls whose
    callee is the bare name ``i18n`` are considered (``obj.i18n(...)`` is
    ignored), and only plain string-literal positional arguments are
    collected; f-strings, names and other expressions are skipped.

    Args:
        node: Any ``ast.AST`` node, typically the tree from ``ast.parse``.

    Returns:
        A list of the literal strings in traversal order; duplicates are kept.
    """
    i18n_strings = []

    if (
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id == "i18n"
    ):
        # String literals are ast.Constant nodes holding a str value.
        # ast.Str / .s are deprecated aliases that newer Pythons removed.
        i18n_strings.extend(
            arg.value
            for arg in node.args
            if isinstance(arg, ast.Constant) and isinstance(arg.value, str)
        )

    # Always descend, so calls nested inside other expressions (including
    # inside the arguments of an i18n call) are found as well.
    for child_node in ast.iter_child_nodes(node):
        i18n_strings.extend(extract_i18n_strings(child_node))

    return i18n_strings


# Step 1: collect every string literal passed to i18n(...) in the project.
# Each folder is scanned recursively for .py files; a file is parsed into an
# AST only when it mentions "i18n(" at all.

strings = []
folders = ["fish_speech", "tools"]
for folder in folders:
    for f in Path(folder).rglob("*.py"):
        code = f.read_text(encoding="utf-8")
        # Cheap substring pre-filter so unrelated files are never parsed.
        if "i18n(" in code:
            tree = ast.parse(code)
            i18n_strings = extract_i18n_strings(tree)
            logger.info(f"Found {len(i18n_strings)} i18n strings in {f}")
            strings.extend(i18n_strings)

code_keys = set(strings)
logger.info(f"Total unique: {len(code_keys)}")


# Step 2: compare the keys used in code with the default-language file and
# report the differences before the file is regenerated.
standard_file = I18N_FILE_PATH / f"{DEFAULT_LANGUAGE}.json"
with open(standard_file, "r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)
standard_keys = set(standard_data.keys())

unused_keys = standard_keys - code_keys
logger.info(f"Found {len(unused_keys)} unused keys in {standard_file}")
# Sorted so the report is stable between runs (set order is arbitrary).
for unused_key in sorted(unused_keys):
    logger.info(f"\t{unused_key}")

missing_keys = code_keys - standard_keys
logger.info(f"Found {len(missing_keys)} missing keys in {standard_file}")
for missing_key in sorted(missing_keys):
    logger.info(f"\t{missing_key}")

# The default-language table maps every key to itself.
code_keys_dict = OrderedDict((s, s) for s in strings)

# Write back. sort_keys=True fixes the on-disk key order.
with open(standard_file, "w", encoding="utf-8") as f:
    json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True)
    f.write("\n")

logger.info(f"Updated {standard_file}")


# Step 3: bring every other language file in line with the regenerated
# default-language file.

# All JSON files in the locale directory except the default language.
dir_path = I18N_FILE_PATH
languages = [f for f in dir_path.glob("*.json") if f.stem != DEFAULT_LANGUAGE]

# Reload the standard file so the comparison uses what was just written.
with open(standard_file, "r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)
current_keys = set(standard_data.keys())

for lang_file in languages:
    with open(lang_file, "r", encoding="utf-8") as f:
        lang_data = json.load(f, object_pairs_hook=OrderedDict)

    # Keys the language file lacks (diff) and keys it should no longer
    # carry (miss).
    diff = current_keys - set(lang_data.keys())
    miss = set(lang_data.keys()) - current_keys

    # Add missing keys; the "#!" prefix is put in front of the untranslated
    # source string.
    for key in sorted(diff):
        lang_data[key] = "#!" + key
        logger.info(f"Added missing key: {key} to {lang_file}")

    # Drop keys that are no longer present in the standard file.
    for key in sorted(miss):
        del lang_data[key]
        logger.info(f"Del extra key: {key} from {lang_file}")

    # No explicit reordering is needed: sort_keys=True below decides the
    # order of the keys in the written file.
    with open(lang_file, "w", encoding="utf-8") as f:
        json.dump(lang_data, f, ensure_ascii=False, indent=4, sort_keys=True)
        f.write("\n")

    logger.info(f"Updated {lang_file}")

logger.info("Done")
10 changes: 7 additions & 3 deletions fish_speech/webui/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from loguru import logger
from tqdm import tqdm

from fish_speech.i18n import i18n
from fish_speech.webui.launch_utils import Seafoam, versions_html

PYTHON = os.path.join(os.environ.get("PYTHON_FOLDERPATH", ""), "python")
Expand Down Expand Up @@ -97,7 +98,7 @@ def change_label(if_label):
# 设置要访问的URL
url = "https://text-labeler.pages.dev/"
webbrowser.open(url)
yield f"已打开网址"
yield i18n("Opened labeler in browser")
elif if_label == False:
p_label = None
yield "Nothing"
Expand All @@ -119,7 +120,10 @@ def change_infer(
env["GRADIO_SERVER_NAME"] = host
env["GRADIO_SERVER_PORT"] = port
# 启动第二个进程
yield build_html_ok_message(f"推理界面已开启, 访问 http://{host}:{port}")
url = f"http://{host}:{port}"
yield build_html_ok_message(
i18n("Inferring interface is launched at {}").format(url)
)
p_infer = subprocess.Popen(
[
PYTHON,
Expand All @@ -140,7 +144,7 @@ def change_infer(
elif if_infer == False and p_infer != None:
kill_process(p_infer.pid)
p_infer = None
yield build_html_error_message("推理界面已关闭")
yield build_html_error_message(i18n("Infer interface is closed"))


js = load_data_in_raw("fish_speech/webui/js/animate.js")
Expand Down
55 changes: 28 additions & 27 deletions tools/webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,26 @@
from loguru import logger
from transformers import AutoTokenizer

from fish_speech.i18n import i18n
from tools.llama.generate import launch_thread_safe_queue
from tools.vqgan.inference import load_model as load_vqgan_model

# Make einx happy
os.environ["EINX_FILTER_TRACEBACK"] = "false"


HEADER_MD = """# Fish Speech
HEADER_MD = f"""# Fish Speech
A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).
由 [Fish Audio](https://fish.audio) 研发的基于 VQ-GAN 和 Llama 的多语种语音合成.
{i18n("A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).")}
You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).
你可以在 [这里](https://github.com/fishaudio/fish-speech) 找到源代码和 [这里](https://huggingface.co/fishaudio/fish-speech-1) 找到模型.
{i18n("You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).")}
Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.
相关代码使用 BSD-3-Clause 许可证发布,权重使用 CC BY-NC-SA 4.0 许可证发布.
{i18n("Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.")}
We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.
我们不对模型的任何滥用负责,请在使用之前考虑您当地的法律法规.
{i18n("We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.")}
"""

TEXTBOX_PLACEHOLDER = """Put your text here. 在此处输入文本."""
TEXTBOX_PLACEHOLDER = i18n("Put your text here.")

try:
import spaces
Expand Down Expand Up @@ -76,7 +73,9 @@ def inference(
if args.max_gradio_length > 0 and len(text) > args.max_gradio_length:
return (
None,
f"Text is too long, please keep it under {args.max_gradio_length} characters.",
i18n("Text is too long, please keep it under {} characters.").format(
args.max_gradio_length
),
)

# Parse reference audio aka prompt
Expand Down Expand Up @@ -171,21 +170,21 @@ def build_app():
with gr.Row():
with gr.Column(scale=3):
text = gr.Textbox(
label="Input Text / 输入文本", placeholder=TEXTBOX_PLACEHOLDER, lines=15
label=i18n("Input Text"), placeholder=TEXTBOX_PLACEHOLDER, lines=15
)

with gr.Row():
with gr.Tab(label="Advanced Config / 高级参数"):
with gr.Tab(label=i18n("Advanced Config")):
chunk_length = gr.Slider(
label="Iterative Prompt Length, 0 means off / 迭代提示长度,0 表示关闭",
label=i18n("Iterative Prompt Length, 0 means off"),
minimum=0,
maximum=500,
value=30,
step=8,
)

max_new_tokens = gr.Slider(
label="Maximum tokens per batch, 0 means no limit / 每批最大令牌数,0 表示无限制",
label=i18n("Maximum tokens per batch, 0 means no limit"),
minimum=0,
maximum=args.max_length,
value=0, # 0 means no limit
Expand All @@ -201,7 +200,7 @@ def build_app():
)

repetition_penalty = gr.Slider(
label="Repetition Penalty",
label=i18n("Repetition Penalty"),
minimum=0,
maximum=2,
value=1.5,
Expand All @@ -217,40 +216,42 @@ def build_app():
)

speaker = gr.Textbox(
label="Speaker / 说话人",
placeholder="Type name of the speaker / 输入说话人的名称",
label=i18n("Speaker"),
placeholder=i18n("Type name of the speaker"),
lines=1,
)

with gr.Tab(label="Reference Audio / 参考音频"):
with gr.Tab(label=i18n("Reference Audio")):
gr.Markdown(
"5 to 10 seconds of reference audio, useful for specifying speaker. \n5 到 10 秒的参考音频,适用于指定音色。"
i18n(
"5 to 10 seconds of reference audio, useful for specifying speaker."
)
)

enable_reference_audio = gr.Checkbox(
label="Enable Reference Audio / 启用参考音频",
label=i18n("Enable Reference Audio"),
)
reference_audio = gr.Audio(
label="Reference Audio / 参考音频",
label=i18n("Reference Audio"),
type="filepath",
)
reference_text = gr.Textbox(
label="Reference Text / 参考文本",
placeholder="参考文本",
label=i18n("Reference Text"),
placeholder=i18n("Reference Text"),
lines=1,
value="在一无所知中,梦里的一天结束了,一个新的「轮回」便会开始。",
)

with gr.Column(scale=3):
with gr.Row():
error = gr.HTML(label="Error Message / 错误信息")
error = gr.HTML(label=i18n("Error Message"))
with gr.Row():
audio = gr.Audio(label="Generated Audio / 音频", type="numpy")
audio = gr.Audio(label=i18n("Generated Audio"), type="numpy")

with gr.Row():
with gr.Column(scale=3):
generate = gr.Button(
value="\U0001F3A7 Generate / 合成", variant="primary"
value="\U0001F3A7 " + i18n("Generate"), variant="primary"
)

# # Submit
Expand Down

0 comments on commit b8473a9

Please sign in to comment.