diff --git a/book_maker/loader/epub_loader.py b/book_maker/loader/epub_loader.py index 4df00bbf..ff18dd39 100644 --- a/book_maker/loader/epub_loader.py +++ b/book_maker/loader/epub_loader.py @@ -131,7 +131,6 @@ def deal_old(wait_p_list): if send_num > 1: print("------------------------------------------------------") print(f"dealing {item.file_name} ...") - print("------------------------------------------------------") count = 0 wait_p_list = [] for i in range(0, len(p_list)): diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py index da204125..b2e823ba 100644 --- a/book_maker/translator/chatgptapi_translator.py +++ b/book_maker/translator/chatgptapi_translator.py @@ -18,11 +18,14 @@ def __init__(self, key, language, api_base=None, prompt_template=None): or "Please help me to translate,`{text}` to {language}, please return only translated content not include the origin text" ) + max_num_token = -1 + def rotate_key(self): openai.api_key = next(self.keys) def get_translation(self, text): self.rotate_key() + content = self.prompt_template.format(text=text, language=self.language) completion = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=[ @@ -32,9 +35,7 @@ def get_translation(self, text): }, { "role": "user", - "content": self.prompt_template.format( - text=text, language=self.language - ), + "content": content, }, ], ) @@ -45,11 +46,18 @@ def get_translation(self, text): .encode("utf8") .decode() ) + print("=================================================") + print(f'Total tokens used this time: {completion["usage"]["total_tokens"]}') + self.max_num_token = max( + self.max_num_token, int(completion["usage"]["total_tokens"]) + ) + print(f"The maximum number of tokens used at one time: {self.max_num_token}") return t_text - def translate(self, text): + def translate(self, text, needprint=True): # todo: Determine whether to print according to the cli option - print(re.sub("\n{3,}", "\n\n", text)) + if needprint: + print(re.sub("\n{3,}", "\n\n", text)) try: t_text = self.get_translation(text) @@ -65,15 +73,58 @@ def translate(self, text): t_text = self.get_translation(text) # todo: Determine whether to print according to the cli option - print(re.sub("\n{3,}", "\n\n", t_text)) + if needprint: + print(re.sub("\n{3,}", "\n\n", t_text)) return t_text + def translate_and_split_lines(self, text): + result_str = self.translate(text, False) + lines = result_str.split("\n") + lines = [line.strip() for line in lines if line.strip() != ""] + return lines + def translate_list(self, plist): sep = "\n\n\n\n\n" new_str = sep.join([item.text for item in plist]) - result_str = self.translate(new_str) - lines = result_str.split("\n") - lines = [line.strip() for line in lines if line.strip() != ""] + retry_count = 0 + plist_len = len(plist) + + # supplement_prompt = f"Translated result should have {plist_len} paragraphs" + # supplement_prompt = "Each paragraph in the source text should be translated into a separate and complete paragraph, and each paragraph should be separated" + supplement_prompt = "Each paragraph in the source text should be translated into a separate and complete paragraph, and each translated paragraph should be separated by a blank line" + + self.prompt_template = ( + "Please help me to translate,`{text}` to {language}, please return only translated content not include the origin text. " + + supplement_prompt + ) + + lines = self.translate_and_split_lines(new_str) + + while len(lines) != plist_len and retry_count < 15: + print( + f"bug: {plist_len} paragraphs of text translated into {len(lines)} paragraphs" + ) + num = 6 + print(f"sleep for {num}s and try again") + time.sleep(num) + print(f"retry {retry_count+1} ...") + lines = self.translate_and_split_lines(new_str) + retry_count += 1 + if len(lines) == plist_len: + print("retry success") + + if len(lines) != plist_len: + for i in range(0, plist_len): + print(plist[i].text) + print() + if i < len(lines): + print(lines[i]) + print() + + print( + f"bug: {plist_len} paragraphs of text translated into {len(lines)} paragraphs" + ) + print("continue") return lines