Skip to content

Commit

Permalink
prompt and retry
Browse files Browse the repository at this point in the history
  • Loading branch information
hleft committed Mar 11, 2023
1 parent 3523469 commit 9aade4b
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 10 deletions.
1 change: 0 additions & 1 deletion book_maker/loader/epub_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ def deal_old(wait_p_list):
if send_num > 1:
print("------------------------------------------------------")
print(f"dealing {item.file_name} ...")
print("------------------------------------------------------")
count = 0
wait_p_list = []
for i in range(0, len(p_list)):
Expand Down
69 changes: 60 additions & 9 deletions book_maker/translator/chatgptapi_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@ def __init__(self, key, language, api_base=None, prompt_template=None):
or "Please help me to translate,`{text}` to {language}, please return only translated content not include the origin text"
)

max_num_token = -1

def rotate_key(self):
    """Take the next key from ``self.keys`` and make it the active OpenAI API key."""
    upcoming_key = next(self.keys)
    openai.api_key = upcoming_key

def get_translation(self, text):
self.rotate_key()
content = self.prompt_template.format(text=text, language=self.language)
completion = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
Expand All @@ -32,9 +35,7 @@ def get_translation(self, text):
},
{
"role": "user",
"content": self.prompt_template.format(
text=text, language=self.language
),
"content": content,
},
],
)
Expand All @@ -45,11 +46,18 @@ def get_translation(self, text):
.encode("utf8")
.decode()
)
print("=================================================")
print(f'Total tokens used this time: {completion["usage"]["total_tokens"]}')
self.max_num_token = max(
self.max_num_token, int(completion["usage"]["total_tokens"])
)
print(f"The maximum number of tokens used at one time: {self.max_num_token}")
return t_text

def translate(self, text):
def translate(self, text, needprint=True):
# todo: Determine whether to print according to the cli option
print(re.sub("\n{3,}", "\n\n", text))
if needprint:
print(re.sub("\n{3,}", "\n\n", text))

try:
t_text = self.get_translation(text)
Expand All @@ -65,15 +73,58 @@ def translate(self, text):
t_text = self.get_translation(text)

# todo: Determine whether to print according to the cli option
print(re.sub("\n{3,}", "\n\n", t_text))
if needprint:
print(re.sub("\n{3,}", "\n\n", t_text))
return t_text

def translate_and_split_lines(self, text):
    """Translate *text* without console echo and return its non-blank lines.

    The text is passed to ``self.translate`` with printing disabled; the
    result is split on ``"\\n"``, every piece is stripped of surrounding
    whitespace, and pieces that end up empty are dropped.
    """
    translated = self.translate(text, False)
    stripped_pieces = (piece.strip() for piece in translated.split("\n"))
    return [piece for piece in stripped_pieces if piece != ""]

def translate_list(self, plist):
    """Translate several paragraphs in one request, retrying on count mismatch.

    The ``text`` of every item in *plist* is joined with a five-newline
    separator and translated via ``self.translate_and_split_lines``. If the
    number of returned lines differs from ``len(plist)``, the request is
    repeated (after a fixed pause) up to 15 times. When the counts still
    differ, source and translated paragraphs are printed side by side for
    diagnosis and the mismatched result is returned anyway.

    Args:
        plist: sequence of objects exposing a ``text`` attribute.

    Returns:
        The list of stripped, non-blank translated lines from the last
        attempt; its length may differ from ``len(plist)``.

    Side effects:
        Overwrites ``self.prompt_template`` with the default prompt plus a
        paragraph-separation instruction; the new template stays in place
        after this method returns.
    """
    max_retries = 15
    retry_delay = 6  # seconds to wait between attempts

    sep = "\n\n\n\n\n"
    new_str = sep.join(item.text for item in plist)
    plist_len = len(plist)

    supplement_prompt = "Each paragraph in the source text should be translated into a separate and complete paragraph, and each translated paragraph should be separated by a blank line"

    self.prompt_template = (
        "Please help me to translate,`{text}` to {language}, please return only translated content not include the origin text. "
        + supplement_prompt
    )

    # Translate exactly once up front; an earlier extra request whose result
    # was immediately discarded has been removed.
    lines = self.translate_and_split_lines(new_str)

    retry_count = 0
    while len(lines) != plist_len and retry_count < max_retries:
        print(
            f"bug: {plist_len} paragraphs of text translated into {len(lines)} paragraphs"
        )
        print(f"sleep for {retry_delay}s and try again")
        time.sleep(retry_delay)
        print(f"retry {retry_count+1} ...")
        lines = self.translate_and_split_lines(new_str)
        retry_count += 1
        if len(lines) == plist_len:
            print("retry success")

    if len(lines) != plist_len:
        # Retries exhausted: dump each source paragraph next to whatever
        # translated line shares its index so the mismatch can be inspected.
        for i, item in enumerate(plist):
            print(item.text)
            print()
            if i < len(lines):
                print(lines[i])
                print()

        print(
            f"bug: {plist_len} paragraphs of text translated into {len(lines)} paragraphs"
        )
        print("continue")

    return lines

0 comments on commit 9aade4b

Please sign in to comment.