-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathllm.py
58 lines (49 loc) · 1.49 KB
/
llm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""Module to manage LLM calls."""
import io
from openai import OpenAI
from openai.types.chat.parsed_chat_completion import ContentType
from model import DamageReport
from prompt import prompt
# Module-level OpenAI client, created once at import time and used by the
# functions in this module.
# NOTE(review): presumably picks up credentials from the environment — confirm.
client = OpenAI()
def call_transcription(audio_file: io.BytesIO) -> str:
    """Send the audio to the ``whisper-1`` model and return the resulting text.

    The request goes through ``client.audio.translations`` (not
    ``client.audio.transcriptions``).
    NOTE(review): the translations endpoint presumably returns English text
    regardless of the spoken language — confirm this is the intended behavior.

    Args:
        audio_file: In-memory audio payload to upload.

    Returns:
        The ``text`` attribute of the API response.
    """
    result = client.audio.translations.create(file=audio_file, model="whisper-1")
    return result.text
def call_llm(
    audio_transcript: str | None, base64_images: list[str]
) -> DamageReport | None:
    """Request a structured ``DamageReport`` from the model.

    The user message is built from the optional transcript followed by every
    image, and is sent together with the system ``prompt`` to ``gpt-4o`` with
    ``DamageReport`` as the structured response format.

    Args:
        audio_transcript: Optional transcript text. When truthy it becomes the
            first user content part, prefixed with ``"<Record>: "``; when
            ``None`` or empty it is omitted.
        base64_images: Base64-encoded images. Each one is sent as a
            ``data:image/jpeg;base64,...`` URL, in the given order.

    Returns:
        The ``parsed`` attribute of the first choice's message: the response
        parsed into ``DamageReport``, or ``None`` when no parsed object is
        available.

    NOTE(review): if the transcript is empty and ``base64_images`` is empty,
    the user message is sent with no content parts — presumably the API
    rejects that; confirm whether callers guarantee at least one input.
    """
    user_input = []
    if audio_transcript:
        user_input.append(
            {"type": "text", "text": "<Record>: " + audio_transcript}
        )
    # Images always follow the transcript so the ordering of content parts
    # stays the same as before.
    user_input.extend(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
        }
        for base64_image in base64_images
    )

    response = client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                ],
            },
            {
                "role": "user",
                "content": user_input,
            },
        ],
        response_format=DamageReport,
        temperature=0.0,
    )
    return response.choices[0].message.parsed