-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnative.py
95 lines (77 loc) · 3.47 KB
/
native.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import speech_recognition as sr
import openai
import pyaudio
# Speech recognizer and OpenAI client shared by the whole script.
# NOTE(review): openai.OpenAI() is called without arguments, so the API key is
# presumably read from the environment (OPENAI_API_KEY) — confirm.
recognizer = sr.Recognizer()
client = openai.OpenAI()
# Voice name passed to the OpenAI text-to-speech request.
voice="nova"
# Recording and audio playback parameters
SAMPLE_RATE = 16000 # Passed to sr.Microphone(sample_rate=...) when capturing audio
CHUNK = 1024 # Chunk size passed to iter_bytes() when streaming the TTS audio
HW_DEVICE_INDEX = 1 # Output device index used for playback; replace with the correct device index for HW:1,0
# PyAudio instance used to open the audio output stream; it is terminated at
# the end of the script.
p = pyaudio.PyAudio()
def _ask_gpt(text):
    """Send the recognized utterance to the chat model and return its reply.

    Args:
        text: The transcript produced by speech recognition.

    Returns:
        The reply text, or an empty string when the model returned no
        text content (``message.content`` may be ``None``).
    """
    # The Turkish prefix asks the model for a short, scientific answer.
    prompt = f"lütfen bilimsel kısa bir cevap ver {text}"
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=1,
        max_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        response_format={"type": "text"},
    )
    return completion.choices[0].message.content or ""


def _speak(text):
    """Synthesize *text* with OpenAI text-to-speech and play it back.

    The PyAudio output stream is always stopped and closed, even when the
    TTS request or playback fails or the user interrupts with CTRL + C, so
    the audio device is never left open.

    Args:
        text: The non-empty text to speak.
    """
    # The output stream is opened as 16-bit mono at 24000 Hz to match the
    # raw "pcm" audio requested from the TTS endpoint below.
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=24000,
        output=True,
        output_device_index=HW_DEVICE_INDEX,
    )
    try:
        with client.audio.speech.with_streaming_response.create(
            model="tts-1",
            voice=voice,
            input=text,
            response_format="pcm",
        ) as tts_response:
            # Play each chunk as soon as it arrives instead of buffering
            # the whole response first.
            for chunk in tts_response.iter_bytes(CHUNK):
                stream.write(chunk)
    finally:
        try:
            stream.stop_stream()
        finally:
            stream.close()


def listen_and_speak():
    """Continuously listen, transcribe Turkish speech, ask GPT, and speak the reply.

    Opens the microphone once, calibrates the recognizer for ambient noise,
    then loops forever: capture an utterance, transcribe it with Google
    speech recognition (``tr-TR``), send it to the chat model, print the
    reply, and play it through text-to-speech.

    Recognition and API errors are reported on stdout and the loop carries
    on with the next utterance. The loop only ends when an exception that
    is not an ``Exception`` subclass (e.g. ``KeyboardInterrupt``)
    propagates to the caller.
    """
    with sr.Microphone(sample_rate=SAMPLE_RATE) as source:  # No 'channels' argument
        print("Ortam gürültüsüne göre ayarlama yapılıyor... Lütfen bekleyin.")
        recognizer.adjust_for_ambient_noise(source)
        while True:
            print("Dinleniyor... (Çıkmak için CTRL + C)")
            try:
                # Capture one utterance and transcribe it in Turkish.
                audio_data = recognizer.listen(source)
                text = recognizer.recognize_google(audio_data, language='tr-TR')
                print(f"Dediğiniz: {text}")

                gpt_response = _ask_gpt(text)
                print(f"OpenAI Yanıtı: {gpt_response}")

                # Skip TTS for an empty reply: there is nothing to speak,
                # and no reason to open the audio device.
                if gpt_response.strip():
                    _speak(gpt_response)
            except sr.UnknownValueError:
                print("Ses anlaşılamadı.")
            except sr.RequestError as e:
                print(f"Google Ses Tanıma servisine ulaşılamıyor; {e}")
            except Exception as e:
                # Loop-level boundary: report the failure and keep listening.
                print(f"OpenAI API hatası: {e}")
# Run the listen/recognize/speak loop until the user presses CTRL + C.
# PyAudio is terminated in `finally` so it is also released when
# listen_and_speak() exits with an error other than KeyboardInterrupt
# (the microphone is opened outside the loop's own error handling, so such
# errors propagate up to here).
try:
    listen_and_speak()
except KeyboardInterrupt:
    print("\nDinleme durduruldu.")
finally:
    p.terminate()