-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
125 lines (101 loc) · 4.03 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import json
from chat_gpt_service import ChatGPTService
from input_listener import InputListener
import pvporcupine
import struct
import os
import pyaudio
import openai
from tts_service import TextToSpeechService
# Load runtime configuration and configure the OpenAI client.
# Use a context manager so the config file handle is closed promptly
# (the previous json.load(open(...)) leaked the handle).
with open("config.json") as _config_file:
    config = json.load(_config_file)
openai.api_key = config["openai_key"]
# Organization ID is optional; only set it when present in the config.
if "openai_org" in config:
    openai.organization = config["openai_org"]
class WakeWordDetector:
    """Voice-assistant loop: wait for the Porcupine wake word, record a
    spoken command, transcribe it with Whisper, send the text to ChatGPT,
    and speak the reply through the TTS service.
    """

    def __init__(self, library_path, model_path, keyword_paths):
        """Initialize Porcupine, PyAudio, the input listener and TTS.

        The library/model/keyword path parameters are currently unused:
        creation relies on the built-in "picovoice" keyword (see the
        commented-out arguments below, kept for future custom keywords).

        Raises:
            Exception: if no audio device matching the configured
                sound card name is found.
        """
        self.chat_gpt_service = ChatGPTService()
        # Porcupine access key comes from config.json.
        pv_access_key = config["pv_access_key"]
        self.handle = pvporcupine.create(
            keywords=["picovoice"],
            access_key=pv_access_key,
            # library_path=library_path,
            # model_path=model_path,
            # keyword_paths=keyword_paths,
            sensitivities=[1],
        )
        self.pa = pyaudio.PyAudio()
        # Silence-detection parameters, with defaults when absent from config.
        self.listener = InputListener(
            config.get("silence_threshold", 75),
            config.get("silence_duration", 1.5),
        )
        sound_card_name = config.get("sound_card_name", "seeed-2mic-voicecard")
        # Find the device index of the sound card by substring match on the
        # device name reported by PyAudio.
        print("Looking for sound card...")
        for i in range(self.pa.get_device_count()):
            device_info = self.pa.get_device_info_by_index(i)
            print(device_info["name"])
            if sound_card_name in device_info["name"]:
                print("Found sound card! Using device index: %d" % i)
                self.input_device_index = i
                break
        else:
            # for/else: the loop completed without a break, i.e. no match.
            raise Exception("Could not find sound device")
        self.speech = TextToSpeechService()#self.input_device_index)
        self._init_audio_stream()

    def _init_audio_stream(self):
        """(Re)open the PyAudio input stream sized for Porcupine frames."""
        self.audio_stream = self.pa.open(
            rate=self.handle.sample_rate,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=self.handle.frame_length,
        )
        # input_device_index=self.input_device_index)

    def run(self):
        """Main loop: block on the wake word, then record -> transcribe ->
        chat -> speak, cleaning up audio resources on exit.
        """
        try:
            while True:
                pcm = self.audio_stream.read(self.handle.frame_length)
                pcm = struct.unpack_from("h" * self.handle.frame_length, pcm)
                porcupine_keyword_index = self.handle.process(pcm)
                if porcupine_keyword_index >= 0:
                    print("Wake word detected!")
                    # Release the microphone so InputListener can record.
                    self.audio_stream.close()
                    self.audio_stream = None
                    audio_path = self.listener.listen()
                    print("Transcribing...")
                    # Context manager closes the recording file promptly
                    # (previously the handle was leaked every iteration).
                    with open(audio_path, "rb") as audio_file:
                        transcript = openai.Audio.translate(
                            "whisper-1", audio_file
                        )
                    print(transcript)
                    print("Sending to chat GPT...")
                    response = self.chat_gpt_service.send_to_chat_gpt(
                        transcript["text"]
                    )
                    print(response)
                    print("Playing response...")
                    # play response
                    self.speech.speak(response)
                    # delete the temporary recording
                    os.remove(audio_path)
                    self._init_audio_stream()
                    print("Listening for wake word...")
        except KeyboardInterrupt:
            pass
        finally:
            if self.audio_stream is not None:
                self.audio_stream.close()
            if self.pa is not None:
                self.pa.terminate()
            self.handle.delete()
if __name__ == "__main__":
    # Placeholder Porcupine asset paths. They are currently unused by the
    # detector, which relies on the built-in "picovoice" keyword.
    porcupine_library = "/path/to/porcupine/library"
    porcupine_model = "/path/to/porcupine/model"
    porcupine_keywords = ["/path/to/porcupine/keyword"]
    app = WakeWordDetector(porcupine_library, porcupine_model, porcupine_keywords)
    app.run()