forked from facebookresearch/seamless_communication
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_tts_val.py
56 lines (43 loc) · 1.6 KB
/
test_tts_val.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from utils.generation import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
import socket , logging, time,sys
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("localhost", 9988))
s.listen(1)
# Configure logging to write log messages to stdout
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# download and load all models
preload_models()
def text_to_speech(text):
start_time = time.time()
audio_array = generate_audio(text)
end_time = time.time()
logging.info("Time taken is {}".format(end_time - start_time))
write_wav("vallex_generation.wav", SAMPLE_RATE, audio_array)
def text_to_speech_linear(text, output_file):
start_time = time.time()
audio_array = generate_audio(text)
end_time = time.time()
logging.info("Time taken is {}".format(end_time - start_time))
write_wav(output_file, SAMPLE_RATE, audio_array)
# while True:
# conn, addr = s.accept()
# data = conn.recv(1024)
# encoding = 'utf-8'
# data = str(data, encoding)
# conn.close()
# text_to_speech(data)
#for linear strings
while True:
conn, addr = s.accept()
data = conn.recv(1024)
encoding = 'utf-8'
data = str(data, encoding)
conn.close()
text_list = data.split(";") # Assuming strings are separated by semicolon
for i, text_item in enumerate(text_list):
output_file = f'/mnt/mydisk/valle_x/VALL-E-X/vallex_generation{i}.wav'
text_to_speech_linear(text_item, output_file)
#text_to_speech(text.strip())
else:
logging.warning("No action / text provided or recognized")