forked from pannous/tensorflow-speech-recognition
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecord.py
executable file
·121 lines (107 loc) · 2.8 KB
/
record.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python
import subprocess
import skimage.io
import traceback
import numpy
import numpy as np
import os
import sys
from os import system
from platform import system as platform
import skimage.io
import wave
import pyaudio
import matplotlib.pyplot as plt
# Open figure 1 up front with a placeholder 2x2 matrix; record() later
# redraws the spectrogram into this same figure (fignum=1).
plt.matshow([[1, 0], [0, 1]], fignum=1)
plt.draw()

if platform() == 'Darwin':  # How Mac OS X is identified by Python
    # Ask Finder to bring the "Python" process to the front.
    system('''/usr/bin/osascript -e 'tell app "Finder" to set frontmost of process "Python" to true' ''')

# Index of the next spectrogram row to fill; record() resets it to 0 once
# `width` rows have been written.
i = 0

# Spectrogram size: `width` rows (one per analysed window) by `height`
# frequency bins kept from each window. Another width that was tried: 512.
width = 256
height = 256

# Frames requested from the audio stream per read (also used as the stream's
# frames_per_buffer). Other values tried: 512, 1024, 2048, 9192.
CHUNK = 4096

# Number of samples in each FFT window. Other values tried: 512, 2048, 4096.
length = 1024

# Hop, in samples, between consecutive FFT windows.
# Other values tried: 32, 64, 128, 512.
step = 256

# Row buffer that record() fills and displays. It starts out filled with
# random bytes and is sized (width, height) so that each row holds exactly
# the `height` bins record() writes into it.
image = numpy.array(bytearray(os.urandom(width * height)))
image = image.reshape(width, height)
def get_audio_input_stream(index=0, rate=48000, channels=1):
    """Open and return a PyAudio input stream.

    The stream is opened for input with 16-bit integer samples
    (``pyaudio.paInt16``) and ``frames_per_buffer=CHUNK``.

    Args:
        index: Input device index handed to PyAudio (default 0).
        rate: Sampling rate in Hz (default 48000). The original author's
            note mentions 96000 Hz as the maximum on a Mac.
        channels: Number of input channels (default 1).

    Returns:
        The stream object returned by ``pyaudio.PyAudio().open(...)``.
    """
    # NOTE: a fresh PyAudio instance is created on every call and is never
    # terminated here; callers that reopen streams repeatedly should close
    # the previous stream themselves.
    return pyaudio.PyAudio().open(
        format=pyaudio.paInt16,
        channels=channels,
        rate=rate,
        input=True,
        frames_per_buffer=CHUNK,
        input_device_index=index,
    )
def _spectrum_row(samples, window, bins):
    """Return the log-scaled magnitude spectrum of one window of samples.

    Args:
        samples: 1-D array of audio samples, the same length as ``window``.
        window: Taper (record() passes a Hamming window) multiplied into
            ``samples`` before the FFT.
        bins: Number of leading FFT bins to keep.

    Returns:
        Array of at most ``bins`` values ``log2(|FFT| / 256 * 0.05 + 1)``,
        capped at 255.
    """
    magnitude = numpy.absolute(numpy.fft.fft(samples * window))
    row = numpy.log2(magnitude[:bins] / 256.0 * 0.05 + 1.0)
    return numpy.minimum(row, 255)


def record():
    """Read audio from the input stream forever and plot a live spectrogram.

    Each chunk read from the stream is appended to a buffer of pending
    samples. The buffer is cut into windows of ``length`` samples taken
    every ``step`` samples; each window becomes one row of the global
    ``image``. After ``width`` rows have been written, the image is shown
    (rotated by 90 degrees) in figure 1 and the row index ``i`` restarts
    at 0.

    This function never returns. An ``IOError`` raised while reading is
    printed and the stream is reopened; any other exception propagates.
    """
    global i
    stream = get_audio_input_stream()
    hamming_window = np.hamming(length)  # taper applied to every window
    # Samples read but not yet consumed. Starts empty so that no
    # uninitialised memory is ever analysed as audio.
    pending = numpy.zeros(0)
    while True:
        try:
            dataraw = stream.read(CHUNK)
        except IOError as e:
            print(e)  # e.g. [Errno -9981] Input overflowed
            # Release the failed stream before opening a replacement.
            try:
                stream.close()
            except IOError as close_error:
                print(close_error)
            stream = get_audio_input_stream()
            # Nothing was read this round; do not fall through and process
            # a missing or stale chunk.
            continue
        samples = numpy.frombuffer(dataraw, dtype='int16')
        # While fewer than 20 rows of the current image are filled, skip
        # chunks whose summed absolute amplitude is below 1000 * width.
        if i < 20 and numpy.sum(np.abs(samples)) < 1000 * width:
            continue
        pending = numpy.append(pending, samples)
        offset = 0
        while offset < pending.size - length:
            image[i] = _spectrum_row(
                pending[offset:offset + length], hamming_window, height)
            offset += step
            i += 1
            if i == width:
                print("i %d\r" % i)
                i = 0
                # Rotate for display only, so the row buffer keeps its
                # shape for the next pass.
                plt.matshow(numpy.rot90(image), fignum=1)
                plt.draw()
                plt.pause(0.01)
        # Drop the samples already consumed so the buffer stays bounded
        # instead of growing (and being recopied) for the whole session.
        pending = pending[offset:]
if __name__ == '__main__':
    try:
        record()
    except KeyboardInterrupt:
        # record() loops forever, so Ctrl+C is the expected way to stop the
        # script; exit quietly instead of dumping a traceback.
        pass