compute_features.py
import os

import numpy as np
import scipy.io.wavfile
import scipy.signal
import librosa
def read_audio(audio_path, target_fs=None, duration=4):
    (audio, fs) = librosa.load(audio_path, sr=target_fs, duration=duration)
    # print(fs)
    # downmix to mono if this is not a mono sound file
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)
    if target_fs is not None and fs != target_fs:
        audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
        fs = target_fs
    return audio, fs, librosa.get_duration(filename=audio_path)
def readAudio(filein, duration=4):
    audioObj, sampleRate, duration = read_audio(filein, target_fs=16000, duration=duration)
    bitrate = audioObj.dtype
    try:
        maxv = np.finfo(bitrate).max
    except ValueError:
        maxv = np.iinfo(bitrate).max
    return audioObj.astype('float') / maxv, sampleRate, bitrate, duration
def readAudioScipy(filein):
    sampleRate, audioObj = scipy.io.wavfile.read(filein)
    bitrate = audioObj.dtype
    try:
        maxv = np.finfo(bitrate).max
    except ValueError:
        maxv = np.iinfo(bitrate).max
    # note: maxv is computed but the samples are returned unnormalised
    return audioObj.astype('float'), sampleRate, bitrate
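# The librosa-based readers above are not used in the main block below:
# readAudioScipy keeps the stereo channels separate (voice / accompaniment),
# which the feature extraction relies on, while read_audio / readAudio downmix
# to mono.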
if __name__ == "__main__":
    db = '/home/ankur/Downloads/Others/mixed1'
    feature_path = '/home/ankur/Downloads/Others/features'
    n_window = 1024
    n_overlap = 256
    ham_win = np.hamming(n_window)
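    # With n_window = 1024 and n_overlap = 256, consecutive frames are
    # 1024 - 256 = 768 samples (48 ms at 16 kHz) apart, each frame spans 64 ms,
    # and the one-sided spectrogram has 1024 // 2 + 1 = 513 frequency bins.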
    for filename in os.listdir(db):
        if filename.endswith(".wav"):
            print(filename)
            audioObj, sampleRate, bitrate = readAudioScipy(os.path.join(db, filename))
            assert sampleRate == 16000, "Sample rate needs to be 16000"
            # print(audioObj.shape)
            # expects stereo input: channel 0 = voice, channel 1 = accompaniment
            audio = audioObj[:, 0] + audioObj[:, 1]  # create mixture: voice + accompaniment
            # magnitude spectrogram of the mixture
            [f, t, mixed_spec] = scipy.signal.spectrogram(
                x=audio,
                window=ham_win,
                nperseg=n_window,
                noverlap=n_overlap,
                detrend=False,
                return_onesided=True,
                mode='magnitude')
            # print(mixed_spec.T.shape)
            clean = audioObj[:, 0]  # voice
            noise = audioObj[:, 1]  # accompaniment
            # magnitude spectrogram of the voice
            [f, t, clean_spec] = scipy.signal.spectrogram(
                x=clean,
                window=ham_win,
                nperseg=n_window,
                noverlap=n_overlap,
                detrend=False,
                return_onesided=True,
                mode='magnitude')
            # print(clean_spec.T.shape)
            # magnitude spectrogram of the accompaniment
            [f, t, noise_spec] = scipy.signal.spectrogram(
                x=noise,
                window=ham_win,
                nperseg=n_window,
                noverlap=n_overlap,
                detrend=False,
                return_onesided=True,
                mode='magnitude')
            # print(noise_spec.T.shape)
            mask = np.zeros(mixed_spec.shape)
            for i in range(mixed_spec.shape[0]):
                for j in range(mixed_spec.shape[1]):
                    if clean_spec[i][j] >= noise_spec[i][j]:
                        mask[i][j] = 1
                    else:
                        mask[i][j] = 0
            # print(mask.shape)
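            # This is the standard ideal-binary-mask construction: a bin is 1
            # when the voice magnitude is at least as large as the accompaniment
            # magnitude, and 0 otherwise. A vectorised equivalent would be
            #   mask = (clean_spec >= noise_spec).astype(float)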
            audioObj = None
            if not os.path.exists(feature_path):
                os.makedirs(feature_path)
            np.save(os.path.join(feature_path, filename.replace('.wav', '__spec.npy')), mixed_spec.T)
            np.save(os.path.join(feature_path, filename.replace('.wav', '__mask.npy')), mask.T)
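    # A minimal sketch (not part of this script) of how the saved pairs could be
    # read back later, e.g. for training a mask-prediction model; 'song' stands
    # in for any processed .wav basename:
    #
    #   spec = np.load(os.path.join(feature_path, 'song__spec.npy'))  # (frames, 513)
    #   mask = np.load(os.path.join(feature_path, 'song__mask.npy'))  # (frames, 513)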