-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathaudio.py
85 lines (64 loc) · 2.69 KB
/
audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
from pydub import AudioSegment
import numpy as np
import re
from scipy import signal, io
import librosa
# Input/output directories for the mixing script.
noise_folder = '/home/ankur/Downloads/Others/noise'
clean_folder = '/home/ankur/Downloads/Others/dev-clean'
mixed_folder = '/home/ankur/Downloads/Others/mixed'

# os.listdir() returns entries in arbitrary order; sorting makes the
# noise/clean pairing built from these lists reproducible across runs.
noise_files = sorted(os.listdir(noise_folder))
clean_files = sorted(os.listdir(clean_folder))
def read_audio(audio_path, target_fs=None, duration=4):
    """Load at most ``duration`` seconds of an audio file as a mono signal.

    Args:
        audio_path: Path of the audio file to read.
        target_fs: Sample rate to load at; ``None`` keeps the rate reported
            by ``librosa.load`` for the file.
        duration: Maximum number of seconds to load from the start of the
            file (passed straight to ``librosa.load``).

    Returns:
        A tuple ``(audio, fs, total_duration)``: the loaded samples, their
        sample rate, and the result of ``librosa.get_duration`` for
        ``audio_path``. The last value is computed from the path rather than
        from the loaded samples, so it is not limited by ``duration``.
    """
    audio, fs = librosa.load(audio_path, sr=target_fs, duration=duration)

    # librosa lays multi-channel audio out as (channels, samples), so a
    # down-mix to mono must average over axis 0, not over the time axis.
    if audio.ndim > 1:
        audio = np.mean(audio, axis=0)

    # Safety net: only resample if the loader did not already deliver the
    # requested rate.
    if target_fs is not None and fs != target_fs:
        audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
        fs = target_fs

    # librosa 0.10 renamed the ``filename`` keyword to ``path`` and deprecated
    # the old spelling; fall back for older releases that only know
    # ``filename``.
    try:
        total_duration = librosa.get_duration(path=audio_path)
    except TypeError:
        total_duration = librosa.get_duration(filename=audio_path)

    return audio, fs, total_duration
def create_mixed_audio(zipped_list):
    """Write one two-channel 16 kHz WAV file per (noise, clean) name pair.

    For every pair whose clean file name does not end in ``txt``, the clean
    clip and the noise clip are loaded at 16 kHz with ``read_audio``. The
    noise is zero-padded or truncated to the clean clip's length, and the two
    signals are stored as channel 0 (clean) and channel 1 (noise) of an int16
    WAV file in ``mixed_folder``. The output name is the text before the first
    dot of each input name, joined as ``<clean>__<noise>.wav``.

    Args:
        zipped_list: Iterable of ``(noise_name, clean_name)`` tuples; the
            names are relative to ``noise_folder`` and ``clean_folder``.

    Returns:
        None. Files are written to disk and each output name is printed.
    """
    sample_rate = 16000
    int16_max = np.iinfo("int16").max  # loop-invariant scale factor

    for noise, clean in zipped_list:
        # Skip text entries that live next to the audio in the clean folder.
        if clean.split('.')[-1] == 'txt':
            continue

        noise_path = os.path.join(noise_folder, noise)
        clean_path = os.path.join(clean_folder, clean)

        # The clean clip uses read_audio's default length limit; the value it
        # reports as the file duration caps how much noise is loaded.
        clean_obj, _, duration = read_audio(clean_path, target_fs=sample_rate)
        noise_obj, _, _ = read_audio(
            noise_path, target_fs=sample_rate, duration=duration
        )

        # Bring the noise to exactly the clean clip's length.
        n_samples = clean_obj.shape[0]
        noise_obj = noise_obj[:n_samples]
        shortfall = n_samples - noise_obj.shape[0]
        if shortfall > 0:
            noise_obj = np.pad(noise_obj, (0, shortfall), 'constant')

        audio = np.zeros((n_samples, 2))
        audio[:, 0] = clean_obj
        audio[:, 1] = noise_obj

        filename = clean.split('.')[0] + '__' + noise.split('.')[0] + '.wav'
        file_path = os.path.join(mixed_folder, filename)
        print(filename)

        # Clip before scaling: a sample outside [-1, 1] would otherwise wrap
        # around when cast to int16 and produce a loud click.
        pcm = (np.clip(audio, -1.0, 1.0) * int16_max).astype("int16")
        io.wavfile.write(filename=file_path, rate=sample_rate, data=pcm)
# Sizes of the two file listings.
size = len(noise_files)
size_o = len(clean_files)
batches = size_o // size

# Pair every clean file with three noise files, walking through the noise
# list round-robin and wrapping back to its start when the end is reached.
zipped = []
current_idx = 0
for clean_file in clean_files:
    for _ in range(3):
        zipped.append((noise_files[current_idx], clean_file))
        current_idx = (current_idx + 1) % size

create_mixed_audio(zipped)

# NOTE: an earlier, batch-wise pairing (zipping noise_files against
# consecutive slices of clean_files of length `size`) was replaced by the
# round-robin pairing above; `batches` is a leftover of that approach.