#!/usr/bin/env python
# coding: utf-8
# In[1]:
from __future__ import unicode_literals
import matplotlib.pyplot as plt
from scipy.io import wavfile
import numpy as np
import wave
import sys
import librosa
import librosa.display
import os
import torch.nn as nn
import torch.nn.functional as F
import torch
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import StratifiedShuffleSplit
# ### Obtaining Data
# In[2]:
mfccs = []
deltas = []
delta_deltas = []
label = []
X = []
# class 0 = hyena, class 1 = lion
paths = [('data/processed data/hyenas/', 0), ('data/processed data/lions/', 1)]
min_shape = 1000
for path, class_label in paths:
    for file in os.listdir(path):
        features = np.load(path + file)
        # track the shortest clip seen, to sanity-check the truncation below
        if min_shape > features[0].shape[1]:
            min_shape = features[0].shape[1]
        # truncate each feature matrix to its first 300 frames
        mfcc = np.asarray(features[0][:, :300])
        delta = np.asarray(features[1][:, :300])
        delta_delta = np.asarray(features[2][:, :300])
        mfccs.append(mfcc)
        deltas.append(delta)
        delta_deltas.append(delta_delta)
        X.append([mfcc, delta, delta_delta])
        label.append(class_label)
print('Minimum length of all our tensors is: %i' % min_shape)
# So now we have obtained the following for our input data:
# 1. **mfccs[i]**: the mel-frequency cepstral coefficients for the audio file indexed at *i*
# 2. **deltas[i]**: the first derivative of the mfccs
# 3. **delta_deltas[i]**: the second derivative of the mfccs
#
# and our associated label:
# 1. **label[i]**: 1 if the audio file indexed at *i* is a lion, 0 if it is a hyena
#
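# As a quick sanity check, every sample should now carry three equally shaped
# feature matrices (a minimal sketch; the 300-frame width comes from the
# truncation above, and the row count is however many coefficients were
# extracted during preprocessing):
# In[ ]:
print('sample 0: %d feature matrices of shape %s, label %d'
      % (len(X[0]), X[0][0].shape, label[0]))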
# Now splitting our data and randomizing it:
# In[3]:
split_test = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
split_val = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=0)
X = np.asarray(X)
label = np.asarray(label)
# hold out 20% of the data for testing
for train_index, test_index in split_test.split(X, label):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = label[train_index], label[test_index]
# hold out 10% of the remaining training data for validation
# (8% of the full dataset, leaving 72% for training)
for train_index, test_index in split_val.split(X_train, y_train):
    X_train, X_val = X_train[train_index], X_train[test_index]
    y_train, y_val = y_train[train_index], y_train[test_index]
# printing out the number of samples of each class in every split
for name, y in [('Training', y_train), ('Validation', y_val), ('Testing', y_test)]:
    counts = np.unique(y, return_counts=True)[1]
    print('%s has %d samples of class 0 and %d samples of class 1' % (name, counts[0], counts[1]))
# ### Network
# In[66]:
class Net(nn.Module):
    def __init__(self, mfcc_total):
        super(Net, self).__init__()
        # each branch collapses an (mfcc_total x 300) feature matrix to a 1 x 59 map
        self.conv_mfcc = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(mfcc_total, 10), stride=5)
        self.conv_delta = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(mfcc_total, 10), stride=5)
        self.conv_delta_delta = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(mfcc_total, 10), stride=5)
        self.conv_1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(3, 5), stride=2)
        self.conv_2 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(3, 5), stride=1)  # defined but unused in forward
        self.fc1 = nn.Linear(28, 10)
        self.fc2 = nn.Linear(10, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, mfcc, delta, delta_delta):
        # compress each (1 x 1 x mfcc_total x 300) input into a 1 x 1 x 1 x 59 feature map
        mfcc_features = self.conv_mfcc(mfcc)
        delta_features = self.conv_delta(delta)
        delta_delta_features = self.conv_delta_delta(delta_delta)
        # stack the three maps along the height dimension: 1 x 1 x 3 x 59
        features = torch.cat((mfcc_features, delta_features, delta_delta_features), 2)
        # conv_1 reduces the 3 x 59 stack to 1 x 28
        features = self.conv_1(features)
        features = features.view(-1, 28)
        features = self.fc1(features).squeeze(0)
        features = self.fc2(features)
        # squash the single output to a probability of class 1 (lion)
        features = self.sigmoid(features)
        return features
def load_model(lr, seed, mfcc_total):
    torch.manual_seed(seed)
    model = Net(mfcc_total)
    # the network ends in a single sigmoid unit, so binary cross-entropy is the
    # matching loss (CrossEntropyLoss would expect raw multi-class logits)
    loss_function = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    return model, loss_function, optimizer
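# Where does fc1's input size of 28 come from? With 13 coefficients and 300
# frames, each (13, 10) kernel at stride 5 yields a 1 x 59 map, stacking the
# three branches gives 3 x 59, and conv_1's (3, 5) kernel at stride 2 leaves
# 1 x 28. A throwaway forward pass with random inputs checks this arithmetic
# (a sketch; 13 and 300 match mfcc_total and the frame truncation used above):
# In[ ]:
dummy = torch.randn(1, 1, 13, 300)
net = Net(13)
print(net(dummy, dummy, dummy).shape)  # expected: torch.Size([1])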
# ### Training
# In[8]:
###### SET HYPERPARAMETERS HERE ######
lr = 0.01
seed = 42
mfcc_total = 13
epochs = 1
# In[62]:
def main_test():
    torch.manual_seed(seed)
    model, loss_function, optimizer = load_model(lr, seed, mfcc_total)
    for epoch in range(epochs):
        for i in range(len(X_train)):
            # obtaining our mfcc, delta and delta_delta from X_train and
            # converting each to a 1 x 1 x mfcc_total x 300 float tensor
            mfcc = torch.from_numpy(X_train[i][0]).unsqueeze(0).unsqueeze(0).float()
            delta = torch.from_numpy(X_train[i][1]).unsqueeze(0).unsqueeze(0).float()
            delta_delta = torch.from_numpy(X_train[i][2]).unsqueeze(0).unsqueeze(0).float()
            # single-sample training step: forward pass, binary cross-entropy
            # loss against the ground-truth label, then an optimizer update
            optimizer.zero_grad()
            output = model(mfcc, delta, delta_delta)
            target = torch.tensor([float(y_train[i])])
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()
    return model

model = main_test()
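# A minimal sketch of scoring the validation split once training runs: it
# thresholds the sigmoid output at 0.5 and reuses X_val/y_val from the split
# above (`validate` is an illustrative helper, not part of the original pipeline):
# In[ ]:
def validate(model):
    correct = 0
    with torch.no_grad():
        for i in range(len(X_val)):
            # same tensor preparation as in training
            mfcc = torch.from_numpy(X_val[i][0]).unsqueeze(0).unsqueeze(0).float()
            delta = torch.from_numpy(X_val[i][1]).unsqueeze(0).unsqueeze(0).float()
            delta_delta = torch.from_numpy(X_val[i][2]).unsqueeze(0).unsqueeze(0).float()
            prediction = (model(mfcc, delta, delta_delta) > 0.5).item()
            correct += int(prediction == y_val[i])
    return correct / len(X_val)

print('validation accuracy: %.3f' % validate(model))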