add ASR

isro01 · Jul 4, 2020 · 1fd9c3b · 1fd9c3b
1 parent 169fba2
commit 1fd9c3b
Show file tree

Hide file tree

Showing 29 changed files with 1,278 additions and 3,220 deletions.
diff --git a/ctc_model.py b/ctc_model.py
@@ -0,0 +1,138 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import librosa
+import os
+import soundfile as sf
+from scipy.io import wavfile #for audio processing
+import random
+
+import tensorflow as tf
+print(tf.__version__)
+from keras.models import Model , Sequential
+from keras.utils import Sequence
+import keras
+
+from keras.layers import *
+from keras.layers.wrappers import TimeDistributed
+from keras.layers.merge import Add
+from keras.layers.normalization import BatchNormalization
+from keras.models import Model
+from keras import backend as K
+from keras.utils import plot_model
+
+def ctc_lambda_func(args):
+    """Compute the batch CTC cost from a single packed argument.
+
+    Takes one sequence instead of four parameters so that it can be wrapped
+    in a Keras ``Lambda`` layer.
+
+    Args:
+        args: Sequence unpacked as ``(y_pred, labels, input_length,
+            label_length)``.
+
+    Returns:
+        The value of ``K.ctc_batch_cost`` for those four tensors.
+    """
+    predictions, targets, prediction_lengths, target_lengths = args
+    return K.ctc_batch_cost(
+        targets, predictions, prediction_lengths, target_lengths
+    )
+
+class CTC():
+    """Builder for a Conv1D + bidirectional-LSTM network with a CTC loss head.
+
+    After ``build`` has run, ``self.tm`` is the model mapping the input to
+    per-timestep softmax outputs, and ``self.m`` is the four-input model
+    whose single output is the CTC cost.
+    """
+
+    def __init__(self,
+                 input_size=None,
+                 output_size=None,
+                 initializer='glorot_uniform'):
+        """Store the configuration; no layers are created here.
+
+        Args:
+            input_size: Shape handed to the ``the_inputs`` Input layer.
+            output_size: Unit count of the final softmax Dense layer.
+            initializer: Stored on the instance; ``build`` does not read it.
+        """
+        self.m = None   # filled in by build(): model ending in the CTC cost
+        self.tm = None  # filled in by build(): model ending in the softmax
+        self.initializer = initializer
+        self.output_size = output_size
+        self.input_size = input_size
+
+    def build(self,
+              conv_filters=200,
+              conv2d_filters=13,
+              conv_size=5,
+              conv2d_strides=1,
+              conv_strides=1,
+              act='relu',
+              rnn_layers=2,
+              LSTM_units=128,
+              drop_out=0.8):
+        """Create both Keras models and keep them on the instance.
+
+        Args:
+            conv_filters: Filter count of each of the two Conv1D layers.
+            conv2d_filters: Accepted but not used by this method.
+            conv_size: Kernel size of each Conv1D layer.
+            conv2d_strides: Accepted but not used by this method.
+            conv_strides: Stride of each Conv1D layer.
+            act: Activation applied after each convolution's batch norm.
+            rnn_layers: Number of bidirectional LSTM stages.
+            LSTM_units: Units of each LSTM.
+            drop_out: Rate passed to the Dropout layer of each LSTM stage.
+
+        Returns:
+            Tuple ``(self.m, self.tm)``: the CTC-cost model and the
+            softmax-output model.
+        """
+        features = Input(shape=self.input_size, name='the_inputs')
+
+        # Two identical stages: Conv1D -> BatchNormalization -> activation.
+        hidden = features
+        for conv_name in ('conv1d1', 'conv1d2'):
+            hidden = Conv1D(conv_filters,
+                            conv_size,
+                            strides=conv_strides,
+                            padding="same",
+                            name=conv_name)(hidden)
+            hidden = BatchNormalization()(hidden)
+            hidden = Activation(act)(hidden)
+
+        # Recurrent stack: BiLSTM -> Dropout -> BatchNormalization per stage.
+        for _ in range(rnn_layers):
+            recurrent = Bidirectional(LSTM(LSTM_units,
+                                           return_sequences=True))
+            hidden = recurrent(hidden)
+            hidden = Dropout(drop_out)(hidden)
+            hidden = BatchNormalization()(hidden)
+
+        softmax_head = TimeDistributed(Dense(self.output_size,
+                                             activation='softmax'))
+        y_pred = softmax_head(hidden)
+
+        # Extra inputs consumed only by the CTC cost layer.
+        labels = Input(name='the_labels', shape=[None,], dtype='int32')
+        input_length = Input(name='input_length', shape=[1], dtype='int32')
+        label_length = Input(name='label_length', shape=[1], dtype='int32')
+        ctc_layer = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')
+        loss_out = ctc_layer([y_pred, labels, input_length, label_length])
+
+        self.tm = Model(inputs=features, outputs=y_pred)
+        self.m = Model(inputs=[features, labels, input_length, label_length],
+                       outputs=loss_out)
+        return self.m, self.tm
+
+def ctc(y_true, y_pred):
+    """Return ``y_pred`` untouched; ``y_true`` is ignored.
+
+    NOTE(review): this has the ``(y_true, y_pred)`` signature of a Keras loss
+    and is presumably meant for a model whose output already is the CTC cost
+    -- confirm against the training code.
+    """
+    passthrough = y_pred
+    return passthrough
+
+# Build the architecture as the original script does; in the code visible
+# here only the model restored from disk below is used afterwards.
+model_ctc = CTC((101,594), 29)
+model_ctc.build()
+
+# Read the serialized architecture; the context manager closes the file even
+# if reading fails.
+with open('model.json', 'r') as json_file:
+    loaded_model_json = json_file.read()
+# model_from_json lives in keras.models and is not exported by
+# ``from keras.layers import *``, so it is referenced through the package.
+loaded_model = keras.models.model_from_json(loaded_model_json)
+# load weights into new model
+loaded_model.load_weights("model.h5")
+print("Loaded model from disk")
+
+
+def graph_spectrogram(wav_file):
+    """Compute a spectrogram for a WAV file and fit it to the fixed shape.
+
+    The audio is read with ``get_wav_info``; when the sample array is not
+    one-dimensional only its first column (channel) is used. The spectrogram
+    comes from ``plt.specgram``, which also draws onto the current pyplot
+    axes as a side effect.
+
+    Args:
+        wav_file: Path of the WAV file to read.
+
+    Returns:
+        The result of ``modify_spectrogram_shape`` applied to the
+        spectrogram array.
+    """
+    _rate, data = get_wav_info(wav_file)
+    nfft = 200  # Length of each window segment
+    fs = 8000  # Sampling frequency given to specgram; the file's rate is unused
+    noverlap = 120  # Overlap between windows
+    # A 1-D array is mono; anything else is reduced to its first channel so
+    # that the spectrogram is always defined.
+    mono = data if data.ndim == 1 else data[:, 0]
+    pxx, _freqs, _bins, _im = plt.specgram(mono, nfft, fs, noverlap=noverlap)
+    return modify_spectrogram_shape(pxx)
+
+# Load a wav file
+def get_wav_info(swav_file):
+    """Read a WAV file with ``scipy.io.wavfile.read``.
+
+    Args:
+        swav_file: Path (or open file object) of the WAV file to read.
+
+    Returns:
+        Tuple ``(rate, data)`` exactly as returned by ``wavfile.read``.
+    """
+    # Read from the parameter itself; the body previously referenced an
+    # undefined name ``wav_file``.
+    rate, data = wavfile.read(swav_file)
+    return rate, data
+
+def modify_spectrogram_shape(sample, shape=(101, 198)):
+    """Zero-pad (or crop) a 2-D array along its second axis to ``shape``.
+
+    Args:
+        sample: 2-D array whose first axis must be compatible with
+            ``shape[0]``; numpy raises ``ValueError`` on assignment otherwise.
+        shape: Shape of the returned array.
+
+    Returns:
+        A new zero-filled array of ``shape`` whose leading columns hold
+        ``sample``. Columns beyond ``shape[1]`` are dropped.
+    """
+    padded = np.zeros(shape)
+    # Copy only as many columns as fit, so a wide input is cropped rather
+    # than raising a broadcast error.
+    width = min(sample.shape[1], shape[1])
+    padded[:, :width] = sample[:, :width]
+    # Return the padded buffer, not the untouched input.
+    return padded
+
+
+if __name__ == '__main__':
+
+    file_path = input("file_path: ")
+
+    inp = graph_spectrogram(file_path)
+
+    # Keras takes the inputs of a multi-input model as ONE list, and every
+    # array needs a leading batch axis, hence ``inp[np.newaxis]``.
+    # NOTE(review): the three trailing arrays are kept in their original
+    # order and presumably feed the_labels, input_length and label_length
+    # -- confirm against the inputs declared in model.json.
+    predictions = loaded_model.predict([inp[np.newaxis],
+                                        np.array([0]),
+                                        np.array([101]),
+                                        np.array([40])])
+
+
+
+
diff --git a/demo.ipynb b/demo.ipynb
diff --git a/dic/embedding.npy b/dic/embedding.npy
diff --git a/dic/index_word.json b/dic/index_word.json
diff --git a/dic/index_word.pkl b/dic/index_word.pkl
diff --git a/dic/word_index.json b/dic/word_index.json
diff --git a/dic/word_index.pkl b/dic/word_index.pkl
diff --git a/examples/myvoice.wav b/examples/myvoice.wav
diff --git a/examples/voice_1.wav b/examples/voice_1.wav
diff --git a/examples/voice_2.wav b/examples/voice_2.wav