Skip to content

Commit

Permalink
add ASR
Browse files Browse the repository at this point in the history
  • Loading branch information
varunk122 committed Jul 4, 2020
1 parent 169fba2 commit 1fd9c3b
Show file tree
Hide file tree
Showing 29 changed files with 1,278 additions and 3,220 deletions.
138 changes: 138 additions & 0 deletions ctc_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import numpy as np
import matplotlib.pyplot as plt
import librosa
import os
import soundfile as sf
from scipy.io import wavfile #for audio processing
import random

import tensorflow as tf
print(tf.__version__)
from keras.models import Model , Sequential
from keras.utils import Sequence
import keras

from keras.layers import *
from keras.layers.wrappers import TimeDistributed
from keras.layers.merge import Add
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras import backend as K
from keras.utils import plot_model

def ctc_lambda_func(args):
    """Compute the batched CTC loss inside a Keras ``Lambda`` layer.

    Args:
        args: A 4-item sequence ``(y_pred, labels, input_length,
            label_length)``, in that order.

    Returns:
        The result of ``K.ctc_batch_cost`` for the batch.
    """
    y_pred, labels, input_length, label_length = args
    # The backend expects the labels first and the predictions second, so the
    # first two items are swapped relative to the order they arrive in.
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

class CTC():
    """Builder for a Conv1D + bidirectional-LSTM network trained with CTC loss.

    ``build`` creates two Keras models that share the same layers:

    * ``self.tm`` maps the input features to per-timestep softmax outputs.
    * ``self.m`` takes the features plus the CTC bookkeeping inputs (labels,
      input length, label length) and outputs the CTC loss.
    """

    def __init__(self,
                 input_size=None,
                 output_size=None,
                 initializer='glorot_uniform'):
        """Store the model hyper-parameters; no layers are created here.

        Args:
            input_size: Shape passed to the feature ``Input`` layer
                (batch dimension excluded).
            output_size: Number of units of the final softmax layer.
            initializer: Kernel initializer applied to the convolutional,
                recurrent and output layers created by ``build``.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.initializer = initializer
        self.m = None   # model with the CTC loss as output; set by build()
        self.tm = None  # model with the softmax as output; set by build()

    def build(self,
              conv_filters=200,
              conv2d_filters=13,
              conv_size=5,
              conv2d_strides=1,
              conv_strides=1,
              act='relu',
              rnn_layers=2,
              LSTM_units=128,
              drop_out=0.8):
        """Create the layers and return ``(self.m, self.tm)``.

        Args:
            conv_filters: Number of filters of each of the two Conv1D layers.
            conv2d_filters: Unused; kept so existing callers keep working.
            conv_size: Kernel size of each Conv1D layer.
            conv2d_strides: Unused; kept so existing callers keep working.
            conv_strides: Stride of each Conv1D layer.
            act: Activation applied after each convolutional BatchNorm.
            rnn_layers: Number of bidirectional LSTM layers.
            LSTM_units: Units per direction of each LSTM layer.
            drop_out: Value passed to ``Dropout`` after each recurrent layer.
                NOTE(review): Keras treats this as the fraction of units to
                drop, so 0.8 is aggressive - confirm it is intentional.

        Returns:
            A tuple ``(m, tm)``: the model whose output is the CTC loss and
            the model whose output is the softmax prediction.
        """
        input_data = Input(shape=self.input_size, name='the_inputs')

        # Two identical Conv1D -> BatchNorm -> activation stages.
        x = Conv1D(conv_filters,
                   conv_size,
                   strides=conv_strides,
                   padding="same",
                   kernel_initializer=self.initializer,
                   name='conv1d1')(input_data)
        x = BatchNormalization()(x)
        x = Activation(act)(x)
        x = Conv1D(conv_filters,
                   conv_size,
                   strides=conv_strides,
                   padding="same",
                   kernel_initializer=self.initializer,
                   name='conv1d2')(x)
        x = BatchNormalization()(x)
        x = Activation(act)(x)

        # NOTE(review): the original indentation was lost; Dropout and
        # BatchNormalization are assumed to follow every recurrent layer
        # (i.e. to live inside the loop) - confirm against the trained model.
        for _ in range(rnn_layers):
            x = Bidirectional(LSTM(LSTM_units,
                                   kernel_initializer=self.initializer,
                                   return_sequences=True))(x)
            x = Dropout(drop_out)(x)
            x = BatchNormalization()(x)

        y_pred = TimeDistributed(Dense(self.output_size,
                                       kernel_initializer=self.initializer,
                                       activation='softmax'))(x)

        # ctc inputs
        labels = Input(name='the_labels', shape=[None, ], dtype='int32')
        input_length = Input(name='input_length', shape=[1], dtype='int32')
        label_length = Input(name='label_length', shape=[1], dtype='int32')
        loss_out = Lambda(ctc_lambda_func,
                          output_shape=(1,),
                          name='ctc')([y_pred,
                                       labels,
                                       input_length,
                                       label_length])

        self.tm = Model(inputs=input_data,
                        outputs=y_pred)
        self.m = Model(inputs=[input_data,
                               labels,
                               input_length,
                               label_length],
                       outputs=loss_out)
        return self.m, self.tm

def ctc(y_true, y_pred):
    """Return ``y_pred`` unchanged, ignoring ``y_true``.

    Args:
        y_true: Ignored.
        y_pred: Value handed straight back to the caller.

    Returns:
        ``y_pred``.
    """
    # NOTE(review): presumably meant as the compile-time loss for the model
    # whose output is already the CTC loss ('ctc' Lambda layer) - confirm.
    return y_pred

# Build the network graph for a (101, 594) input and 29 output classes.
model_ctc = CTC((101, 594), 29)
model_ctc.build()

# `model_from_json` lives in keras.models and is not provided by the wildcard
# `keras.layers` import, so it has to be brought into scope explicitly.
from keras.models import model_from_json

# Read the serialized architecture; `with` closes the file even on error.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")


def graph_spectrogram(wav_file):
    """Compute the spectrogram of a WAV file and pad it to a fixed shape.

    For 2-D audio data only the first column (channel) is used.

    Args:
        wav_file: Path of the WAV file to read.

    Returns:
        The spectrogram returned by ``plt.specgram`` after being passed
        through ``modify_spectrogram_shape``.

    Raises:
        ValueError: If the audio data is neither 1-D nor 2-D.
    """
    _, data = get_wav_info(wav_file)
    nfft = 200      # Length of each window segment
    # NOTE(review): the sampling frequency is fixed here rather than taken
    # from the rate stored in the file - confirm this is intended.
    fs = 8000       # Sampling frequencies
    noverlap = 120  # Overlap between windows
    nchannels = data.ndim
    if nchannels == 1:
        samples = data
    elif nchannels == 2:
        samples = data[:, 0]
    else:
        raise ValueError(
            "expected 1-D or 2-D audio data, got %d dimensions" % nchannels)
    pxx, freqs, bins, im = plt.specgram(samples, NFFT=nfft, Fs=fs,
                                        noverlap=noverlap)
    return modify_spectrogram_shape(pxx)

# Load a wav file
def get_wav_info(swav_file):
    """Read a WAV file and return its sample rate and sample data.

    Args:
        swav_file: Path (or open file handle) accepted by
            ``scipy.io.wavfile.read``.

    Returns:
        A tuple ``(rate, data)`` exactly as returned by ``wavfile.read``.
    """
    # The body previously read an undefined name instead of the parameter.
    rate, data = wavfile.read(swav_file)
    return rate, data

def modify_spectrogram_shape(sample, shape=(101, 198)):
    """Zero-pad or crop a 2-D array along its second axis to ``shape``.

    Args:
        sample: 2-D array whose first dimension must equal ``shape[0]``.
        shape: ``(rows, columns)`` of the returned array.

    Returns:
        A new array of ``shape``. Its leading columns hold ``sample``
        (cropped if ``sample`` is wider than ``shape[1]``); the rest are zero.
    """
    a = np.zeros(shape)
    # Copy only as many columns as fit, so wide inputs are cropped, not fatal.
    width = min(sample.shape[1], shape[1])
    a[:, :width] = sample[:, :width]
    # Return the padded copy; returning `sample` would discard the padding.
    return a


if __name__ == '__main__':
    # Ask for a WAV file, convert it to a padded spectrogram and run the
    # loaded model on it.
    file_path = input("file_path: ")

    inp = graph_spectrogram(file_path)

    # A multi-input Keras model takes its inputs as one list. The other three
    # arrays each hold a single sample, so the spectrogram gets a leading
    # batch axis of size 1 to match.
    # NOTE(review): presumably the arrays map to the_inputs, the_labels,
    # input_length and label_length of the training model - confirm against
    # the architecture stored in model.json.
    predictions = loaded_model.predict([np.expand_dims(inp, axis=0),
                                        np.array([0]),
                                        np.array([101]),
                                        np.array([40])])




820 changes: 0 additions & 820 deletions demo.ipynb

This file was deleted.

Binary file added dic/embedding.npy
Binary file not shown.
1 change: 1 addition & 0 deletions dic/index_word.json

Large diffs are not rendered by default.

Binary file added dic/index_word.pkl
Binary file not shown.
1 change: 1 addition & 0 deletions dic/word_index.json

Large diffs are not rendered by default.

Binary file added dic/word_index.pkl
Binary file not shown.
Binary file added examples/myvoice.wav
Binary file not shown.
Binary file added examples/voice_1.wav
Binary file not shown.
Binary file added examples/voice_2.wav
Binary file not shown.
Loading

0 comments on commit 1fd9c3b

Please sign in to comment.