Skip to content

Commit

Permalink
Adding all the files
Browse files Browse the repository at this point in the history
  • Loading branch information
sd2001 committed Nov 12, 2020
1 parent aafee38 commit 24333e2
Show file tree
Hide file tree
Showing 18 changed files with 204,536 additions and 0 deletions.
46 changes: 46 additions & 0 deletions files/data_prep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import modules as m
import model as mo
import data_prep_utils as u

# --- Caption preparation ----------------------------------------------------
# Raw caption file -> {image_id: [captions]} -> cleaned in place -> written to
# 'descriptions.txt', which u.load_clean_dict reads back below.
txt = u.make_txt(m.IMG_CAPTION)
descriptions = u.make_dict(txt)
u.clean_dict(descriptions)
vocabulary = u.vocab_create(descriptions)
u.save_descriptions(descriptions)

# --- Test split -------------------------------------------------------------
test = u.make_set(m.TEST_IMG_NAME)
test_descriptions = u.load_clean_dict(test)
test_features = u.load_photos('features.pkl', test)

# --- Training split ---------------------------------------------------------
# Loaded exactly once: each load re-reads descriptions.txt / unpickles
# features.pkl, so repeating it only costs time and memory.
train = u.make_set(m.TRAIN_IMG_NAME)
train_descriptions = u.load_clean_dict(train)
train_features = u.load_photos('features.pkl', train)

# --- Tokenizer and sequence sizing ------------------------------------------
tokenizer = u.create_tokenizer(train_descriptions)
# +1 leaves room for index 0, which the model's Embedding layer masks
# (mask_zero=True in model.define_model).
vocab_size = len(tokenizer.word_index) + 1
maxlen = u.max_length(train_descriptions)

# --- Model ------------------------------------------------------------------
model = mo.define_model(vocab_size, maxlen)
# A trained checkpoint already exists, so it replaces the freshly built model
# instead of training again.
model = m.load_model('model_9.h5')
epochs = 10
steps = len(train_descriptions)
# To train again, drop the load_model line above and uncomment the loop below
# (one generator pass per epoch, one checkpoint per epoch):
# for i in range(epochs):
#     generator = u.data_generator(train_descriptions, train_features, tokenizer, maxlen, vocab_size)
#     model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
#     model.save('model_' + str(i) + '.h5')
122 changes: 122 additions & 0 deletions files/data_prep_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import modules as m
import data_visualize as v

def make_txt(file):
    """Return the whole contents of the text file at ``file`` as one string.

    The file is opened in text mode with the default encoding and is closed
    even if reading raises.

    Args:
        file: Path of the file to read.

    Returns:
        The file contents as a single ``str``.
    """
    with open(file, 'r') as f:
        return f.read()

def form_dict(img_list):
    """Look up the captions of each image name in ``img_list``.

    Captions come from ``v.ic`` (image name -> list of captions). Names that
    are not present in ``v.ic`` are left out of the result.

    Returns:
        dict mapping image name -> the caption list stored in ``v.ic``.
    """
    caption_lookup = v.ic
    return {
        name: caption_lookup[name]
        for name in img_list
        if name in caption_lookup
    }

def make_dict(txt):
    """Group captions by image id.

    Every usable line of ``txt`` is split on whitespace. The first token
    names the image; everything from its first '.' onward is discarded
    (removing a suffix such as ``.jpg#0``). The remaining tokens, re-joined
    with single spaces, form one caption.

    Args:
        txt: Caption file contents, one ``<image token> <caption ...>`` record
            per line.

    Returns:
        dict mapping image id -> list of caption strings in file order.
    """
    captions_by_image = {}
    for line in txt.split('\n'):
        tokens = line.split()
        # Lines shorter than 3 characters hold no usable record. A
        # whitespace-only line yields no tokens at all and would otherwise
        # raise IndexError on tokens[0].
        if len(line) < 3 or not tokens:
            continue
        image_id = tokens[0].split('.')[0]
        caption = ' '.join(tokens[1:])
        captions_by_image.setdefault(image_id, []).append(caption)
    return captions_by_image

import string
def clean_dict(des):
    """Normalise every caption in ``des`` in place; returns nothing.

    Each word is lower-cased and stripped of ``string.punctuation``
    characters. Words that end up one character long or shorter, or that are
    not purely alphabetic, are dropped. The survivors are re-joined with
    single spaces and written back into the same list slot.
    """
    strip_punct = str.maketrans('', '', string.punctuation)
    for captions in des.values():
        for idx, caption in enumerate(captions):
            normalised = (
                token.lower().translate(strip_punct)
                for token in caption.split()
            )
            captions[idx] = ' '.join(
                token for token in normalised
                if len(token) > 1 and token.isalpha()
            )

def vocab_create(des):
    """Return the set of distinct whitespace-separated words in all captions.

    ``des`` maps a key to a list of caption strings.
    """
    return {
        word
        for captions in des.values()
        for caption in captions
        for word in caption.split()
    }

def save_descriptions(descriptions):
    """Write all captions to ``descriptions.txt`` in the working directory.

    Each caption becomes one ``<key> <caption>`` line. Lines are joined with
    a newline and no trailing newline is written. Any existing file is
    overwritten. The file is closed even if the write fails.

    Args:
        descriptions: dict mapping a key to a list of caption strings.
    """
    lines = [
        key + ' ' + desc
        for key, captions in descriptions.items()
        for desc in captions
    ]
    with open('descriptions.txt', 'w') as file:
        file.write('\n'.join(lines))

def make_set(file):
    """Return the set of identifiers listed in the text file ``file``.

    Each non-empty line contributes the text before its first '.'; empty
    lines are ignored and duplicates collapse into one entry.
    """
    contents = make_txt(file)
    return {entry.split('.')[0] for entry in contents.split('\n') if entry}

def load_clean_dict(dataset):
    """Load the captions of the images in ``dataset`` from descriptions.txt.

    Each line of ``descriptions.txt`` is ``<image_id> <caption ...>``. For
    every image id found in ``dataset`` the caption is wrapped as
    ``'startseq <caption> endseq'`` and collected under that id.

    Args:
        dataset: Container of image ids to keep (membership is tested with
            ``in``).

    Returns:
        dict mapping image id -> list of wrapped caption strings.
    """
    doc = make_txt('descriptions.txt')
    descriptions = {}
    for line in doc.split('\n'):
        tokens = line.split()
        # A blank line (or a completely empty file) yields no tokens and
        # would raise IndexError on tokens[0].
        if not tokens:
            continue
        image_id = tokens[0]
        if image_id not in dataset:
            continue
        desc = 'startseq ' + ' '.join(tokens[1:]) + ' endseq'
        descriptions.setdefault(image_id, []).append(desc)
    return descriptions

def load_photos(file, dataset):
    """Load the stored features of the images in ``dataset``.

    Args:
        file: Path of a pickle file holding a mapping image id -> features.
        dataset: Iterable of image ids to select.

    Returns:
        dict mapping each id in ``dataset`` to its entry in the pickle.

    Raises:
        KeyError: if an id in ``dataset`` has no entry in the pickle.
    """
    # NOTE: m.load is pickle.load, which can execute arbitrary code while
    # loading. Only point this at feature files you created yourself.
    with open(file, 'rb') as handle:
        all_features = m.load(handle)
    return {k: all_features[k] for k in dataset}

def max_length(des):
    """Return the word count of the longest caption in ``des``.

    Raises ValueError when ``des`` contains no captions at all.
    """
    word_counts = (len(caption.split()) for caption in to_lines(des))
    return max(word_counts)


def to_lines(des):
    """Flatten ``des`` (key -> list of captions) into one list of captions.

    Captions keep dictionary order, then list order.
    """
    return [caption for captions in des.values() for caption in captions]

def create_tokenizer(des):
    """Return a new ``m.Tokenizer`` fitted on every caption in ``des``."""
    fitted = m.Tokenizer()
    fitted.fit_on_texts(to_lines(des))
    return fitted

def create_sequences(tokenizer, max_length, desc_list, photo, vocab_size):
    """Expand one image's captions into next-word training samples.

    Every caption is encoded with ``tokenizer``. For each position ``i`` from
    1 up to the end of the encoded caption, one sample is produced:

    * the photo feature,
    * the prefix ``seq[:i]`` passed through ``m.pad_sequences`` with
      ``maxlen=max_length``,
    * the word ``seq[i]`` passed through ``m.to_categorical`` with
      ``num_classes=vocab_size``.

    Returns:
        Three arrays (built with ``m.array``): photo inputs, padded prefixes
        and target words, aligned index by index.
    """
    photo_inputs, text_inputs, targets = [], [], []
    for caption in desc_list:
        encoded = tokenizer.texts_to_sequences([caption])[0]
        for split_at in range(1, len(encoded)):
            prefix = m.pad_sequences([encoded[:split_at]], maxlen=max_length)[0]
            target = m.to_categorical([encoded[split_at]], num_classes=vocab_size)[0]
            photo_inputs.append(photo)
            text_inputs.append(prefix)
            targets.append(target)
    return m.array(photo_inputs), m.array(text_inputs), m.array(targets)

def data_generator(descriptions, photos, tokenizer, max_length, vocab_size):
    """Yield training batches forever, one batch per image.

    On each pass over ``descriptions`` the first element of the image's entry
    in ``photos`` is combined with its captions by ``create_sequences``. The
    result is yielded as ``([photo_inputs, text_inputs], targets)``. After
    the last image the pass starts again, so the generator never ends.
    """
    while True:
        for image_id, captions in descriptions.items():
            feature = photos[image_id][0]
            batch = create_sequences(
                tokenizer, max_length, captions, feature, vocab_size
            )
            photo_inputs, text_inputs, targets = batch
            yield ([photo_inputs, text_inputs], targets)
65 changes: 65 additions & 0 deletions files/data_visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import modules as m

# NOTE: everything below runs at import time, including the nltk download and
# fig.show(). Any module that imports this one just to read `ic` triggers it.


def _read_lines(path):
    """Return the newline-separated lines of the text file at ``path``."""
    with open(path, 'r') as handle:
        return handle.read().split("\n")


captions = _read_lines(m.IMG_CAPTION)
x_train = _read_lines(m.TRAIN_IMG_NAME)
x_val = _read_lines(m.VALID_IMG_NAME)
x_test = _read_lines(m.TEST_IMG_NAME)

# img:      image token of every caption line
# corpus:   every caption wrapped as 'Start <caption> End'
# ic:       image token -> list of its raw captions
# combined: [image token, wrapped caption] rows for the DataFrame
img = []
corpus = []
ic = {}
combined = []
for line in captions:
    a = line.split('#')
    # Skip blank or malformed lines instead of assuming that exactly the last
    # line is empty: a file without a trailing newline would otherwise lose
    # its final caption, and a blank line would raise IndexError.
    if len(a) < 2:
        continue
    image = a[0]
    # a[1] begins with two characters that are not caption text (they are
    # sliced off here); the rest is the caption.
    caption = a[1][2:]
    cp = 'Start ' + caption + ' End'
    combined.append([image, cp])
    img.append(image)
    corpus.append(cp)
    ic.setdefault(image, []).append(caption)

combined_df = m.DataFrame(combined, columns=['Image', 'Caption'])
ds = combined_df.values
m.nltk.download('punkt')

# dup_corpus:   every kept token, with repeats (feeds the frequency count)
# final_corpus: each kept token once, in order of first appearance
final_corpus = []
dup_corpus = []
_seen_words = set()  # O(1) membership instead of scanning final_corpus
for sent in corpus:
    for w in m.word_tokenize(sent):
        w = w.lower()
        if w == '.' or w == '!' or w == ",":
            continue
        dup_corpus.append(w)
        if w not in _seen_words:
            _seen_words.add(w)
            final_corpus.append(w)

fdist1 = m.nltk.FreqDist(dup_corpus)
fd = fdist1.most_common()
# [word, count] rows, most frequent first.
words = [[word, count] for word, count in fd]

df = m.DataFrame(words, columns=['Words', 'Count'])
import plotly.express as px
fig = px.bar(df[:50], x='Words', y='Count', color="Count", title="Most freq occuring words")
fig.update_layout(
    font_family="Courier New",
    title_x=0.5,
    font_color="green",
    title_font_family="Times New Roman",
    title_font_color="black",
    legend_title_font_color="green"
)
fig.show()
18 changes: 18 additions & 0 deletions files/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import modules as m

def define_model(vocab_size, max_length):
    """Build, compile and return the two-input caption model.

    The model takes a 4096-element image feature vector and a word-index
    sequence of length ``max_length``, and outputs a softmax over
    ``vocab_size`` words.

    Side effects: prints the model summary and writes a diagram to
    ``model.png`` via ``m.plot_model``.

    Args:
        vocab_size: Size of the embedding input and of the softmax output.
        max_length: Length of the word-index input sequence.

    Returns:
        The model, compiled with categorical cross-entropy loss and the
        'adam' optimizer.
    """
    # Image branch: feature vector -> dropout -> 256-unit dense.
    photo_in = m.Input(shape=(4096,))
    photo_drop = m.Dropout(0.5)(photo_in)
    photo_vec = m.Dense(256, activation='relu')(photo_drop)

    # Text branch: word indices -> masked embedding -> dropout -> LSTM.
    text_in = m.Input(shape=(max_length,))
    text_emb = m.Embedding(vocab_size, 256, mask_zero=True)(text_in)
    text_drop = m.Dropout(0.5)(text_emb)
    text_vec = m.LSTM(256)(text_drop)

    # Decoder: element-wise sum of both branches -> dense -> softmax.
    merged = m.add([photo_vec, text_vec])
    hidden = m.Dense(256, activation='relu')(merged)
    word_probs = m.Dense(vocab_size, activation='softmax')(hidden)

    captioner = m.Model(inputs=[photo_in, text_in], outputs=word_probs)
    captioner.compile(loss='categorical_crossentropy', optimizer='adam')
    captioner.summary()
    m.plot_model(captioner, to_file='model.png', show_shapes=True)
    return captioner
40 changes: 40 additions & 0 deletions files/modules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import os
from pandas import DataFrame
from tensorflow.keras.preprocessing import image, sequence
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
import ssl
import plotly
import logging, os
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf
from PIL import Image
from pickle import dump,load
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input,Dense,LSTM,Embedding,Dropout,add
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model,load_model
from nltk.translate.bleu_score import corpus_bleu
tf.get_logger().setLevel('INFO')
from numpy import argmax,array
import ssl
from os import listdir
import matplotlib.pyplot as plt

# Root folder of the Flickr8k download. It defaults to the original author's
# location; set the IMAGE_CAPTIONING_DATA environment variable to run the
# project from a different machine or directory without editing this file.
_DATA_ROOT = os.environ.get(
    "IMAGE_CAPTIONING_DATA",
    "/home/sd2001/Desktop/Programming/Image Captioning",
)
_TEXT_DIR = _DATA_ROOT + "/Flickr8k_text"

# Directory of the dataset images.
IMG_DIR = _DATA_ROOT + "/Flickr8k_Dataset/Flicker8k_Dataset"
# Image-name lists for the train / test / validation splits.
TRAIN_IMG_NAME = _TEXT_DIR + "/Flickr_8k.trainImages.txt"
TEST_IMG_NAME = _TEXT_DIR + "/Flickr_8k.testImages.txt"
VALID_IMG_NAME = _TEXT_DIR + "/Flickr_8k.devImages.txt"
# Caption files: raw tokens and the lemmatised variant.
IMG_CAPTION = _TEXT_DIR + "/Flickr8k.token.txt"
LEMM_CAPTION = _TEXT_DIR + "/Flickr8k.lemma.token.txt"
54 changes: 54 additions & 0 deletions files/output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import modules as m

def _vgg16_features(image):
    """Return the output of VGG16's second-to-last layer for ``image``.

    ``image`` is converted with ``m.img_to_array``, given a leading batch
    axis of size 1 and passed through ``m.preprocess_input`` before
    prediction. A fresh VGG16 is built on every call.
    """
    model = m.VGG16()
    # Re-wire the network so it stops at the layer before the final one.
    model = m.Model(inputs=model.inputs, outputs=model.layers[-2].output)
    pixels = m.img_to_array(image)
    pixels = pixels.reshape((1, pixels.shape[0], pixels.shape[1], pixels.shape[2]))
    pixels = m.preprocess_input(pixels)
    return model.predict(pixels, verbose=0)


def extract_features(file):
    """Return the VGG16 feature of the image stored at path ``file``.

    The image is loaded at 224x224 with ``m.load_img``.
    """
    image = m.load_img(file, target_size=(224, 224))
    return _vgg16_features(image)


def extract_features2(img):
    """Return the VGG16 feature of an already-opened image.

    Args:
        img: Image object exposing ``convert`` and ``resize``
            (assumed to be a PIL image -- confirm against callers).
    """
    # Force three colour channels first: a grayscale or RGBA input would
    # otherwise reach the network with the wrong channel count.
    image = img.convert('RGB').resize((224, 224))
    return _vgg16_features(image)

def word2id(integer, tokenizer):
    """Return the word whose index in ``tokenizer.word_index`` is ``integer``.

    ``tokenizer.word_index`` maps word -> index. The first word with a
    matching index is returned, or None when no word has it.
    """
    matches = (
        word
        for word, index in tokenizer.word_index.items()
        if index == integer
    )
    return next(matches, None)

def generate_desc(model, tokenizer, photo, max_length):
    """Generate a caption for ``photo`` one word at a time.

    Starting from ``'startseq'``, the text so far is encoded with
    ``tokenizer``, padded to ``max_length`` and fed to ``model`` together
    with ``photo``. The argmax of the prediction is mapped back to a word
    with ``word2id`` and appended. Generation stops after ``max_length``
    words, when the index maps to no word, or once ``'endseq'`` has been
    appended.

    Returns:
        The generated text, including the leading ``'startseq'`` and, if it
        was produced, the trailing ``'endseq'``.
    """
    caption = 'startseq'
    for _ in range(max_length):
        encoded = tokenizer.texts_to_sequences([caption])[0]
        padded = m.pad_sequences([encoded], maxlen=max_length)
        scores = model.predict([photo, padded], verbose=0)
        next_word = word2id(m.argmax(scores), tokenizer)
        if next_word is None:
            break
        caption += ' ' + next_word
        if next_word == 'endseq':
            break
    return caption

# --- Demo: caption a single image -------------------------------------------
# NOTE: m.load is pickle.load; only load tokenizer files you created yourself.
with open('tokenizer.pkl', 'rb') as _tokenizer_file:
    tokenizer = m.load(_tokenizer_file)
# Presumably the caption length the checkpoint was trained with (maxlen in
# data_prep) -- confirm before using a different checkpoint.
max_length = 34
model = m.load_model('model_9.h5')
pic = 'test14.jpg'
photo = extract_features(pic)
description = generate_desc(model, tokenizer, photo, max_length)

# Copy the pixels into an array while the file is open, so the image handle
# is released before plotting.
with m.Image.open(pic) as _picture:
    im = m.array(_picture)
m.plt.imshow(im)
print(description)
# Remove the two helpers from the module namespace once the demo has run.
del generate_desc, extract_features
Loading

0 comments on commit 24333e2

Please sign in to comment.