Showing 18 changed files with 204,536 additions and 0 deletions.
@@ -0,0 +1,46 @@
import modules as m
import model as mo
import data_prep_utils as u

# Build and clean the caption dictionary, then persist it to descriptions.txt
txt=u.make_txt(m.IMG_CAPTION)
descriptions=u.make_dict(txt)
u.clean_dict(descriptions)
vocabulary=u.vocab_create(descriptions)
u.save_descriptions(descriptions)

# Test split: image ids, cleaned captions and precomputed VGG16 features
test=u.make_set(m.TEST_IMG_NAME)
test_descriptions=u.load_clean_dict(test)
test_features=u.load_photos('features.pkl',test)

# Training split
train=u.make_set(m.TRAIN_IMG_NAME)
#print(len(train))
train_descriptions=u.load_clean_dict(train)
#print(len(train_descriptions))
train_features=u.load_photos('features.pkl',train)
#print(len(train_features))

tokenizer=u.create_tokenizer(train_descriptions)
vocab_size=len(tokenizer.word_index) + 1
#print('Vocabulary Size: %d' % vocab_size)
maxlen=u.max_length(train_descriptions)

# Reload the training split and features before building the model
train=u.make_set(m.TRAIN_IMG_NAME)
#print('Dataset: %d' % len(train))
train_descriptions=u.load_clean_dict(train)
#print('Descriptions: train=%d' % len(train_descriptions))
train_features=u.load_photos('features.pkl',train)
#print('Photos: train=%d' % len(train_features))
vocab_size=len(tokenizer.word_index) + 1

model=mo.define_model(vocab_size, maxlen)
### Since I already trained this, I am loading a model instead of training it again
model=m.load_model('model_9.h5')
epochs=10
steps=len(train_descriptions)
### Uncomment if the model is to be trained again
#for i in range(epochs):
#    generator=u.data_generator(train_descriptions, train_features, tokenizer, maxlen, vocab_size)
#    model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
#    model.save('model_' + str(i) + '.h5')
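Note (not part of the hunks shown above): the script loads precomputed image features from 'features.pkl', but the extraction step does not appear in this view. Below is a minimal sketch of how such a file could be built with the VGG16 imports already in modules.py; the helper name build_features and the choice of the fc2 layer are assumptions made to match what load_photos and data_generator expect.

# Hypothetical helper, not from this commit: precompute 'features.pkl'
import modules as m

def build_features(img_dir):
    model=m.VGG16()
    # keep the 4096-d fc2 layer, drop the classification head
    model=m.Model(inputs=model.inputs, outputs=model.layers[-2].output)
    features={}
    for name in m.listdir(img_dir):
        img=m.load_img(img_dir + '/' + name, target_size=(224, 224))
        img=m.img_to_array(img).reshape((1, 224, 224, 3))
        img=m.preprocess_input(img)
        # key by image id (filename without extension), as load_photos expects
        features[name.split('.')[0]]=model.predict(img, verbose=0)
    return features

#m.dump(build_features(m.IMG_DIR), open('features.pkl', 'wb'))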
@@ -0,0 +1,122 @@ (data_prep_utils.py)
import modules as m
import data_visualize as v
import string

def make_txt(file):
    f=open(file,'r')
    text=f.read()
    f.close()
    return text

def form_dict(img_list):
    # Map each image name to its raw captions from data_visualize.ic
    img_dict={}
    for im in img_list:
        if im in v.ic:
            img_dict[im]=v.ic[im]
    return img_dict

def make_dict(txt):
    # Parse Flickr8k.token.txt lines into {image_id: [caption, ...]}
    x={}
    for line in txt.split('\n'):
        tokens=line.split()
        if len(line)<3:
            continue
        image_id,image_desc=tokens[0],tokens[1:]
        image_id=image_id.split('.')[0]
        image_desc=' '.join(image_desc)
        if image_id not in x:
            x[image_id]=list()
        x[image_id].append(image_desc)
    return x

def clean_dict(des):
    # Lowercase, strip punctuation, drop one-letter and non-alphabetic tokens
    table=str.maketrans('', '', string.punctuation)
    for k,desc_list in des.items():
        for i in range(len(desc_list)):
            desc=desc_list[i]
            desc=desc.split()
            desc=[word.lower() for word in desc]
            desc=[w.translate(table) for w in desc]
            desc=[word for word in desc if len(word)>1]
            desc=[word for word in desc if word.isalpha()]
            desc_list[i]=' '.join(desc)

def vocab_create(des):
    # Set of all distinct words across the cleaned captions
    all1=set()
    for k in des.keys():
        [all1.update(d.split()) for d in des[k]]
    return all1

def save_descriptions(descriptions):
    lines=[]
    for key,d in descriptions.items():
        for desc in d:
            lines.append(key + ' ' + desc)
    data='\n'.join(lines)
    file=open('descriptions.txt', 'w')
    file.write(data)
    file.close()

def make_set(file):
    # Read a split file (train/dev/test) and return the set of image ids
    txt=make_txt(file)
    d=[]
    for line in txt.split('\n'):
        if len(line)<1:
            continue
        sent=line.split('.')[0]
        d.append(sent)
    return set(d)

def load_clean_dict(dataset):
    # Load cleaned captions for the given split, wrapped in startseq/endseq tokens
    doc=make_txt('descriptions.txt')
    descriptions=dict()
    for line in doc.split('\n'):
        tokens=line.split()
        image_id,image_desc=tokens[0], tokens[1:]
        if image_id in dataset:
            if image_id not in descriptions:
                descriptions[image_id]=list()
            desc='startseq ' + ' '.join(image_desc) + ' endseq'
            descriptions[image_id].append(desc)
    return descriptions

def load_photos(file,dataset):
    # Load precomputed VGG16 features and keep only the ids in this split
    all_features=m.load(open(file, 'rb'))
    f={k: all_features[k] for k in dataset}
    return f

def to_lines(des):
    all=[]
    for key in des.keys():
        [all.append(d) for d in des[key]]
    return all

def max_length(des):
    # Length (in words) of the longest caption
    lines=to_lines(des)
    return max(len(d.split()) for d in lines)

def create_tokenizer(des):
    lines=to_lines(des)
    tokenizer=m.Tokenizer()
    tokenizer.fit_on_texts(lines)
    return tokenizer

def create_sequences(tokenizer, max_length, desc_list, photo, vocab_size):
    # Expand each caption into (photo, partial sequence) -> next-word pairs
    X1,X2,y=[],[],[]
    for desc in desc_list:
        seq=tokenizer.texts_to_sequences([desc])[0]
        for i in range(1,len(seq)):
            in_seq,out_seq=seq[:i], seq[i]
            in_seq=m.pad_sequences([in_seq], maxlen=max_length)[0]
            out_seq=m.to_categorical([out_seq], num_classes=vocab_size)[0]
            X1.append(photo)
            X2.append(in_seq)
            y.append(out_seq)
    return m.array(X1), m.array(X2), m.array(y)

def data_generator(descriptions, photos, tokenizer, max_length, vocab_size):
    # Yield one batch per image: all (partial caption -> next word) pairs for it
    while 1:
        for key,desc_list in descriptions.items():
            photo=photos[key][0]
            in_img,in_seq,out_word=create_sequences(tokenizer, max_length, desc_list, photo, vocab_size)
            yield ([in_img,in_seq],out_word)
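A small self-contained illustration (not part of the commit) of the expansion that create_sequences performs: each caption becomes several (partial word-id sequence -> next word id) pairs. In data_generator each such pair is additionally matched with the image's 4096-d feature vector, and the target word is one-hot encoded to vocab_size.

# Toy walk-through of the caption expansion used above
from tensorflow.keras.preprocessing.text import Tokenizer

tok=Tokenizer()
tok.fit_on_texts(['startseq a dog runs endseq'])
seq=tok.texts_to_sequences(['startseq a dog runs endseq'])[0]
for i in range(1,len(seq)):
    print(seq[:i], '->', seq[i])   # e.g. [1] -> 2, then [1, 2] -> 3, and so on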
@@ -0,0 +1,65 @@ (data_visualize.py)
import modules as m
import plotly.express as px

# Read the raw captions and the train/dev/test split files
captions=open(m.IMG_CAPTION, 'r').read().split("\n")
x_train=open(m.TRAIN_IMG_NAME, 'r').read().split("\n")
x_val=open(m.VALID_IMG_NAME, 'r').read().split("\n")
x_test=open(m.TEST_IMG_NAME, 'r').read().split("\n")

# Build {image name: [captions]} plus a flat corpus of 'Start ... End' captions
img=[]
corpus=[]
ic={}
combined=[]
for c in range(len(captions)-1):
    a=captions[c].split('#')
    image=a[0]
    cp='Start '+a[1][2:]+' End'
    combined.append([image,cp])
    img.append(image)
    corpus.append(cp)
    if image in ic:
        ic[image].append(a[1][2:])
    else:
        ic[image]=[a[1][2:]]

combined_df=m.DataFrame(combined,columns=['Image','Caption'])
ds=combined_df.values
m.nltk.download('punkt')

# Tokenise the corpus, dropping simple punctuation; keep the full word list for
# frequency counting and a deduplicated list of distinct words
final_corpus=[]
dup_corpus=[]
for sent in corpus:
    words=m.word_tokenize(sent)
    for w in words:
        w=w.lower()
        if w=='.' or w=='!' or w==",":
            continue
        else:
            dup_corpus.append(w)
        if w in final_corpus:
            continue
        else:
            final_corpus.append(w)

# Word frequency distribution over the full (non-deduplicated) corpus
fdist1=m.nltk.FreqDist(dup_corpus)
fd=fdist1.most_common()
words=[]
for i in range(len(fd)):
    aa=[]
    aa.append(fd[i][0])
    aa.append(fd[i][1])
    words.append(aa)

# Bar chart of the 50 most frequent words
df=m.DataFrame(words,columns=['Words','Count'])
fig=px.bar(df[:50], x='Words', y='Count',color="Count",title="Most frequently occurring words")
fig.update_layout(
    font_family="Courier New",
    title_x=0.5,
    font_color="green",
    title_font_family="Times New Roman",
    title_font_color="black",
    legend_title_font_color="green"
)
fig.show()
@@ -0,0 +1,18 @@ (model.py)
import modules as m

def define_model(vocab_size, max_length):
    # Image feature branch: 4096-d VGG16 fc2 vector -> 256-d representation
    inputs1=m.Input(shape=(4096,))
    fe1=m.Dropout(0.5)(inputs1)
    fe2=m.Dense(256, activation='relu')(fe1)
    # Caption branch: padded word-id sequence -> embedding -> LSTM state
    inputs2=m.Input(shape=(max_length,))
    se1=m.Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2=m.Dropout(0.5)(se1)
    se3=m.LSTM(256)(se2)
    # Decoder: merge both branches and predict the next word over the vocabulary
    decoder1=m.add([fe2, se3])
    decoder2=m.Dense(256, activation='relu')(decoder1)
    outputs=m.Dense(vocab_size, activation='softmax')(decoder2)
    model=m.Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.summary()
    m.plot_model(model, to_file='model.png', show_shapes=True)
    return model
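As a quick sanity check (illustrative only, with placeholder sizes rather than values from this dataset, and assuming the plot_model call inside define_model can run), the model above takes a 4096-d image feature vector plus a padded word-id sequence and returns a softmax over the vocabulary:

# Shape check with dummy inputs (placeholder vocab_size and max_length)
import numpy as np
import model as mo

net=mo.define_model(vocab_size=5000, max_length=34)
dummy_photo=np.zeros((1, 4096))    # stands in for a VGG16 fc2 feature vector
dummy_seq=np.zeros((1, 34))        # stands in for a padded word-id sequence
print(net.predict([dummy_photo, dummy_seq], verbose=0).shape)   # (1, 5000)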
@@ -0,0 +1,40 @@ (modules.py)
# Shared imports and dataset paths used by the other scripts (imported as m)
import os
import logging
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy as np
from numpy import argmax,array
from pandas import DataFrame
import nltk
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import corpus_bleu
import ssl
import plotly
from PIL import Image
from pickle import dump,load
from os import listdir
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing import image, sequence
from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input
from tensorflow.keras.preprocessing.image import load_img,img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import plot_model,to_categorical
from tensorflow.keras.models import Model,load_model
from tensorflow.keras.layers import Input,Dense,LSTM,Embedding,Dropout,add
from tensorflow.keras.callbacks import ModelCheckpoint
tf.get_logger().setLevel('INFO')

# Flickr8k dataset locations (local paths)
IMG_DIR="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_Dataset/Flicker8k_Dataset"
TRAIN_IMG_NAME="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_text/Flickr_8k.trainImages.txt"
TEST_IMG_NAME="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_text/Flickr_8k.testImages.txt"
VALID_IMG_NAME="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_text/Flickr_8k.devImages.txt"
IMG_CAPTION="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_text/Flickr8k.token.txt"
LEMM_CAPTION="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_text/Flickr8k.lemma.token.txt"
@@ -0,0 +1,54 @@
import modules as m

def extract_features(file):
    # VGG16 fc2 (4096-d) features for an image given by file path
    model=m.VGG16()
    model=m.Model(inputs=model.inputs, outputs=model.layers[-2].output)
    image=m.load_img(file, target_size=(224, 224))
    image=m.img_to_array(image)
    image=image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    image=m.preprocess_input(image)
    feature=model.predict(image, verbose=0)
    return feature

def extract_features2(img):
    # Same as extract_features, but for an already-loaded PIL image
    model=m.VGG16()
    model=m.Model(inputs=model.inputs, outputs=model.layers[-2].output)
    image=img.resize((224, 224))
    image=m.img_to_array(image)
    image=image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    image=m.preprocess_input(image)
    feature=model.predict(image, verbose=0)
    return feature

def word2id(integer,tokenizer):
    # Look up the word for a predicted integer id (None if not found)
    for word,i in tokenizer.word_index.items():
        if i==integer:
            return word
    return None

def generate_desc(model, tokenizer, photo, max_length):
    # Greedy decoding: feed the growing caption back in until 'endseq' or max_length
    in_text='startseq'
    for i in range(max_length):
        sequence=tokenizer.texts_to_sequences([in_text])[0]
        sequence=m.pad_sequences([sequence], maxlen=max_length)
        ypred=model.predict([photo,sequence], verbose=0)
        ypred=m.argmax(ypred)
        word=word2id(ypred, tokenizer)
        if word is None:
            break
        in_text+=' '+word
        if word=='endseq':
            break
    return in_text

tokenizer=m.load(open('tokenizer.pkl', 'rb'))
max_length=34          # longest caption length fixed at training time
model=m.load_model('model_9.h5')
pic='test14.jpg'
photo=extract_features(pic)
description=generate_desc(model, tokenizer, photo, max_length)

im=m.array(m.Image.open(pic))
m.plt.imshow(im)
m.plt.show()
print(description)
del generate_desc,extract_features
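modules.py imports corpus_bleu, but no evaluation code appears in the hunks shown here. Below is a hedged sketch of how the generated captions could be scored on the test split; evaluate_model is an assumed helper, and it relies on generate_desc from above (before the final del) together with test_descriptions, test_features and tokenizer as built in the training script.

# Hypothetical evaluation sketch, not from this commit
def evaluate_model(model, descriptions, photos, tokenizer, max_length):
    actual, predicted=[], []
    for key, desc_list in descriptions.items():
        yhat=generate_desc(model, tokenizer, photos[key], max_length)
        actual.append([d.split() for d in desc_list])
        predicted.append(yhat.split())
    print('BLEU-1: %f' % m.corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))

#evaluate_model(model, test_descriptions, test_features, tokenizer, max_length)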