Showing 18 changed files with 204,536 additions and 0 deletions.
@@ -0,0 +1,46 @@
import modules as m
import model as mo
import data_prep_utils as u

# Build and clean the caption dictionary, then persist it to descriptions.txt
txt=u.make_txt(m.IMG_CAPTION)
descriptions=u.make_dict(txt)
u.clean_dict(descriptions)
vocabulary=u.vocab_create(descriptions)
u.save_descriptions(descriptions)

# Test split: image ids, cleaned captions and precomputed VGG16 features
test=u.make_set(m.TEST_IMG_NAME)
test_descriptions=u.load_clean_dict(test)
test_features=u.load_photos('features.pkl',test)

# Training split
train=u.make_set(m.TRAIN_IMG_NAME)
#print(len(train))
train_descriptions=u.load_clean_dict(train)
#print(len(train_descriptions))
train_features=u.load_photos('features.pkl',train)
#print(len(train_features))

tokenizer=u.create_tokenizer(train_descriptions)
vocab_size=len(tokenizer.word_index) + 1
#print('Vocabulary Size: %d' % vocab_size)
maxlen=u.max_length(train_descriptions)

# Reload the training split and features before building the model
train=u.make_set(m.TRAIN_IMG_NAME)
#print('Dataset: %d' % len(train))
train_descriptions=u.load_clean_dict(train)
#print('Descriptions: train=%d' % len(train_descriptions))
train_features=u.load_photos('features.pkl',train)
#print('Photos: train=%d' % len(train_features))
vocab_size=len(tokenizer.word_index) + 1

model=mo.define_model(vocab_size, maxlen)
### Since I already trained this, I am loading a model instead of training it again
model=m.load_model('model_9.h5')
epochs=10
steps=len(train_descriptions)
### Uncomment if the model is to be trained again
#for i in range(epochs):
#    generator=u.data_generator(train_descriptions, train_features, tokenizer, maxlen, vocab_size)
#    model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
#    model.save('model_' + str(i) + '.h5')
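Note (not part of the hunks shown above): the script loads precomputed image features from 'features.pkl', but the extraction step does not appear in this view. Below is a minimal sketch of how such a file could be built with the VGG16 imports already in modules.py; the helper name build_features and the choice of the fc2 layer are assumptions made to match what load_photos and data_generator expect.

# Hypothetical helper, not from this commit: precompute 'features.pkl'
import modules as m

def build_features(img_dir):
    model=m.VGG16()
    # keep the 4096-d fc2 layer, drop the classification head
    model=m.Model(inputs=model.inputs, outputs=model.layers[-2].output)
    features={}
    for name in m.listdir(img_dir):
        img=m.load_img(img_dir + '/' + name, target_size=(224, 224))
        img=m.img_to_array(img).reshape((1, 224, 224, 3))
        img=m.preprocess_input(img)
        # key by image id (filename without extension), as load_photos expects
        features[name.split('.')[0]]=model.predict(img, verbose=0)
    return features

#m.dump(build_features(m.IMG_DIR), open('features.pkl', 'wb'))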
@@ -0,0 +1,122 @@ (data_prep_utils.py)
import modules as m
import data_visualize as v
import string

def make_txt(file):
    f=open(file,'r')
    text=f.read()
    f.close()
    return text

def form_dict(img_list):
    # Map each image name to its raw captions from data_visualize.ic
    img_dict={}
    for im in img_list:
        if im in v.ic:
            img_dict[im]=v.ic[im]
    return img_dict

def make_dict(txt):
    # Parse Flickr8k.token.txt lines into {image_id: [caption, ...]}
    x={}
    for line in txt.split('\n'):
        tokens=line.split()
        if len(line)<3:
            continue
        image_id,image_desc=tokens[0],tokens[1:]
        image_id=image_id.split('.')[0]
        image_desc=' '.join(image_desc)
        if image_id not in x:
            x[image_id]=list()
        x[image_id].append(image_desc)
    return x

def clean_dict(des):
    # Lowercase, strip punctuation, drop one-letter and non-alphabetic tokens
    table=str.maketrans('', '', string.punctuation)
    for k,desc_list in des.items():
        for i in range(len(desc_list)):
            desc=desc_list[i]
            desc=desc.split()
            desc=[word.lower() for word in desc]
            desc=[w.translate(table) for w in desc]
            desc=[word for word in desc if len(word)>1]
            desc=[word for word in desc if word.isalpha()]
            desc_list[i]=' '.join(desc)

def vocab_create(des):
    # Set of all distinct words across the cleaned captions
    all1=set()
    for k in des.keys():
        [all1.update(d.split()) for d in des[k]]
    return all1

def save_descriptions(descriptions):
    lines=[]
    for key,d in descriptions.items():
        for desc in d:
            lines.append(key + ' ' + desc)
    data='\n'.join(lines)
    file=open('descriptions.txt', 'w')
    file.write(data)
    file.close()

def make_set(file):
    # Read a split file (train/dev/test) and return the set of image ids
    txt=make_txt(file)
    d=[]
    for line in txt.split('\n'):
        if len(line)<1:
            continue
        sent=line.split('.')[0]
        d.append(sent)
    return set(d)

def load_clean_dict(dataset):
    # Load cleaned captions for the given split, wrapped in startseq/endseq tokens
    doc=make_txt('descriptions.txt')
    descriptions=dict()
    for line in doc.split('\n'):
        tokens=line.split()
        image_id,image_desc=tokens[0], tokens[1:]
        if image_id in dataset:
            if image_id not in descriptions:
                descriptions[image_id]=list()
            desc='startseq ' + ' '.join(image_desc) + ' endseq'
            descriptions[image_id].append(desc)
    return descriptions

def load_photos(file,dataset):
    # Load precomputed VGG16 features and keep only the ids in this split
    all_features=m.load(open(file, 'rb'))
    f={k: all_features[k] for k in dataset}
    return f

def to_lines(des):
    all=[]
    for key in des.keys():
        [all.append(d) for d in des[key]]
    return all

def max_length(des):
    # Length (in words) of the longest caption
    lines=to_lines(des)
    return max(len(d.split()) for d in lines)

def create_tokenizer(des):
    lines=to_lines(des)
    tokenizer=m.Tokenizer()
    tokenizer.fit_on_texts(lines)
    return tokenizer

def create_sequences(tokenizer, max_length, desc_list, photo, vocab_size):
    # Expand each caption into (photo, partial sequence) -> next-word pairs
    X1,X2,y=[],[],[]
    for desc in desc_list:
        seq=tokenizer.texts_to_sequences([desc])[0]
        for i in range(1,len(seq)):
            in_seq,out_seq=seq[:i], seq[i]
            in_seq=m.pad_sequences([in_seq], maxlen=max_length)[0]
            out_seq=m.to_categorical([out_seq], num_classes=vocab_size)[0]
            X1.append(photo)
            X2.append(in_seq)
            y.append(out_seq)
    return m.array(X1), m.array(X2), m.array(y)

def data_generator(descriptions, photos, tokenizer, max_length, vocab_size):
    # Yield one batch per image: all (partial caption -> next word) pairs for it
    while 1:
        for key,desc_list in descriptions.items():
            photo=photos[key][0]
            in_img,in_seq,out_word=create_sequences(tokenizer, max_length, desc_list, photo, vocab_size)
            yield ([in_img,in_seq],out_word)
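A small self-contained illustration (not part of the commit) of the expansion that create_sequences performs: each caption becomes several (partial word-id sequence -> next word id) pairs. In data_generator each such pair is additionally matched with the image's 4096-d feature vector, and the target word is one-hot encoded to vocab_size.

# Toy walk-through of the caption expansion used above
from tensorflow.keras.preprocessing.text import Tokenizer

tok=Tokenizer()
tok.fit_on_texts(['startseq a dog runs endseq'])
seq=tok.texts_to_sequences(['startseq a dog runs endseq'])[0]
for i in range(1,len(seq)):
    print(seq[:i], '->', seq[i])   # e.g. [1] -> 2, then [1, 2] -> 3, and so on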
@@ -0,0 +1,65 @@ (data_visualize.py)
import modules as m
import plotly.express as px

# Read the raw captions and the train/dev/test split files
captions=open(m.IMG_CAPTION, 'r').read().split("\n")
x_train=open(m.TRAIN_IMG_NAME, 'r').read().split("\n")
x_val=open(m.VALID_IMG_NAME, 'r').read().split("\n")
x_test=open(m.TEST_IMG_NAME, 'r').read().split("\n")

# Build {image name: [captions]} plus a flat corpus of 'Start ... End' captions
img=[]
corpus=[]
ic={}
combined=[]
for c in range(len(captions)-1):
    a=captions[c].split('#')
    image=a[0]
    cp='Start '+a[1][2:]+' End'
    combined.append([image,cp])
    img.append(image)
    corpus.append(cp)
    if image in ic:
        ic[image].append(a[1][2:])
    else:
        ic[image]=[a[1][2:]]

combined_df=m.DataFrame(combined,columns=['Image','Caption'])
ds=combined_df.values
m.nltk.download('punkt')

# Tokenise the corpus, dropping simple punctuation; keep the full word list for
# frequency counting and a deduplicated list of distinct words
final_corpus=[]
dup_corpus=[]
for sent in corpus:
    words=m.word_tokenize(sent)
    for w in words:
        w=w.lower()
        if w=='.' or w=='!' or w==",":
            continue
        else:
            dup_corpus.append(w)
        if w in final_corpus:
            continue
        else:
            final_corpus.append(w)

# Word frequency distribution over the full (non-deduplicated) corpus
fdist1=m.nltk.FreqDist(dup_corpus)
fd=fdist1.most_common()
words=[]
for i in range(len(fd)):
    aa=[]
    aa.append(fd[i][0])
    aa.append(fd[i][1])
    words.append(aa)

# Bar chart of the 50 most frequent words
df=m.DataFrame(words,columns=['Words','Count'])
fig=px.bar(df[:50], x='Words', y='Count',color="Count",title="Most frequently occurring words")
fig.update_layout(
    font_family="Courier New",
    title_x=0.5,
    font_color="green",
    title_font_family="Times New Roman",
    title_font_color="black",
    legend_title_font_color="green"
)
fig.show()
@@ -0,0 +1,18 @@ (model.py)
import modules as m

def define_model(vocab_size, max_length):
    # Image feature branch: 4096-d VGG16 fc2 vector -> 256-d representation
    inputs1=m.Input(shape=(4096,))
    fe1=m.Dropout(0.5)(inputs1)
    fe2=m.Dense(256, activation='relu')(fe1)
    # Caption branch: padded word-id sequence -> embedding -> LSTM state
    inputs2=m.Input(shape=(max_length,))
    se1=m.Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2=m.Dropout(0.5)(se1)
    se3=m.LSTM(256)(se2)
    # Decoder: merge both branches and predict the next word over the vocabulary
    decoder1=m.add([fe2, se3])
    decoder2=m.Dense(256, activation='relu')(decoder1)
    outputs=m.Dense(vocab_size, activation='softmax')(decoder2)
    model=m.Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.summary()
    m.plot_model(model, to_file='model.png', show_shapes=True)
    return model
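As a quick sanity check (illustrative only, with placeholder sizes rather than values from this dataset, and assuming the plot_model call inside define_model can run), the model above takes a 4096-d image feature vector plus a padded word-id sequence and returns a softmax over the vocabulary:

# Shape check with dummy inputs (placeholder vocab_size and max_length)
import numpy as np
import model as mo

net=mo.define_model(vocab_size=5000, max_length=34)
dummy_photo=np.zeros((1, 4096))    # stands in for a VGG16 fc2 feature vector
dummy_seq=np.zeros((1, 34))        # stands in for a padded word-id sequence
print(net.predict([dummy_photo, dummy_seq], verbose=0).shape)   # (1, 5000)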
@@ -0,0 +1,40 @@ (modules.py)
# Shared imports and dataset paths used by the other scripts (imported as m)
import os
import logging
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy as np
from numpy import argmax,array
from pandas import DataFrame
import nltk
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import corpus_bleu
import ssl
import plotly
from PIL import Image
from pickle import dump,load
from os import listdir
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing import image, sequence
from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input
from tensorflow.keras.preprocessing.image import load_img,img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import plot_model,to_categorical
from tensorflow.keras.models import Model,load_model
from tensorflow.keras.layers import Input,Dense,LSTM,Embedding,Dropout,add
from tensorflow.keras.callbacks import ModelCheckpoint
tf.get_logger().setLevel('INFO')

# Flickr8k dataset locations (local paths)
IMG_DIR="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_Dataset/Flicker8k_Dataset"
TRAIN_IMG_NAME="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_text/Flickr_8k.trainImages.txt"
TEST_IMG_NAME="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_text/Flickr_8k.testImages.txt"
VALID_IMG_NAME="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_text/Flickr_8k.devImages.txt"
IMG_CAPTION="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_text/Flickr8k.token.txt"
LEMM_CAPTION="/home/sd2001/Desktop/Programming/Image Captioning/Flickr8k_text/Flickr8k.lemma.token.txt"
@@ -0,0 +1,54 @@
import modules as m

def extract_features(file):
    # VGG16 fc2 (4096-d) features for an image given by file path
    model=m.VGG16()
    model=m.Model(inputs=model.inputs, outputs=model.layers[-2].output)
    image=m.load_img(file, target_size=(224, 224))
    image=m.img_to_array(image)
    image=image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    image=m.preprocess_input(image)
    feature=model.predict(image, verbose=0)
    return feature

def extract_features2(img):
    # Same as extract_features, but for an already-loaded PIL image
    model=m.VGG16()
    model=m.Model(inputs=model.inputs, outputs=model.layers[-2].output)
    image=img.resize((224, 224))
    image=m.img_to_array(image)
    image=image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    image=m.preprocess_input(image)
    feature=model.predict(image, verbose=0)
    return feature

def word2id(integer,tokenizer):
    # Look up the word for a predicted integer id (None if not found)
    for word,i in tokenizer.word_index.items():
        if i==integer:
            return word
    return None

def generate_desc(model, tokenizer, photo, max_length):
    # Greedy decoding: feed the growing caption back in until 'endseq' or max_length
    in_text='startseq'
    for i in range(max_length):
        sequence=tokenizer.texts_to_sequences([in_text])[0]
        sequence=m.pad_sequences([sequence], maxlen=max_length)
        ypred=model.predict([photo,sequence], verbose=0)
        ypred=m.argmax(ypred)
        word=word2id(ypred, tokenizer)
        if word is None:
            break
        in_text+=' '+word
        if word=='endseq':
            break
    return in_text

tokenizer=m.load(open('tokenizer.pkl', 'rb'))
max_length=34          # longest caption length fixed at training time
model=m.load_model('model_9.h5')
pic='test14.jpg'
photo=extract_features(pic)
description=generate_desc(model, tokenizer, photo, max_length)

im=m.array(m.Image.open(pic))
m.plt.imshow(im)
m.plt.show()
print(description)
del generate_desc,extract_features
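modules.py imports corpus_bleu, but no evaluation code appears in the hunks shown here. Below is a hedged sketch of how the generated captions could be scored on the test split; evaluate_model is an assumed helper, and it relies on generate_desc from above (before the final del) together with test_descriptions, test_features and tokenizer as built in the training script.

# Hypothetical evaluation sketch, not from this commit
def evaluate_model(model, descriptions, photos, tokenizer, max_length):
    actual, predicted=[], []
    for key, desc_list in descriptions.items():
        yhat=generate_desc(model, tokenizer, photos[key], max_length)
        actual.append([d.split() for d in desc_list])
        predicted.append(yhat.split())
    print('BLEU-1: %f' % m.corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))

#evaluate_model(model, test_descriptions, test_features, tokenizer, max_length)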