diff --git a/src/confs/config_coco.json b/src/confs/config_coco.json new file mode 100644 index 0000000..17cef8c --- /dev/null +++ b/src/confs/config_coco.json @@ -0,0 +1,33 @@ +{ + "model" : { + "input_size": 416, + "grid_size": 13, + "true_box_buffer": 10, + "iou_threshold": 0.5, + "nms_threshold": 0.3 + }, + "config_path" : { + "labels": "models/coco/labels_coco.txt", + "anchors": "models/coco/anchors_coco.txt", + "arch_plotname": "" + }, + "train": { + "out_model_name": "", + "image_folder": "", + "annot_folder": "", + "batch_size": 16, + "learning_rate": 1e-4, + "num_epochs": 20, + "object_scale": 5.0 , + "no_object_scale": 1.0, + "coord_scale": 1.0, + "class_scale": 1.0, + "verbose": 1 + }, + + "valid": { + "image_folder": "", + "annot_folder": "", + "pred_folder": "" + } +} diff --git a/src/confs/config_voc.json b/src/confs/config_voc.json new file mode 100644 index 0000000..509facd --- /dev/null +++ b/src/confs/config_voc.json @@ -0,0 +1,34 @@ +{ + "model" : { + "input_size": 416, + "grid_size": 13, + "true_box_buffer": 10, + "iou_threshold": 0.5, + "nms_threshold": 0.45 + }, + "config_path" : { + "labels": "models/voc/labels_voc.txt", + "anchors": "models/voc/anchors_voc.txt", + "arch_plotname": "voc_arch.png" + }, + "train": { + "out_model_name": "yolo_retrained_voc.h5", + "image_folder": "/home/kiran/Documents/DATA/VOC/train/imgs", + "annot_folder": "/home/kiran/Documents/DATA/VOC/train/anns", + "batch_size": 16, + "learning_rate": 1e-4, + "num_epochs": 50, + "object_scale": 5.0 , + "no_object_scale": 1.0, + "coord_scale": 1.0, + "class_scale": 1.0, + "verbose": 1 + }, + + "valid": { + "image_folder": "/home/kiran/Documents/DATA/VOC/valid/imgs", + "annot_folder": "/home/kiran/Documents/DATA/VOC/valid/anns", + "pred_folder": "/home/kiran/Documents/DATA/VOC/valid/img_pred", + "plot_preds": true + } +} diff --git a/src/gpu_test.py b/src/gpu_test.py new file mode 100644 index 0000000..f146cac --- /dev/null +++ b/src/gpu_test.py @@ -0,0 +1,2 @@ +from keras import backend as K +K.tensorflow_backend._get_available_gpus() \ No newline at end of file diff --git a/src/models/coco/anchors_coco.txt b/src/models/coco/anchors_coco.txt new file mode 100644 index 0000000..808be3a --- /dev/null +++ b/src/models/coco/anchors_coco.txt @@ -0,0 +1 @@ +0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 diff --git a/src/models/coco/labels_coco.txt b/src/models/coco/labels_coco.txt new file mode 100644 index 0000000..941cb4e --- /dev/null +++ b/src/models/coco/labels_coco.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/src/models/voc/anchors_voc.txt b/src/models/voc/anchors_voc.txt new file mode 100644 index 0000000..5374c6f --- /dev/null +++ b/src/models/voc/anchors_voc.txt @@ -0,0 +1 @@ +1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 diff --git 
a/src/models/voc/labels_voc.txt b/src/models/voc/labels_voc.txt
new file mode 100644
index 0000000..1168c39
--- /dev/null
+++ b/src/models/voc/labels_voc.txt
@@ -0,0 +1,20 @@
+aeroplane
+bicycle
+bird
+boat
+bottle
+bus
+car
+cat
+chair
+cow
+diningtable
+dog
+horse
+motorbike
+person
+pottedplant
+sheep
+sofa
+train
+tvmonitor
\ No newline at end of file
diff --git a/src/net/__init__.py b/src/net/__init__.py
new file mode 100644
index 0000000..8a87e95
--- /dev/null
+++ b/src/net/__init__.py
@@ -0,0 +1,7 @@
+#from . import netarch
+#from . import netdecode
+#from . import neteval
+#from . import netgen
+#from . import netloss
+#from . import netparams
+#from . import utils
\ No newline at end of file
diff --git a/src/net/netarch.py b/src/net/netarch.py
new file mode 100644
index 0000000..e2346db
--- /dev/null
+++ b/src/net/netarch.py
@@ -0,0 +1,274 @@
+"""
+Set up the Keras model with the YOLO v2 architecture, for both training
+and inference.
+"""
+import tensorflow as tf
+import numpy as np
+import pickle, argparse, json, os, cv2
+
+from keras.models import Model, load_model
+from keras.layers import Reshape, Conv2D, Input, MaxPooling2D, BatchNormalization, Lambda
+from keras.layers.advanced_activations import LeakyReLU
+
+from keras.layers.merge import concatenate
+from keras.utils.vis_utils import plot_model
+
+from .netparams import YoloParams
+from .netdecode import YoloOutProcess
+
+
+class YoloInferenceModel(object):
+
+    def __init__(self, model):
+        self._yolo_out = YoloOutProcess()
+        self._inf_model = self._extend_processing(model)
+        self._model = model
+
+    def _extend_processing(self, model):
+        output = Lambda(self._yolo_out, name='lambda_2')(model.output)
+        return Model(model.input, output)
+
+    def _prepro_single_image(self, image):
+        image = cv2.resize(image,
+            (YoloParams.INPUT_SIZE, YoloParams.INPUT_SIZE))
+        # yolo normalize
+        image = image / 255.
+        # cv2 loads channels as BGR; revert to RGB for the YOLO pass
+        image = image[:,:,::-1]
+        image = np.expand_dims(image, 0)
+
+        return image
+
+    def predict(self, image):
+
+        image = self._prepro_single_image(image)
+
+        output = self._inf_model.predict(image)[0]
+
+        if output.size == 0:
+            return []  # no boxes above the detection threshold
+
+        label_idxs = output[:,5].astype(int)
+
+        labels = [YoloParams.CLASS_LABELS[l] for l in label_idxs]
+
+        return labels
+
+
+class YoloArchitecture(object):
+
+    def __init__(self):
+
+        self.in_model_name = YoloParams.IN_MODEL
+        # # self.plot_name = YoloParams.ARCH_FNAME
+
+    def get_model(self):
+
+        yolo_model = self._load_yolo_model()
+
+        # if YoloParams.YOLO_MODE == 'train':
+        #     new_yolo_model = self._setup_transfer_learning(yolo_model)
+        #     #new_name = self.tl_weights_name.split('.')[0] + '_rand.h5'
+        #     #new_yolo_model.save_weights(new_name)
+        #
+        # elif YoloParams.YOLO_MODE in ['inference','validate','video','cam']:
+        #     new_yolo_model = yolo_model
+        #
+        # else:
+        #     raise ValueError(
+        #         'Please set \'--action\' to \'train\', \'validate\' or pass an image file/dir.')
+
+        # if self.plot_name:
+        #     plot_model(new_yolo_model, to_file=self.plot_name, show_shapes=True)
+
+        #return new_yolo_model
+        return yolo_model
+
+    def _load_yolo_model(self):
+        if os.path.isfile(self.in_model_name):
+
+            model = load_model(self.in_model_name, compile=False)
+
+            return model
+        else:
+            raise ValueError('Need to load full model in order to do '
+                             'transfer learning. 
Run script again with desired TL ' + 'config and weight file to generate model.') + + + def weights_to_model(self, in_path, out_path): + yolo_model = self._yolo_v2_architecture() + + try: + yolo_model.load_weights(in_path) + + except IOError as e: + print('File for pre-trained weights not found.') + + yolo_model.save(out_path) + return yolo_model + + + + def _yolo_v2_architecture(self): + # Parse from cfg! + self.layer_counter = 0 + + def space_to_depth_x2(x): + + import tensorflow as tf + return tf.space_to_depth(x, block_size=2) + + + # def conv2D_bn_leaky(inp, filters, kernel_size=(3,3), strides=(1,1), maxpool=False): + # self.layer_counter += 1 + # x = Conv2D(filters, kernel_size=kernel_size, strides=strides, + # padding='same', use_bias=False)(inp) + # + # x = BatchNormalization()(x) + # x = LeakyReLU(alpha=0.1)(x) + # if maxpool: + # return MaxPooling2D(pool_size=(2, 2))(x) + # return x + # + # input_image = Input(shape=(YoloParams.INPUT_SIZE, YoloParams.INPUT_SIZE, 3), name='input') + # + # # Layer 1 + # x = conv2D_bn_leaky(input_image, 32, (3,3), (1,1), maxpool=True) + # + # # Layer 2 + # x = conv2D_bn_leaky(x, 64, maxpool=True) + # + # # Layer 3 + # x = conv2D_bn_leaky(x, 128) + # + # # Layer 4 + # x = conv2D_bn_leaky(x, 64, kernel_size=(1,1)) + # + # # Layer 5 + # x = conv2D_bn_leaky(x, 128, maxpool=True) + # + # # Layer 6 + # x = conv2D_bn_leaky(x, 256) + # + # # Layer 7 + # x = conv2D_bn_leaky(x, 128, kernel_size=(1,1)) + # + # # Layer 8 + # x = conv2D_bn_leaky(x, 256, maxpool=True) + # + # # Layer 9 + # x = conv2D_bn_leaky(x, 512) + # + # # Layer 10 + # x = conv2D_bn_leaky(x, 256, kernel_size=(1,1)) + # + # # Layer 11 + # x = conv2D_bn_leaky(x, 512) + # + # # Layer 12 + # x = conv2D_bn_leaky(x, 256, kernel_size=(1,1)) + # + # # Layer 13 + # x = conv2D_bn_leaky(x, 512) + # + # skip_connection = x + # x = MaxPooling2D(pool_size=(2, 2))(x) + # + # # Layer 14 + # x = conv2D_bn_leaky(x, 1024) + # + # # Layer 15 + # x = conv2D_bn_leaky(x, 512, kernel_size=(1,1)) + # # Layer 16 + # x = conv2D_bn_leaky(x, 1024) + # + # # Layer 17 + # x = conv2D_bn_leaky(x, 512, kernel_size=(1,1)) + # # Layer 18 + # x = conv2D_bn_leaky(x, 1024) + # + # # Layer 19 + # x = conv2D_bn_leaky(x, 1024) + # + # # Layer 20 + # x = conv2D_bn_leaky(x, 1024) + # + # # Layer 21 + # skip_connection = conv2D_bn_leaky(skip_connection, 64, kernel_size=(1,1)) + # skip_connection = Lambda(space_to_depth_x2)(skip_connection) + # x = concatenate([skip_connection, x]) + # + # # Layer 22 + # x = conv2D_bn_leaky(x, 1024) + # + # # Final Conv2D + # x = Conv2D(YoloParams.NUM_BOUNDING_BOXES * (4 + 1 + YoloParams.NUM_CLASSES), (1,1), + # strides=(1,1), padding='same')(x) + # + # + # output = Reshape((YoloParams.GRID_SIZE, YoloParams.GRID_SIZE, + # YoloParams.NUM_BOUNDING_BOXES, 4 + 1 + YoloParams.NUM_CLASSES))(x) + # + # yolo_model = Model(input_image, output) + # + # return yolo_model + + + + # def _setup_transfer_learning(self, yolo_model): + # + # new_yolo_model = self._yolo_v2_update(yolo_model) + # + # layer = new_yolo_model.layers[-2] # the last convolutional layer + # weights = layer.get_weights() + # + # S2 = YoloParams.GRID_SIZE*YoloParams.GRID_SIZE + # new_kernel = np.random.normal(size=weights[0].shape)/S2 + # new_bias = np.random.normal(size=weights[1].shape)/S2 + # + # layer.set_weights([new_kernel, new_bias]) + # + # return new_yolo_model + + + + # def _yolo_v2_update(self, old_yolo_model): + # + # x = Conv2D(YoloParams.NUM_BOUNDING_BOXES * (4 + 1 + YoloParams.NUM_CLASSES), (1,1), + # strides=(1,1), padding='same', 
name='conv_23')(old_yolo_model.layers[-3].output) + # + # output = Reshape((YoloParams.GRID_SIZE, YoloParams.GRID_SIZE, + # YoloParams.NUM_BOUNDING_BOXES, 4 + 1 + YoloParams.NUM_CLASSES))(x) + # + # yolo_model = Model(old_yolo_model.input, output) + # + # return yolo_model + + +# def generate_model(): +# +# yolo_arch = YoloArchitecture() +# +# d = os.path.dirname(YoloParams.WEIGHT_FILE) +# +# out_fname = os.path.join(d, 'model.h5') +# +# print('------------------------------------') +# print('Reading weights from: %s'%YoloParams.WEIGHT_FILE) +# print('Loading into YOLO V2 architecture and storing...') +# print('\n\n') +# yolo_arch.weights_to_model(YoloParams.WEIGHT_FILE, out_fname) +# print('\tModel saved: %s'%out_fname) +# print('\n\n------------------------------------') +# print('Done.') + + + + diff --git a/src/net/netdecode.py b/src/net/netdecode.py new file mode 100644 index 0000000..eeb2ffd --- /dev/null +++ b/src/net/netdecode.py @@ -0,0 +1,218 @@ +""" +Process [GRID x GRID x BOXES x (4 + 1 + CLASSES)]. Filter low confidence +boxes, apply NMS and return boxes, scores, classes. +""" + +import tensorflow as tf +from keras import backend as K +import numpy as np +from .netparams import YoloParams + + + + +def process_outs(b, s, c): + + b_p = b + # Expand dims of scores and classes so we can concat them + # with the boxes and have the output of NMS as an added layer of YOLO. + # Have to do another expand_dims this time on the first dim of the result + # since NMS doesn't know about BATCH_SIZE (operates on 2D, see + # https://www.tensorflow.org/api_docs/python/tf/image/non_max_suppression) + # but keras needs this dimension in the output. + s_p = K.expand_dims(s, axis=-1) + c_p = K.expand_dims(c, axis=-1) + + output_stack = K.concatenate([b_p, s_p, c_p], axis=1) + return K.expand_dims(output_stack, axis=0) + + +class YoloOutProcess(object): + + + def __init__(self): + # thresholds + self.max_boxes = YoloParams.TRUE_BOX_BUFFER + self.nms_threshold = YoloParams.NMS_THRESHOLD + self.detection_threshold = YoloParams.DETECTION_THRESHOLD + + self.num_classes = YoloParams.NUM_CLASSES + + def __call__(self, y_sing_pred): + + # need to convert b's from GRID_SIZE units into IMG coords. Divide by grid here. + b_xy = (K.sigmoid(y_sing_pred[..., 0:2]) + YoloParams.c_grid[0]) / YoloParams.GRID_SIZE + b_wh = (K.exp(y_sing_pred[..., 2:4])*YoloParams.anchors[0]) / YoloParams.GRID_SIZE + b_xy1 = b_xy - b_wh / 2. + b_xy2 = b_xy + b_wh / 2. 
+ boxes = K.concatenate([b_xy1, b_xy2], axis=-1) + + # filter out scores below detection threshold + scores_all = K.sigmoid(y_sing_pred[..., 4:5]) * K.softmax(y_sing_pred[...,5:]) + indicator_detection = scores_all > self.detection_threshold + scores_all = scores_all * K.cast(indicator_detection, np.float32) + + # compute detected classes and scores + classes = K.argmax(scores_all, axis=-1) + scores = K.max(scores_all, axis=-1) + + # flattened tensor length + S2B = YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES + + # flatten boxes, scores for NMS + flatten_boxes = K.reshape(boxes, shape=(S2B, 4)) + flatten_scores = K.reshape(scores, shape=(S2B, )) + flatten_classes = K.reshape(classes, shape=(S2B, )) + + inds = [] + + # apply multiclass NMS + for c in range(self.num_classes): + + # only include boxes of the current class, with > 0 confidence + class_mask = K.cast(K.equal(flatten_classes, c), np.float32) + score_mask = K.cast(flatten_scores > 0, np.float32) + mask = class_mask * score_mask + + # compute class NMS + nms_inds = tf.image.non_max_suppression( + flatten_boxes, + flatten_scores*mask, + max_output_size=self.max_boxes, + iou_threshold=self.nms_threshold, + score_threshold=0. + ) + + inds.append(nms_inds) + + # combine winning box indices of all classes + selected_indices = K.concatenate(inds, axis=-1) + + # gather corresponding boxes, scores, class indices + selected_boxes = K.gather(flatten_boxes, selected_indices) + selected_scores = K.gather(flatten_scores, selected_indices) + selected_classes = K.gather(flatten_classes, selected_indices) + + return process_outs(selected_boxes, selected_scores, K.cast(selected_classes, np.float32)) + + + + +class YoloOutProcessOther(object): + """ + [UNUSED] Ignore. + """ + + def __init__(self): + + self.max_boxes = YoloParams.TRUE_BOX_BUFFER + self.nms_threshold = YoloParams.NMS_THRESHOLD + self.detection_threshold = YoloParams.DETECTION_THRESHOLD + + self.num_classes = YoloParams.NUM_CLASSES + + + def _class_nms(self, boxes, scores, c_mask): + #c_mask = K.equal(classes, i) + c_mask = c_mask*K.cast(scores > 0, np.float32) + c_boxes = boxes * K.expand_dims(c_mask, axis=-1) + c_scores = scores * c_mask + inds = tf.image.non_max_suppression(c_boxes, c_scores, max_output_size=10, iou_threshold=0.2) + # tf.pad(inds, tf.Variable([[0,10-tf.shape(inds)[0]]]), "CONSTANT") + return self._pad_tensor(inds, 10, value=-1) + + + def _pad_tensor(self, t, length, value=0): + """Pads the input tensor with 0s along the first dimension up to the length. + Args: + t: the input tensor, assuming the rank is at least 1. + length: a tensor of shape [1] or an integer, indicating the first dimension + of the input tensor t after padding, assuming length <= t.shape[0]. + Returns: + padded_t: the padded tensor, whose first dimension is length. If the length + is an integer, the first dimension of padded_t is set to length + statically. + """ + t_rank = tf.rank(t) + t_shape = tf.shape(t) + t_d0 = t_shape[0] + pad_d0 = tf.expand_dims(length - t_d0, 0) + pad_shape = tf.cond( + tf.greater(t_rank, 1), lambda: tf.concat([pad_d0, t_shape[1:]], 0), + lambda: tf.expand_dims(length - t_d0, 0)) + padded_t = tf.concat([t, value+tf.zeros(pad_shape, dtype=t.dtype)], 0) + + t_shape = padded_t.get_shape().as_list() + t_shape[0] = length + padded_t.set_shape(t_shape) + + return padded_t + + def __call__(self, y_sing_pred): + + # need to convert b's from GRID_SIZE units into IMG coords. Divide by grid here. 
+ b_xy = (K.sigmoid(y_sing_pred[..., 0:2]) + YoloParams.c_grid[0]) / YoloParams.GRID_SIZE + b_wh = (K.exp(y_sing_pred[..., 2:4])*YoloParams.anchors[0]) / YoloParams.GRID_SIZE + b_xy1 = b_xy - b_wh / 2. + b_xy2 = b_xy + b_wh / 2. + boxes = K.concatenate([b_xy1, b_xy2], axis=-1) + + scores_all = K.expand_dims(K.sigmoid(y_sing_pred[..., 4]), axis=-1) * K.softmax(y_sing_pred[...,5:]) + indicator_detection = scores_all > self.detection_threshold + scores_all = scores_all * K.cast(indicator_detection, np.float32) + + classes = K.argmax(scores_all, axis=-1) + scores = K.max(scores_all, axis=-1) + + S2B = YoloParams.GRID_SIZE*YoloParams.GRID_SIZE*YoloParams.NUM_BOUNDING_BOXES + + flatten_boxes = K.reshape(boxes, shape=(S2B, 4)) + flatten_scores = K.reshape(scores, shape=(S2B, )) + flatten_classes = K.reshape(classes, shape=(S2B, )) + + + c_masks = K.map_fn(lambda c: K.cast(K.equal(flatten_classes, c), np.float32), np.arange(self.num_classes), dtype=np.float32) + resu_stacked = tf.map_fn( + lambda c: self._class_nms(flatten_boxes, flatten_scores, c), + c_masks, + dtype=np.int32, + infer_shape=True) + + resu_flat = K.reshape(resu_stacked, shape=(-1,)) + selected_indices = tf.boolean_mask(resu_flat, ~K.equal(resu_flat, -1)) + + selected_boxes = K.gather(flatten_boxes, selected_indices) + selected_scores = K.gather(flatten_scores, selected_indices) + selected_classes = K.gather(flatten_classes, selected_indices) + + # Exclude padding boxes left behind by tensorflow NMS + score_mask = selected_scores>0. + selected_boxes = tf.boolean_mask(selected_boxes, score_mask) + selected_scores = tf.boolean_mask(selected_scores, score_mask) + selected_classes = tf.boolean_mask(selected_classes, score_mask) + + return process_outs(selected_boxes, selected_scores, K.cast(selected_classes, np.float32)) + + + + + +if __name__ == '__main__': + + tf.InteractiveSession() + + a = tf.convert_to_tensor(np.load('ocell.npy'), np.float32) + + yolo_out = YoloOutProcess() + + resu = yolo_out(a).eval()[0] + + b = resu[:,:4] + s = resu[:,4] + c = resu[:,5] + + print('---------------------') + + print(c) + print(s) + print(b) diff --git a/src/net/netparams.py b/src/net/netparams.py new file mode 100644 index 0000000..dfc6268 --- /dev/null +++ b/src/net/netparams.py @@ -0,0 +1,157 @@ + +import pickle, argparse, json, os, sys +from keras import backend as K +import numpy as np + + +# argparser = argparse.ArgumentParser( +# description='dourflow: a keras YOLO V2 implementation.') +# +# +# argparser.add_argument( +# 'action', +# help='what to do: \'train\', \'validate\', \'cam\' ' +# 'or pass a video, image file/dir.') +# +# argparser.add_argument( +# '-m', +# '--model', +# help='path to input yolo v2 keras model', +# default='coco_model.h5') +# +# +# argparser.add_argument( +# '-c', +# '--conf', +# help='path to configuration file', +# default='confs/config_coco.json') +# +# +# argparser.add_argument( +# '-t', +# '--threshold', +# type=float, +# help='detection threshold', +# default=0.3) +# +# +# argparser.add_argument( +# '-w', +# '--weight_file', +# help='path to weight file', +# default='weights.h5') +# +# +# argparser.add_argument( +# '--gif', +# help='video output stored as gif also', +# action='store_true') +# +# +# args = argparser.parse_args() + + +# action = args.action +config_path = "confs/config_coco.json" + +with open(config_path) as config_buffer: + config = json.loads(config_buffer.read()) + + +def generate_yolo_grid(batch, g, num_bb): + c_x = K.cast(K.reshape(K.tile(K.arange(g), [g]), (1, g, g, 1, 1)), K.floatx()) + 
c_y = K.permute_dimensions(c_x, (0,2,1,3,4)) + return K.tile(K.concatenate([c_x, c_y], -1), [batch, 1, 1, num_bb, 1]) + + +def get_threshold(value): + if value > 1. or value < 0: + raise ValueError('Please enter a valid threshold (between 0. and 1.).') + return value + + + +class YoloParams(object): + + # Mode + PREDICT_IMAGE = '' + WEIGHT_FILE = '' + WEBCAM_OUT = '' + GEN_ANCHORS_PATH = '' + + # if action in ['genw', 'generate_weights']: + # assert args.weight_file, "Need to pass weight file if generating model." + # WEIGHT_FILE = args.weight_file + # elif action == 'cams': + # WEBCAM_OUT = 'cam_out.mp4' + # YOLO_MODE = 'cam' + # elif action in ['genp', 'generate_priors']: + # current_anchors_path = config['config_path']['anchors'] + # GEN_ANCHORS_PATH = os.path.join(os.path.dirname(current_anchors_path), + # 'custom_'+os.path.basename(current_anchors_path)) + # YOLO_MODE = 'genp' + # else: + # if action in ['validate', 'train', 'cam']: + # YOLO_MODE = action + # else: + # if os.path.isdir(action): + # YOLO_MODE = 'inference' + # elif os.path.isfile(action): + # if action.split('.')[1] in ['mp4','avi','wmv','mpg','mpeg']: + # YOLO_MODE = 'video' + # else: + # YOLO_MODE = 'inference' + # else: + # raise ValueError('Run \'python3 dourflow.py --help\'.') + # + # PREDICT_IMAGE = action + + #Paths + TRAIN_IMG_PATH = config['train']['image_folder'] + TRAIN_ANN_PATH = config['train']['annot_folder'] + + VALIDATION_IMG_PATH = config['valid']['image_folder'] + VALIDATION_ANN_PATH = config['valid']['annot_folder'] + VALIDATION_OUT_PATH = config['valid']['pred_folder'] + + #STORE_GIF = args.gif + + # Model + IN_MODEL = "coco_model.h5" + + OUT_MODEL_NAME = config['train']['out_model_name'] + ARCH_FNAME = config['config_path']['arch_plotname'] + + # Classes + CLASS_LABELS = [x.rstrip() for x in open(config['config_path']['labels'])] + NUM_CLASSES = len(CLASS_LABELS) + CLASS_TO_INDEX = dict(zip(CLASS_LABELS, np.arange(NUM_CLASSES))) + + # Infrastructure params + INPUT_SIZE = config['model']['input_size'] + GRID_SIZE = config['model']['grid_size'] + TRUE_BOX_BUFFER = config['model']['true_box_buffer'] + + if config['config_path']['anchors']: + ANCHORS = [float(a) for a in open(config['config_path']['anchors']).read().split(', ')] + NUM_BOUNDING_BOXES = len(ANCHORS) // 2 + OBJECT_SCALE = 5.0 + NO_OBJECT_SCALE = 1.0 + CLASS_SCALE = 1.0 + COORD_SCALE = 1.0 + + # Train params + BATCH_SIZE = config['train']['batch_size'] + L_RATE = config['train']['learning_rate'] + NUM_EPOCHS = config['train']['num_epochs'] + TRAIN_VERBOSE = config['train']['verbose'] + + # Thresholding + IOU_THRESHOLD = get_threshold(config['model']['iou_threshold']) + NMS_THRESHOLD = get_threshold(config['model']['nms_threshold']) + DETECTION_THRESHOLD = get_threshold(0.3) + + # Additional / Precomputing + c_grid = generate_yolo_grid(BATCH_SIZE, GRID_SIZE, NUM_BOUNDING_BOXES) + anchors = np.reshape(ANCHORS, [1,1,1,NUM_BOUNDING_BOXES,2]) + diff --git a/src/net/utils.py b/src/net/utils.py new file mode 100644 index 0000000..1ad340a --- /dev/null +++ b/src/net/utils.py @@ -0,0 +1,259 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import os, errno +import xml.etree.ElementTree as ET + +import tensorflow as tf +import copy +import cv2 + +from moviepy.editor import VideoFileClip + + + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + +def compute_iou(bb_1, bb_2): + + xa0, ya0, xa1, ya1 = bb_1 + 
xb0, yb0, xb1, yb1 = bb_2 + + intersec = (min([xa1, xb1]) - max([xa0, xb0]))*(min([ya1, yb1]) - max([ya0, yb0])) + + union = (xa1 - xa0)*(ya1 - ya0) + (xb1 - xb0)*(yb1 - yb0) - intersec + + return intersec / union + + +def benchmark_timings(data, path=''): + + fig = plt.figure(figsize=(10,15)) + ax = plt.gca() + df = pd.DataFrame(data) + df.plot(ax=ax, kind='area', subplots=True) + plt.savefig(path + 'timings.png', format='png') + plt.close() + + df2 = df.apply(lambda x: x/df['total'], axis=0)[['decode', 'prediction', 'prepro']] + + fig = plt.figure(figsize=(20,13)) + ax = fig.add_subplot(111) + df2.plot(ax=ax) + vals = ax.get_yticks() + ax.set_yticklabels(['{:,.1%}'.format(x) for x in vals]) + plt.savefig(path + 'timings_combined.png', format='png') + plt.close() + + + + +def space_to_depth_x2(x): + """Thin wrapper for Tensorflow space_to_depth with block_size=2.""" + # Import currently required to make Lambda work. + # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273 + + # the function to implement the orgnization layer (thanks to github.com/allanzelener/YAD2K) + + + # tf.space_to_depth: + # Input: [batch, height, width, depth] + # Output: [batch, height/block_size, width/block_size, depth*block_size*block_size] + # Example: [1,4,4,1] -> [1,2,2,4] or in this case [?,38,38,64] -> [?,19,19,256] + # This operation is useful for resizing the activations between convolutions (but keeping all data), + # e.g. instead of pooling. It is also useful for training purely convolutional models. + + # space_to_depth_x2 is just tf.space_to_depth wrapped with block_size=2 + + + # Example + """ + input shape = (4,4,1) + + [ + [[1], [2], [3], [4]], + [[5], [6], [7], [8]], + [[9], [10], [11], [12]], + [[13], [14], [15], [16]] + ] + + is divided into the following chunks (block_size, block_size, channels): + + [[[1], [2]], [[[3], [4]], + [[5], [6]]] [[7], [8]]] + + [[[9], [10],] [[[11], [12]], + [[13], [14]]] [[15], [16]]] + + flatten each chunk to a single array: + + [[1, 2, 5, 6]], [[3, 4, 7, 8]] + [[9, 10, 13, 14]], [[11, 12, 15, 16]] + + + spatially rearrange chunks according to their initial position: + + [ + [[1, 2, 5, 6]], [[3, 4, 7, 8]], + [[9 10, 13, 14]], [[11, 12, 15, 16]] + ] + + output shape = (2,2,4) + """ + import tensorflow as tf + return tf.space_to_depth(x, block_size=2) + + +def draw_boxes(image_in, info): + image = image_in.copy() + image_h, image_w, _ = image.shape + + boxes, scores, labels = info + color_mod = 255 + + for i in range(len(boxes)): + xmin = int(boxes[i][0]*image_w) + ymin = int(boxes[i][1]*image_h) + xmax = int(boxes[i][2]*image_w) + ymax = int(boxes[i][3]*image_h) + + if scores is None: + #text = "%s"%(labels[i]) + text = '' + color_mod = 0 + else: + text = "%s (%.1f%%)"%(labels[i], 100*scores[i]) + + cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (color_mod,255,0), 2) + + cv2.putText(image, + text, + (xmin, ymin - 15), + cv2.FONT_HERSHEY_COMPLEX, + 1e-3 * image_h, + (color_mod,255,0), 1) + return image + + +def parse_annotation(ann_dir, img_dir, labels=[]): + # from https://github.com/experiencor/keras-yolo2/blob/master/preprocessing.py + all_imgs = [] + seen_labels = {} + # go through annotations by sorted filename + for ann in sorted(os.listdir(ann_dir)): + img = {'object':[]} + tree = ET.parse(os.path.join(ann_dir, ann)) + + for elem in tree.iter(): + if 'filename' in elem.tag: + img['filename'] = os.path.join(img_dir, elem.text) + if 'width' in elem.tag: + img['width'] = int(elem.text) + if 'height' in elem.tag: + img['height'] = 
int(elem.text) + if 'object' in elem.tag or 'part' in elem.tag: + obj = {} + + for attr in list(elem): + if 'name' in attr.tag: + obj['name'] = attr.text + + if obj['name'] in seen_labels: + seen_labels[obj['name']] += 1 + else: + seen_labels[obj['name']] = 1 + + if len(labels) > 0 and obj['name'] not in labels: + break + else: + img['object'] += [obj] + + if 'bndbox' in attr.tag: + for dim in list(attr): + if 'xmin' in dim.tag: + obj['xmin'] = int(round(float(dim.text))) + if 'ymin' in dim.tag: + obj['ymin'] = int(round(float(dim.text))) + if 'xmax' in dim.tag: + obj['xmax'] = int(round(float(dim.text))) + if 'ymax' in dim.tag: + obj['ymax'] = int(round(float(dim.text))) + + if len(img['object']) > 0: + all_imgs += [img] + + # all_imgs: [img1, img2, img3, ..] + # + """ + img: + {'object' : [{'name': 'class1', 'xmin': , 'ymin': , 'xmax': , 'ymax': }, # object 1 + {'name': 'class1', 'xmin': , 'ymin': , 'xmax': , 'ymax': }, # object 2 + {'name': 'class2', 'xmin': , 'ymin': , 'xmax': , 'ymax': }] # object 3 + 'filename' : , + 'width':, + 'height': + } + """ + # seen_labels: {'classname': count} + return all_imgs + + +def setup_logging(logging_path='logs'): + + log_path = os.path.join(os.getcwd(),logging_path) + mkdir_p(log_path) + + check_names = lambda y: y if y.isdigit() else -1 + get_ind = lambda x: int(check_names(x.split('_')[1])) + + run_counter = max(map(get_ind, os.listdir(log_path)), default=-1) + 1 + + run_path = os.path.join(log_path, 'run_%s'%run_counter) + mkdir_p(run_path) + + print('Logging set up, to monitor training run:\n' + '\t\'tensorboard --logdir=%s\'\n'%run_path) + + return run_path + + + + +def handle_empty_indexing(arr, idx): + if idx.size > 0: + return arr[idx] + return [] + + +def generate_gif(filename): + outname = filename.split('.')[-2] + '.gif' + VideoFileClip(filename).speedx(2.5).resize(0.5).write_gif( + outname,fps=20, program='ffmpeg', fuzz=3) + print('\n') + + +if __name__ == '__main__': + + imgs, cnts = parse_annotation('/home/kiran/Downloads/VOCdevkit/VOC2012/Annotations/','/home/kiran/Downloads/VOCdevkit/VOC2012/JPEGImages/') + imgs, cnts = parse_annotation('/home/kiran/Downloads/VOCdevkit2007/VOC2007/Annotations/','/home/kiran/Downloads/VOCdevkit2007/VOC2007/JPEGImages/') + + + + + + + + + + + + + diff --git a/src/objects_identifier_of_frames.py b/src/objects_identifier_of_frames.py index 603e47b..4a93115 100644 --- a/src/objects_identifier_of_frames.py +++ b/src/objects_identifier_of_frames.py @@ -17,43 +17,10 @@ from keras.preprocessing import image from keras.applications import resnet50 import os +from yolo.model import YOLO def generate_object_list_of_frames(input_frames_path,img_width, img_height): - - images = [] # List to keep scaled frame data - frame_names = [] # List to keep names of frames - - # Load Keras' ResNet50 model that was pre-trained against the ImageNet database - model = resnet50.ResNet50() - - frames_list = os.listdir(input_frames_path) - frames_list.sort(key=lambda x: int(x[5:-4])) - - for image_name in frames_list: - frame_names.append(image_name) - img = image.load_img(input_frames_path+image_name, target_size=(img_width, img_height)) - img = image.img_to_array(img) - img = np.expand_dims(img, axis=0) - images.append(img) - - images = np.vstack(images) - images = resnet50.preprocess_input(images) - predictions = model.predict(images) - predicted_classes = resnet50.decode_predictions(predictions, top=10) - - index = 0 - frames_predictions_dictionary = {} - for i in predicted_classes: - object_list = [] - for imagenet_id, name, 
likelihood in i:
-                #print(" - {}: {:2f} likelihood".format(name, likelihood))
-                #if(likelihood>20):
-                object_list.append(name)
-
-            frames_predictions_dictionary[frame_names[index]] = object_list
-            index += 1
-
-    return frames_predictions_dictionary
+    return YOLO().predict(input_frames_path)
 
 def run():
     # image folder
diff --git a/src/yolo/model.py b/src/yolo/model.py
new file mode 100644
index 0000000..a51466d
--- /dev/null
+++ b/src/yolo/model.py
@@ -0,0 +1,37 @@
+import os
+import cv2
+from tqdm import tqdm
+
+from src.net.netarch import YoloArchitecture, YoloInferenceModel
+
+
+class YOLO(object):
+
+    def __init__(self):
+        self.debug_timings = True
+        self.yolo_arch = YoloArchitecture()
+        self.model = self.yolo_arch.get_model()
+        self.inf_model = YoloInferenceModel(self.model)
+
+    def predict(self, path):
+        frames_predictions_dictionary = {}
+        # check whether the given path is a directory of frames or a single file
+        if os.path.isdir(path):
+            fnames = [os.path.join(path, f) for f in os.listdir(path)
+                      if os.path.isfile(os.path.join(path, f))]
+        else:
+            fnames = [path]
+
+        for f in tqdm(fnames, desc='Processing Batch'):
+            image = cv2.imread(f)
+            if image is None:
+                # skip files that OpenCV cannot read as images
+                continue
+            labels = self.inf_model.predict(image.copy())
+            # keep at most the first 10 detected labels, padded with None
+            labels_limited = (list(labels)[:10] + [None] * 10)[:10]
+            # key by the frame file name, mirroring the previous ResNet50 version
+            frames_predictions_dictionary[os.path.basename(f)] = labels_limited
+
+        print('Process finished')
+        return frames_predictions_dictionary
diff --git a/src/yolo_test.py b/src/yolo_test.py
new file mode 100644
index 0000000..5a17fc0
--- /dev/null
+++ b/src/yolo_test.py
@@ -0,0 +1,6 @@
+from yolo.model import YOLO
+
+YOLO().predict(r"D:\Campus\FYP\video-summarization\src\test_data\generated_frames")
+
+
+
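
Note (not part of the patch): a minimal usage sketch of the new wrapper. It assumes the interpreter is started from the src/ directory, so that the hard-coded confs/config_coco.json and coco_model.h5 paths in net/netparams.py resolve; the frames directory below is a hypothetical example path.

    from yolo.model import YOLO

    # Maps each frame file name to a fixed-length list of 10 entries:
    # detected class labels first, padded with None.
    predictions = YOLO().predict("test_data/generated_frames/")

    for frame_name, labels in predictions.items():
        detected = [label for label in labels if label is not None]
        print(frame_name, detected)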