forked from dhkim0225/keras-image-segmentation
Commit
Showing 14 changed files with 791 additions and 35 deletions.
Easy-to-use semantic segmentation code for Keras users.

We use the [Cityscapes dataset](https://www.cityscapes-dataset.com/) to train the various models.

Use pretrained VGG16 weights for FCN and U-Net! You can [download the weights](https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5) offered by Keras.

### Tested Env
- python 2 & 3
- tensorflow 1.5
- keras 2.1.4
- opencv 3.3

### File Description
| File | Description |
|:------|:------------|
| train.py | Train a model on the parsed dataset. |
| test.py | Predict a single image. |
| dataset_parser/make_h5.py | Parse the Cityscapes dataset and write it to an h5py file. |
| dataset_parser/generator.py | Data generator with augmentation, reading from data.h5. |
| model/ | Folder that contains the various semantic segmentation models. |
| segmentation_dh/ | Experiment folder for Anthony Kim (not needed by users). |
| segmentation_tk/ | Experiment folder for TaeKang Woo (not needed by users). |
| temp/ | Miscellaneous scripts we used (not needed by users). |

### Implementation Details
We use only **three classes** from the [Cityscapes dataset](https://www.cityscapes-dataset.com/) (person, car, road) to keep the implementation simple.
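
For reference, these classes correspond to fixed label IDs in the Cityscapes annotations. The sketch below shows one way the masks could be pulled out of a `gtFine_labelIds` image; the ID values (road=7, person=24, car=26) come from the official Cityscapes label table, while the file name and the exact encoding used by `make_h5.py` are assumptions (the channel order matches the `TrainCheck` callback added later in this commit).

```python
# Hedged sketch: build person/car/road masks from a Cityscapes labelIds image.
# The path is hypothetical; the IDs are from the official Cityscapes label table.
import cv2
import numpy as np

label_ids = cv2.imread('aachen_000000_000019_gtFine_labelIds.png', cv2.IMREAD_GRAYSCALE)

one_hot = np.zeros(label_ids.shape + (4,), dtype=np.uint8)
one_hot[..., 1] = (label_ids == 24)   # person
one_hot[..., 2] = (label_ids == 26)   # car
one_hot[..., 3] = (label_ids == 7)    # road
one_hot[..., 0] = 1 - one_hot[..., 1:].max(axis=-1)   # everything else -> background
```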

### Simple Tutorial
**First**, you have to build an .h5 file from the data:
```bash
python3 dataset_parser/make_h5.py --path "/downloaded/leftImg8bit/path/" --gtpath "/downloaded/gtFine/path/"
```
After you run the above command, a `data.h5` file will appear in the `dataset_parser` folder.
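
If you want to sanity-check the result, here is a minimal sketch that just walks the file (no assumptions are made about the group or dataset names `make_h5.py` uses):

```python
# Hedged sketch: list every dataset stored in data.h5 with its shape and dtype.
import h5py

with h5py.File('dataset_parser/data.h5', 'r') as f:
    f.visititems(lambda name, obj: print(name, obj.shape, obj.dtype)
                 if isinstance(obj, h5py.Dataset) else None)
```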

**Second**, train your model:
```bash
python3 train.py --model fcn
```
| Option | Description |
|:-------|:------------|
| --model | Model to train. \['fcn', 'unet', 'pspnet'\] |
| --train_batch | Batch size for training. |
| --val_batch | Batch size for validation. |
| --lr_init | Initial learning rate. |
| --lr_decay | Learning-rate decay passed to the optimizer. |
| --vgg | Path to the pretrained VGG16 weights. |
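
Both model files pass these values straight to Keras' Adam optimizer as `Adam(lr=lr_init, decay=lr_decay)`, so `--lr_decay` acts per update via Keras' built-in `lr / (1 + decay * iterations)` schedule rather than per epoch. A small sketch (the numbers are illustrative, not the script's defaults):

```python
# Hedged sketch: effective learning rate under Keras' time-based decay.
def effective_lr(lr_init, lr_decay, iterations):
    return lr_init / (1.0 + lr_decay * iterations)

print(effective_lr(1e-4, 5e-4, 10000))  # ~1.7e-5 after 10k updates
```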

**Finally**, test your model:
```bash
python3 test.py --model fcn
```
| Option | Description |
|:-------|:------------|
| --model | Model to test. \['fcn', 'unet', 'pspnet'\] |
| --img_path | Path of the image you want to test. |
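
`test.py` itself is not part of this diff, so as a rough guide, the sketch below is a generic Keras inference pass that reuses the preprocessing from the `TrainCheck` callback added in this commit (scale to `[-1, 1]`); the module path, checkpoint name, and `num_classes=4` (three classes plus background) are assumptions.

```python
# Hedged sketch: predict a single 256x512 image with a trained FCN model.
import cv2
import numpy as np
from model.fcn import fcn_8s   # module path assumed from the model/ folder above

model = fcn_8s(num_classes=4, input_shape=(256, 512, 3), lr_init=1e-4, lr_decay=5e-4)
model.load_weights('fcn_checkpoint.h5')          # hypothetical checkpoint file

img = cv2.imread('img/test.png')                 # assumed to already be 256x512
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.expand_dims(img, 0).astype(np.float32) / 127.5 - 1.0

pred = model.predict(img)                        # (1, 256, 512, num_classes) softmax
class_map = np.argmax(pred[0], axis=-1)          # per-pixel class indices
```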

### Todo
- [x] FCN
- [x] Unet
- [x] PSPnet
- [ ] DeepLab_v3
- [ ] Mask_RCNN

### Contact us!
Anthony Kim: [email protected]

TaeKang Woo: [email protected]

New file: a Keras callback that writes a color-coded prediction for a sample image at the end of every epoch.
from __future__ import print_function
from keras.callbacks import Callback
from keras import backend as K

import cv2
import numpy as np
import os


class TrainCheck(Callback):
    def __init__(self, output_path, model_name):
        super(TrainCheck, self).__init__()
        self.epoch = 0
        self.output_path = output_path
        self.model_name = model_name

    def result_map_to_img(self, res_map):
        # Map the softmax output (channels: background, person, car, road) to a color image.
        img = np.zeros((256, 512, 3), dtype=np.uint8)
        res_map = np.squeeze(res_map)

        argmax_idx = np.argmax(res_map, axis=2)

        # For np.where calculation.
        person = (argmax_idx == 1)
        car = (argmax_idx == 2)
        road = (argmax_idx == 3)

        img[:, :, 0] = np.where(person, 255, 0)
        img[:, :, 1] = np.where(car, 255, 0)
        img[:, :, 2] = np.where(road, 255, 0)

        return img

    def on_epoch_end(self, epoch, logs=None):
        self.epoch = epoch + 1
        self.visualize('img/test.png')

    def visualize(self, path):
        # Preprocess the sample image the same way as the training data: scale to [-1, 1].
        img = cv2.imread(path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.expand_dims(img, 0)
        img = img / 127.5 - 1

        pred = self.model.predict(img)
        res_img = self.result_map_to_img(pred[0])

        cv2.imwrite(os.path.join(self.output_path, self.model_name + '_epoch_' + str(self.epoch) + '.png'), res_img)
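
A quick way to see what this callback produces without running a full training job, assuming the `TrainCheck` class above is in scope; `result_map_to_img` needs no model, only a softmax-shaped array:

```python
# Hedged sketch: exercise the colorization logic on a fake network output.
import numpy as np

tc = TrainCheck(output_path='img', model_name='demo')
fake_pred = np.random.rand(1, 256, 512, 4)       # one softmax-like map, 4 channels
color_img = tc.result_map_to_img(fake_pred[0])   # person/car/road -> color channels
print(color_img.shape, color_img.dtype)          # (256, 512, 3) uint8
```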
(This commit also adds one file that cannot be displayed here and one empty file.)

New file: FCN-8s model definition on a VGG16-style backbone, with optional loading of the pretrained VGG16 weights.
from keras.models import Model
from keras.layers import Input
from keras.layers.core import Lambda, Activation
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import Add
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras import backend as K

import tensorflow as tf


def dice_coef(y_true, y_pred):
    return (2. * K.sum(y_true * y_pred) + 1.) / (K.sum(y_true) + K.sum(y_pred) + 1.)
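
# Note: this is a global (whole-tensor) Dice score with +1 smoothing in both the
# numerator and denominator. It is only used as a monitoring metric here; the
# training loss below is categorical cross-entropy.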

def fcn_8s(num_classes, input_shape, lr_init, lr_decay, vgg_weight_path=None):
    img_input = Input(input_shape)

    # Block 1
    x = Conv2D(64, (3, 3), padding='same', name='block1_conv1')(img_input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(64, (3, 3), padding='same', name='block1_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D()(x)

    # Block 2
    x = Conv2D(128, (3, 3), padding='same', name='block2_conv1')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(128, (3, 3), padding='same', name='block2_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D()(x)

    # Block 3
    x = Conv2D(256, (3, 3), padding='same', name='block3_conv1')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(256, (3, 3), padding='same', name='block3_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(256, (3, 3), padding='same', name='block3_conv3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    block_3_out = MaxPooling2D()(x)

    # Block 4
    x = Conv2D(512, (3, 3), padding='same', name='block4_conv1')(block_3_out)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block4_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block4_conv3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    block_4_out = MaxPooling2D()(x)

    # Block 5
    x = Conv2D(512, (3, 3), padding='same', name='block5_conv1')(block_4_out)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block5_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block5_conv3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D()(x)

    # Load pretrained VGG16 weights (by layer name) into the encoder.
    if vgg_weight_path is not None:
        vgg16 = Model(img_input, x)
        vgg16.load_weights(vgg_weight_path, by_name=True)

    # Convolutionalized fully connected layers.
    x = Conv2D(4096, (7, 7), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(4096, (1, 1), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Classifying layers.
    x = Conv2D(num_classes, (1, 1), strides=(1, 1), activation='linear')(x)
    x = BatchNormalization()(x)

    block_3_out = Conv2D(num_classes, (1, 1), strides=(1, 1), activation='linear')(block_3_out)
    block_3_out = BatchNormalization()(block_3_out)

    block_4_out = Conv2D(num_classes, (1, 1), strides=(1, 1), activation='linear')(block_4_out)
    block_4_out = BatchNormalization()(block_4_out)

    # FCN-8s skip connections: upsample 2x and fuse with the pool4 features, then pool3.
    x = Lambda(lambda x: tf.image.resize_images(x, (x.shape[1] * 2, x.shape[2] * 2)))(x)
    x = Add()([x, block_4_out])
    x = Activation('relu')(x)

    x = Lambda(lambda x: tf.image.resize_images(x, (x.shape[1] * 2, x.shape[2] * 2)))(x)
    x = Add()([x, block_3_out])
    x = Activation('relu')(x)

    # Final 8x upsampling back to the input resolution.
    x = Lambda(lambda x: tf.image.resize_images(x, (x.shape[1] * 8, x.shape[2] * 8)))(x)

    x = Activation('softmax')(x)

    model = Model(img_input, x)
    model.compile(optimizer=Adam(lr=lr_init, decay=lr_decay),
                  loss='categorical_crossentropy',
                  metrics=[dice_coef])

    return model
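
A hedged end-to-end sketch of how this model and the `TrainCheck` callback from this commit fit together. Random tensors stand in for the real `dataset_parser` generator, `num_classes=4` (three classes plus background) is an assumption, and `TrainCheck` is assumed to be importable from wherever the callback file lives:

```python
# Hedged sketch: build fcn_8s and run a tiny fit() on random data with TrainCheck attached.
import numpy as np

model = fcn_8s(num_classes=4, input_shape=(256, 512, 3),
               lr_init=1e-4, lr_decay=5e-4, vgg_weight_path=None)

x = np.random.rand(2, 256, 512, 3).astype(np.float32) * 2 - 1             # fake images in [-1, 1]
y = np.eye(4, dtype=np.float32)[np.random.randint(0, 4, (2, 256, 512))]   # fake one-hot labels

# Note: TrainCheck.visualize() reads img/test.png, so that file must exist.
model.fit(x, y, batch_size=1, epochs=1,
          callbacks=[TrainCheck(output_path='img', model_name='fcn')])
```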

New file: PSPNet model definition on a ResNet50-style backbone with dilated convolutions and a pyramid pooling module.
from keras.models import Model
from keras.layers import Input
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.layers.core import Activation, Dropout, Lambda
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import add, concatenate
from keras.optimizers import Adam
from keras import backend as K

import tensorflow as tf


def dice_coef(y_true, y_pred):
    return (2. * K.sum(y_true * y_pred) + 1.) / (K.sum(y_true) + K.sum(y_pred) + 1.)


def conv_block(input_tensor, filters, strides, d_rates):
    x = Conv2D(filters[0], kernel_size=1, dilation_rate=d_rates[0])(input_tensor)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters[1], kernel_size=3, strides=strides, padding='same', dilation_rate=d_rates[1])(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters[2], kernel_size=1, dilation_rate=d_rates[2])(x)
    x = BatchNormalization()(x)

    shortcut = Conv2D(filters[2], kernel_size=1, strides=strides)(input_tensor)
    shortcut = BatchNormalization()(shortcut)

    x = add([x, shortcut])
    x = Activation('relu')(x)

    return x


def identity_block(input_tensor, filters, d_rates):
    x = Conv2D(filters[0], kernel_size=1, dilation_rate=d_rates[0])(input_tensor)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters[1], kernel_size=3, padding='same', dilation_rate=d_rates[1])(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters[2], kernel_size=1, dilation_rate=d_rates[2])(x)
    x = BatchNormalization()(x)

    x = add([x, input_tensor])
    x = Activation('relu')(x)

    return x
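
# conv_block vs. identity_block: conv_block projects the shortcut through a 1x1
# convolution so it can change the channel count and stride, while identity_block
# adds the input back unchanged. Stacked together they form standard ResNet
# bottleneck stages; the d_rates arguments add dilation in the later stages so the
# backbone only downsamples the input by 8x overall.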

def pyramid_pooling_block(input_tensor, bin_sizes):
    concat_list = [input_tensor]
    h = input_tensor.shape[1].value
    w = input_tensor.shape[2].value

    for bin_size in bin_sizes:
        x = AveragePooling2D(pool_size=(h//bin_size, w//bin_size), strides=(h//bin_size, w//bin_size))(input_tensor)
        x = Conv2D(512, kernel_size=1)(x)
        x = Lambda(lambda x: tf.image.resize_images(x, (h, w)))(x)

        concat_list.append(x)

    return concatenate(concat_list)
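
# Pyramid pooling (above): for a 256x512 input the feature map reaching this block
# is 32x64 (stride 8). Each bin size b average-pools it to roughly a b x b grid,
# applies a 1x1 convolution, and resizes back to 32x64, so the concatenation mixes
# context at several scales. pspnet50 below calls it with bin sizes [1, 2, 3, 6],
# as in the PSPNet paper.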

def pspnet50(num_classes, input_shape, lr_init, lr_decay):
    img_input = Input(input_shape)

    # Stem: three 3x3 convolutions (the first with stride 2) followed by a stride-2 max pool.
    x = Conv2D(64, kernel_size=3, strides=(2, 2), padding='same')(img_input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(64, kernel_size=3, strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(128, kernel_size=3, strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    x = conv_block(x, filters=[64, 64, 256], strides=(1, 1), d_rates=[1, 1, 1])
    x = identity_block(x, filters=[64, 64, 256], d_rates=[1, 1, 1])
    x = identity_block(x, filters=[64, 64, 256], d_rates=[1, 1, 1])

    x = conv_block(x, filters=[128, 128, 512], strides=(2, 2), d_rates=[1, 1, 1])
    x = identity_block(x, filters=[128, 128, 512], d_rates=[1, 1, 1])
    x = identity_block(x, filters=[128, 128, 512], d_rates=[1, 1, 1])
    x = identity_block(x, filters=[128, 128, 512], d_rates=[1, 1, 1])

    # The last two stages use dilated convolutions instead of further striding,
    # so the feature map stays at 1/8 of the input resolution.
    x = conv_block(x, filters=[256, 256, 1024], strides=(1, 1), d_rates=[1, 2, 1])
    x = identity_block(x, filters=[256, 256, 1024], d_rates=[1, 2, 1])
    x = identity_block(x, filters=[256, 256, 1024], d_rates=[1, 2, 1])
    x = identity_block(x, filters=[256, 256, 1024], d_rates=[1, 2, 1])
    x = identity_block(x, filters=[256, 256, 1024], d_rates=[1, 2, 1])
    x = identity_block(x, filters=[256, 256, 1024], d_rates=[1, 2, 1])

    x = conv_block(x, filters=[512, 512, 2048], strides=(1, 1), d_rates=[1, 4, 1])
    x = identity_block(x, filters=[512, 512, 2048], d_rates=[1, 4, 1])
    x = identity_block(x, filters=[512, 512, 2048], d_rates=[1, 4, 1])

    x = pyramid_pooling_block(x, [1, 2, 3, 6])

    x = Conv2D(512, kernel_size=3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)

    # Per-pixel classifier followed by an 8x learned upsampling back to the input size.
    x = Conv2D(num_classes, kernel_size=1)(x)
    x = Conv2DTranspose(num_classes, kernel_size=(16, 16), strides=(8, 8), padding='same')(x)
    x = Activation('softmax')(x)

    model = Model(img_input, x)
    model.compile(optimizer=Adam(lr=lr_init, decay=lr_decay),
                  loss='categorical_crossentropy',
                  metrics=[dice_coef])

    return model
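
A hedged shape check for this head; `num_classes=4` (three classes plus background) and the 256x512 input size are assumptions carried over from the rest of the commit:

```python
# Hedged sketch: the stride-8 Conv2DTranspose should restore the full input resolution.
model = pspnet50(num_classes=4, input_shape=(256, 512, 3), lr_init=1e-4, lr_decay=5e-4)
print(model.output_shape)   # expected: (None, 256, 512, 4)
```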