forked from dhkim0225/keras-image-segmentation
Commit
Showing 14 changed files with 791 additions and 35 deletions.
Easy-to-use semantic segmentation code for Keras users.

We use the [Cityscapes dataset](https://www.cityscapes-dataset.com/) to train the various models.

Use pretrained VGG16 weights for FCN and U-Net! You can [download the weights](https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5) offered by Keras.

### Tested Env
- python 2 & 3
- tensorflow 1.5
- keras 2.1.4
- opencv 3.3

### File Description
| File | Description |
|:------|:------------|
| train.py | Train a model on the parsed dataset. |
| test.py | Predict a single image. |
| dataset_parser/make_h5.py | Parse the Cityscapes dataset and write it to an h5py file. |
| dataset_parser/generator.py | Data generator with augmentation, reading from data.h5. |
| model/ | Folder that contains the various semantic segmentation models. |
| segmentation_dh/ | Experiment folder for Anthony Kim (not needed by users). |
| segmentation_tk/ | Experiment folder for TaeKang Woo (not needed by users). |
| temp/ | Miscellaneous scripts we used (not needed by users). |

### Implementation Details
We use only **three classes** from the [Cityscapes dataset](https://www.cityscapes-dataset.com/) (person, car, road) to keep the implementation simple.
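
For reference, these classes correspond to fixed label IDs in the Cityscapes annotations. The sketch below shows one way the masks could be pulled out of a `gtFine_labelIds` image; the ID values (road=7, person=24, car=26) come from the official Cityscapes label table, while the file name and the exact encoding used by `make_h5.py` are assumptions (the channel order matches the `TrainCheck` callback added later in this commit).

```python
# Hedged sketch: build person/car/road masks from a Cityscapes labelIds image.
# The path is hypothetical; the IDs are from the official Cityscapes label table.
import cv2
import numpy as np

label_ids = cv2.imread('aachen_000000_000019_gtFine_labelIds.png', cv2.IMREAD_GRAYSCALE)

one_hot = np.zeros(label_ids.shape + (4,), dtype=np.uint8)
one_hot[..., 1] = (label_ids == 24)   # person
one_hot[..., 2] = (label_ids == 26)   # car
one_hot[..., 3] = (label_ids == 7)    # road
one_hot[..., 0] = 1 - one_hot[..., 1:].max(axis=-1)   # everything else -> background
```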

### Simple Tutorial
**First**, you have to build an .h5 file from the data:
```bash
python3 dataset_parser/make_h5.py --path "/downloaded/leftImg8bit/path/" --gtpath "/downloaded/gtFine/path/"
```
After you run the above command, a `data.h5` file will appear in the `dataset_parser` folder.
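
If you want to sanity-check the result, here is a minimal sketch that just walks the file (no assumptions are made about the group or dataset names `make_h5.py` uses):

```python
# Hedged sketch: list every dataset stored in data.h5 with its shape and dtype.
import h5py

with h5py.File('dataset_parser/data.h5', 'r') as f:
    f.visititems(lambda name, obj: print(name, obj.shape, obj.dtype)
                 if isinstance(obj, h5py.Dataset) else None)
```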

**Second**, train your model:
```bash
python3 train.py --model fcn
```
| Option | Description |
|:-------|:------------|
| --model | Model to train. \['fcn', 'unet', 'pspnet'\] |
| --train_batch | Batch size for training. |
| --val_batch | Batch size for validation. |
| --lr_init | Initial learning rate. |
| --lr_decay | Learning-rate decay passed to the optimizer. |
| --vgg | Path to the pretrained VGG16 weights. |
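
Both model files pass these values straight to Keras' Adam optimizer as `Adam(lr=lr_init, decay=lr_decay)`, so `--lr_decay` acts per update via Keras' built-in `lr / (1 + decay * iterations)` schedule rather than per epoch. A small sketch (the numbers are illustrative, not the script's defaults):

```python
# Hedged sketch: effective learning rate under Keras' time-based decay.
def effective_lr(lr_init, lr_decay, iterations):
    return lr_init / (1.0 + lr_decay * iterations)

print(effective_lr(1e-4, 5e-4, 10000))  # ~1.7e-5 after 10k updates
```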

**Finally**, test your model:
```bash
python3 test.py --model fcn
```
| Option | Description |
|:-------|:------------|
| --model | Model to test. \['fcn', 'unet', 'pspnet'\] |
| --img_path | Path of the image you want to test. |
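
`test.py` itself is not part of this diff, so as a rough guide, the sketch below is a generic Keras inference pass that reuses the preprocessing from the `TrainCheck` callback added in this commit (scale to `[-1, 1]`); the module path, checkpoint name, and `num_classes=4` (three classes plus background) are assumptions.

```python
# Hedged sketch: predict a single 256x512 image with a trained FCN model.
import cv2
import numpy as np
from model.fcn import fcn_8s   # module path assumed from the model/ folder above

model = fcn_8s(num_classes=4, input_shape=(256, 512, 3), lr_init=1e-4, lr_decay=5e-4)
model.load_weights('fcn_checkpoint.h5')          # hypothetical checkpoint file

img = cv2.imread('img/test.png')                 # assumed to already be 256x512
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.expand_dims(img, 0).astype(np.float32) / 127.5 - 1.0

pred = model.predict(img)                        # (1, 256, 512, num_classes) softmax
class_map = np.argmax(pred[0], axis=-1)          # per-pixel class indices
```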

### Todo
- [x] FCN
- [x] Unet
- [x] PSPnet
- [ ] DeepLab_v3
- [ ] Mask_RCNN

### Contact us!
Anthony Kim: [email protected]

TaeKang Woo: [email protected]

New file: a Keras callback that writes a color-coded prediction for a sample image at the end of every epoch.
from __future__ import print_function
from keras.callbacks import Callback
from keras import backend as K

import cv2
import numpy as np
import os


class TrainCheck(Callback):
    def __init__(self, output_path, model_name):
        super(TrainCheck, self).__init__()
        self.epoch = 0
        self.output_path = output_path
        self.model_name = model_name

    def result_map_to_img(self, res_map):
        # Map the softmax output (channels: background, person, car, road) to a color image.
        img = np.zeros((256, 512, 3), dtype=np.uint8)
        res_map = np.squeeze(res_map)

        argmax_idx = np.argmax(res_map, axis=2)

        # For np.where calculation.
        person = (argmax_idx == 1)
        car = (argmax_idx == 2)
        road = (argmax_idx == 3)

        img[:, :, 0] = np.where(person, 255, 0)
        img[:, :, 1] = np.where(car, 255, 0)
        img[:, :, 2] = np.where(road, 255, 0)

        return img

    def on_epoch_end(self, epoch, logs=None):
        self.epoch = epoch + 1
        self.visualize('img/test.png')

    def visualize(self, path):
        # Preprocess the sample image the same way as the training data: scale to [-1, 1].
        img = cv2.imread(path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.expand_dims(img, 0)
        img = img / 127.5 - 1

        pred = self.model.predict(img)
        res_img = self.result_map_to_img(pred[0])

        cv2.imwrite(os.path.join(self.output_path, self.model_name + '_epoch_' + str(self.epoch) + '.png'), res_img)
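
A quick way to see what this callback produces without running a full training job, assuming the `TrainCheck` class above is in scope; `result_map_to_img` needs no model, only a softmax-shaped array:

```python
# Hedged sketch: exercise the colorization logic on a fake network output.
import numpy as np

tc = TrainCheck(output_path='img', model_name='demo')
fake_pred = np.random.rand(1, 256, 512, 4)       # one softmax-like map, 4 channels
color_img = tc.result_map_to_img(fake_pred[0])   # person/car/road -> color channels
print(color_img.shape, color_img.dtype)          # (256, 512, 3) uint8
```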
(This commit also adds one file that cannot be displayed here and one empty file.)

New file: FCN-8s model definition on a VGG16-style backbone, with optional loading of the pretrained VGG16 weights.
from keras.models import Model
from keras.layers import Input
from keras.layers.core import Lambda, Activation
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import Add
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras import backend as K

import tensorflow as tf


def dice_coef(y_true, y_pred):
    return (2. * K.sum(y_true * y_pred) + 1.) / (K.sum(y_true) + K.sum(y_pred) + 1.)
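
# Note: this is a global (whole-tensor) Dice score with +1 smoothing in both the
# numerator and denominator. It is only used as a monitoring metric here; the
# training loss below is categorical cross-entropy.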

def fcn_8s(num_classes, input_shape, lr_init, lr_decay, vgg_weight_path=None):
    img_input = Input(input_shape)

    # Block 1
    x = Conv2D(64, (3, 3), padding='same', name='block1_conv1')(img_input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(64, (3, 3), padding='same', name='block1_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D()(x)

    # Block 2
    x = Conv2D(128, (3, 3), padding='same', name='block2_conv1')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(128, (3, 3), padding='same', name='block2_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D()(x)

    # Block 3
    x = Conv2D(256, (3, 3), padding='same', name='block3_conv1')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(256, (3, 3), padding='same', name='block3_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(256, (3, 3), padding='same', name='block3_conv3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    block_3_out = MaxPooling2D()(x)

    # Block 4
    x = Conv2D(512, (3, 3), padding='same', name='block4_conv1')(block_3_out)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block4_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block4_conv3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    block_4_out = MaxPooling2D()(x)

    # Block 5
    x = Conv2D(512, (3, 3), padding='same', name='block5_conv1')(block_4_out)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block5_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block5_conv3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D()(x)

    # Load pretrained VGG16 weights (by layer name) into the encoder.
    if vgg_weight_path is not None:
        vgg16 = Model(img_input, x)
        vgg16.load_weights(vgg_weight_path, by_name=True)

    # Convolutionalized fully connected layers.
    x = Conv2D(4096, (7, 7), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(4096, (1, 1), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Classifying layers.
    x = Conv2D(num_classes, (1, 1), strides=(1, 1), activation='linear')(x)
    x = BatchNormalization()(x)

    block_3_out = Conv2D(num_classes, (1, 1), strides=(1, 1), activation='linear')(block_3_out)
    block_3_out = BatchNormalization()(block_3_out)

    block_4_out = Conv2D(num_classes, (1, 1), strides=(1, 1), activation='linear')(block_4_out)
    block_4_out = BatchNormalization()(block_4_out)

    # FCN-8s skip connections: upsample 2x and fuse with the pool4 features, then pool3.
    x = Lambda(lambda x: tf.image.resize_images(x, (x.shape[1] * 2, x.shape[2] * 2)))(x)
    x = Add()([x, block_4_out])
    x = Activation('relu')(x)

    x = Lambda(lambda x: tf.image.resize_images(x, (x.shape[1] * 2, x.shape[2] * 2)))(x)
    x = Add()([x, block_3_out])
    x = Activation('relu')(x)

    # Final 8x upsampling back to the input resolution.
    x = Lambda(lambda x: tf.image.resize_images(x, (x.shape[1] * 8, x.shape[2] * 8)))(x)

    x = Activation('softmax')(x)

    model = Model(img_input, x)
    model.compile(optimizer=Adam(lr=lr_init, decay=lr_decay),
                  loss='categorical_crossentropy',
                  metrics=[dice_coef])

    return model
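
A hedged end-to-end sketch of how this model and the `TrainCheck` callback from this commit fit together. Random tensors stand in for the real `dataset_parser` generator, `num_classes=4` (three classes plus background) is an assumption, and `TrainCheck` is assumed to be importable from wherever the callback file lives:

```python
# Hedged sketch: build fcn_8s and run a tiny fit() on random data with TrainCheck attached.
import numpy as np

model = fcn_8s(num_classes=4, input_shape=(256, 512, 3),
               lr_init=1e-4, lr_decay=5e-4, vgg_weight_path=None)

x = np.random.rand(2, 256, 512, 3).astype(np.float32) * 2 - 1             # fake images in [-1, 1]
y = np.eye(4, dtype=np.float32)[np.random.randint(0, 4, (2, 256, 512))]   # fake one-hot labels

# Note: TrainCheck.visualize() reads img/test.png, so that file must exist.
model.fit(x, y, batch_size=1, epochs=1,
          callbacks=[TrainCheck(output_path='img', model_name='fcn')])
```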

New file: PSPNet model definition on a ResNet50-style backbone with dilated convolutions and a pyramid pooling module.
from keras.models import Model
from keras.layers import Input
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.layers.core import Activation, Dropout, Lambda
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import add, concatenate
from keras.optimizers import Adam
from keras import backend as K

import tensorflow as tf


def dice_coef(y_true, y_pred):
    return (2. * K.sum(y_true * y_pred) + 1.) / (K.sum(y_true) + K.sum(y_pred) + 1.)


def conv_block(input_tensor, filters, strides, d_rates):
    x = Conv2D(filters[0], kernel_size=1, dilation_rate=d_rates[0])(input_tensor)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters[1], kernel_size=3, strides=strides, padding='same', dilation_rate=d_rates[1])(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters[2], kernel_size=1, dilation_rate=d_rates[2])(x)
    x = BatchNormalization()(x)

    shortcut = Conv2D(filters[2], kernel_size=1, strides=strides)(input_tensor)
    shortcut = BatchNormalization()(shortcut)

    x = add([x, shortcut])
    x = Activation('relu')(x)

    return x


def identity_block(input_tensor, filters, d_rates):
    x = Conv2D(filters[0], kernel_size=1, dilation_rate=d_rates[0])(input_tensor)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters[1], kernel_size=3, padding='same', dilation_rate=d_rates[1])(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(filters[2], kernel_size=1, dilation_rate=d_rates[2])(x)
    x = BatchNormalization()(x)

    x = add([x, input_tensor])
    x = Activation('relu')(x)

    return x
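
# conv_block vs. identity_block: conv_block projects the shortcut through a 1x1
# convolution so it can change the channel count and stride, while identity_block
# adds the input back unchanged. Stacked together they form standard ResNet
# bottleneck stages; the d_rates arguments add dilation in the later stages so the
# backbone only downsamples the input by 8x overall.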

def pyramid_pooling_block(input_tensor, bin_sizes):
    concat_list = [input_tensor]
    h = input_tensor.shape[1].value
    w = input_tensor.shape[2].value

    for bin_size in bin_sizes:
        x = AveragePooling2D(pool_size=(h//bin_size, w//bin_size), strides=(h//bin_size, w//bin_size))(input_tensor)
        x = Conv2D(512, kernel_size=1)(x)
        x = Lambda(lambda x: tf.image.resize_images(x, (h, w)))(x)

        concat_list.append(x)

    return concatenate(concat_list)
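
# Pyramid pooling (above): for a 256x512 input the feature map reaching this block
# is 32x64 (stride 8). Each bin size b average-pools it to roughly a b x b grid,
# applies a 1x1 convolution, and resizes back to 32x64, so the concatenation mixes
# context at several scales. pspnet50 below calls it with bin sizes [1, 2, 3, 6],
# as in the PSPNet paper.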

def pspnet50(num_classes, input_shape, lr_init, lr_decay):
    img_input = Input(input_shape)

    # Stem: three 3x3 convolutions (the first with stride 2) followed by a stride-2 max pool.
    x = Conv2D(64, kernel_size=3, strides=(2, 2), padding='same')(img_input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(64, kernel_size=3, strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(128, kernel_size=3, strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    x = conv_block(x, filters=[64, 64, 256], strides=(1, 1), d_rates=[1, 1, 1])
    x = identity_block(x, filters=[64, 64, 256], d_rates=[1, 1, 1])
    x = identity_block(x, filters=[64, 64, 256], d_rates=[1, 1, 1])

    x = conv_block(x, filters=[128, 128, 512], strides=(2, 2), d_rates=[1, 1, 1])
    x = identity_block(x, filters=[128, 128, 512], d_rates=[1, 1, 1])
    x = identity_block(x, filters=[128, 128, 512], d_rates=[1, 1, 1])
    x = identity_block(x, filters=[128, 128, 512], d_rates=[1, 1, 1])

    # The last two stages use dilated convolutions instead of further striding,
    # so the feature map stays at 1/8 of the input resolution.
    x = conv_block(x, filters=[256, 256, 1024], strides=(1, 1), d_rates=[1, 2, 1])
    x = identity_block(x, filters=[256, 256, 1024], d_rates=[1, 2, 1])
    x = identity_block(x, filters=[256, 256, 1024], d_rates=[1, 2, 1])
    x = identity_block(x, filters=[256, 256, 1024], d_rates=[1, 2, 1])
    x = identity_block(x, filters=[256, 256, 1024], d_rates=[1, 2, 1])
    x = identity_block(x, filters=[256, 256, 1024], d_rates=[1, 2, 1])

    x = conv_block(x, filters=[512, 512, 2048], strides=(1, 1), d_rates=[1, 4, 1])
    x = identity_block(x, filters=[512, 512, 2048], d_rates=[1, 4, 1])
    x = identity_block(x, filters=[512, 512, 2048], d_rates=[1, 4, 1])

    x = pyramid_pooling_block(x, [1, 2, 3, 6])

    x = Conv2D(512, kernel_size=3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)

    # Per-pixel classifier followed by an 8x learned upsampling back to the input size.
    x = Conv2D(num_classes, kernel_size=1)(x)
    x = Conv2DTranspose(num_classes, kernel_size=(16, 16), strides=(8, 8), padding='same')(x)
    x = Activation('softmax')(x)

    model = Model(img_input, x)
    model.compile(optimizer=Adam(lr=lr_init, decay=lr_decay),
                  loss='categorical_crossentropy',
                  metrics=[dice_coef])

    return model
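
A hedged shape check for this head; `num_classes=4` (three classes plus background) and the 256x512 input size are assumptions carried over from the rest of the commit:

```python
# Hedged sketch: the stride-8 Conv2DTranspose should restore the full input resolution.
model = pspnet50(num_classes=4, input_shape=(256, 512, 3), lr_init=1e-4, lr_decay=5e-4)
print(model.output_shape)   # expected: (None, 256, 512, 4)
```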