Commit

updated selim_sef-solution
dlindenbaum committed Apr 30, 2018
1 parent e3bb9d3 commit 28fc286
Showing 62 changed files with 6,390 additions and 0 deletions.
50 changes: 50 additions & 0 deletions selem_sef-solution/Dockerfile
@@ -0,0 +1,50 @@
FROM nvidia/cuda:8.0-cudnn6-devel

MAINTAINER Selim Seferbekov <[email protected]>

ARG TENSORFLOW_VERSION=1.4.1
ARG TENSORFLOW_ARCH=gpu
ARG KERAS_VERSION=2.1.3

RUN apt-get update && \
apt-get install -y curl build-essential libpng12-dev libffi-dev \
libboost-all-dev \
libgflags-dev \
libgoogle-glog-dev \
libhdf5-serial-dev \
libleveldb-dev \
liblmdb-dev \
libopencv-dev \
libprotobuf-dev \
libsnappy-dev \
protobuf-compiler \
git \
&& \
apt-get clean && \
rm -rf /var/tmp /tmp /var/lib/apt/lists/*

RUN curl -sSL -o /installer.sh https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh && \
bash /installer.sh -b -f && \
rm /installer.sh

ENV PATH "$PATH:/root/anaconda3/bin"

RUN pip --no-cache-dir install \
https://storage.googleapis.com/tensorflow/linux/${TENSORFLOW_ARCH}/tensorflow_${TENSORFLOW_ARCH}-${TENSORFLOW_VERSION}-cp36-cp36m-linux_x86_64.whl

RUN pip install --no-cache-dir --no-dependencies keras==${KERAS_VERSION}
RUN conda install tqdm
RUN conda install -c conda-forge opencv
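# Dependencies for converting predicted road masks into vector linestrings:
# sknw (skeleton graphs), pygeoif/shapely (geometry), simplification (polyline simplification).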
RUN pip install git+https://github.com/yxdragon/sknw
RUN pip install pygeoif
RUN pip install shapely
RUN pip install simplification

WORKDIR /work

COPY . /work/


RUN chmod 777 train.sh
RUN chmod 777 test.sh

37 changes: 37 additions & 0 deletions selem_sef-solution/calculate_stats.py
@@ -0,0 +1,37 @@
import os
import numpy as np
from skimage.external import tifffile

from tqdm import tqdm

from params import args
from tools.mul_img_utils import stretch_8bit

cities = ['AOI_2_Vegas', 'AOI_3_Paris', 'AOI_4_Shanghai', 'AOI_5_Khartoum', ]


def calc_stats(img_dir):
city_mean_value = {}
for city in cities:
city_mean = []
city_mean_img = np.zeros((1300, 1300, 8))
num_images = 0
city_dir = os.path.join(img_dir, city + '_Roads_Train', 'MUL-PanSharpen')
for f in tqdm(os.listdir(city_dir)):
if f.endswith(".tif"):
arr = tifffile.imread(os.path.join(city_dir, f))
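                # Reorder the 8 multispectral bands so the first three approximate RGB
                # (band order assumed from WorldView-3 MUL-PanSharpen imagery).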
image = np.stack([arr[..., 4], arr[..., 2], arr[..., 1], arr[..., 0], arr[..., 3], arr[..., 5], arr[..., 6], arr[..., 7]], axis=-1)
image = stretch_8bit(image)
if image is not None:
city_mean_img += (image * 255.)
num_images += 1

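        # Per-band mean pixel value for the city: accumulated sum divided by image count.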
for i in range(8):
city_mean.append(np.mean(city_mean_img[..., i] / num_images))
city_mean_value[city] = city_mean

return city_mean_value


if __name__ == '__main__':
print(calc_stats(args.img_dir))
Empty file.
202 changes: 202 additions & 0 deletions selem_sef-solution/datasets/spacenet.py
@@ -0,0 +1,202 @@
import os
import random
import time

import cv2
import numpy as np
import pandas as pd
import pygeoif
from keras.preprocessing.image import Iterator, img_to_array, load_img
from skimage.external.tifffile import tifffile

from tools.mul_img_utils import stretch_8bit
from tools.stats import mean_bands

cities = ['AOI_2_Vegas', 'AOI_3_Paris', 'AOI_4_Shanghai', 'AOI_5_Khartoum']

os.makedirs("masks", exist_ok=True)


def get_city_id(city_dir):
return next(x for x in cities if x in city_dir)


def generate_ids(city_dirs, clahe):
print("Generate image ids for dirs: " + str(city_dirs))
ids = []
for city_dir in city_dirs:
city_id = get_city_id(city_dir)
subdir = "MUL-PanSharpen"
#if clahe:
# subdir = "CLAHE-MUL-PanSharpen"
mul_dir = os.path.join(city_dir, subdir)
for f in os.listdir(mul_dir):
if f.endswith(".tif"):
ids.append((city_id, f.split(".tif")[0].split("MUL-PanSharpen_")[1]))
return sorted(ids)


def get_groundtruth(city_dirs):
gt = {}
for city_dir in city_dirs:
summary_dir = os.path.join(city_dir, 'summaryData')

path_to_csv = os.path.join(summary_dir, city_dir.split("/")[-1] + ".csv")
print("Processing CSV: " + path_to_csv)
matrix = pd.read_csv(path_to_csv).as_matrix()
for line in matrix:
id = line[0]
linestring = line[1]
gt_lines = gt.get(id, [])
gt_lines.append(linestring)
gt[id] = gt_lines
return gt


class MULSpacenetDataset(Iterator):
def __init__(self,
data_dirs,
wdata_dir,
image_ids,
crop_shape,
preprocessing_function='tf',
random_transformer=None,
batch_size=8,
crops_per_image=3,
thickness=16,
shuffle=True,
image_name_template=None,
masks_dict=None,
stretch_and_mean=None,
ohe_city=True,
clahe=False,
seed=None):
self.data_dirs = data_dirs
self.image_ids = image_ids
self.wdata_dir = wdata_dir
self.clahe = clahe
self.image_name_template = image_name_template
self.masks_dict = masks_dict
self.random_transformer = random_transformer
self.crop_shape = crop_shape
self.stretch_and_mean = stretch_and_mean
self.ohe_city = ohe_city
self.crops_per_image = crops_per_image
self.preprocessing_function = preprocessing_function
self.thickness = thickness
if seed is None:
seed = np.uint32(time.time() * 1000)

super(MULSpacenetDataset, self).__init__(len(self.image_ids), batch_size, shuffle, seed)

def transform_mask(self, mask, image):
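        # Zero the mask wherever the first three image bands are all zero (blank/nodata pixels).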
mask[np.where(np.all(image[..., :3] == (0, 0, 0), axis=-1))] = 0
return mask

def transform_batch_y(self, batch_y):
return batch_y

def _get_batches_of_transformed_samples(self, index_array):
batch_x = []
batch_y = []

for batch_index, image_index in enumerate(index_array):
city, id = self.image_ids[image_index]

for data_dir in self.data_dirs:
city_dir_name = data_dir.split("/")[-1]
if city in data_dir:
img_name = self.image_name_template.format(id=id)
if self.clahe:
data_dir = os.path.join(self.wdata_dir, city_dir_name)
path = os.path.join(data_dir, img_name)
else:
path = os.path.join(data_dir, img_name)
break

arr = tifffile.imread(path)

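            # Same band reordering as calculate_stats.py: (assumed) R, G, B first, remaining bands after.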
image = np.stack([arr[..., 4], arr[..., 2], arr[..., 1], arr[..., 0], arr[..., 3], arr[..., 5], arr[..., 6], arr[..., 7]], axis=-1)
if self.stretch_and_mean:
image = stretch_8bit(image) * 255
if self.ohe_city:
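                # Append one channel per city as a one-hot ID plane; the value 2047 presumably
                # keeps it on the same ~11-bit scale as the raw imagery.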
ohe_city = np.zeros((image.shape[0], image.shape[1], 4), dtype="float32")
ohe_city[..., cities.index(city)] = 2047
image = np.concatenate([image, ohe_city], axis=-1)
image = np.array(image, dtype="float32")

lines = self.masks_dict[id]
            # Rasterized masks are cached on disk and reused on subsequent epochs.
            if os.path.exists("masks/" + id + ".png"):
                mask = img_to_array(load_img("masks/" + id + ".png", grayscale=True)) / 255.
            else:
                mask = np.zeros((image.shape[0], image.shape[1], 1))
                # lines are WKT linestrings; parse with pygeoif and rasterize with cv2.line
for line in lines:
if "LINESTRING EMPTY" == line:
continue
points = pygeoif.from_wkt(line).coords
for i in range(1, len(points)):
pt1 = (int(points[i - 1][0]), int(points[i - 1][1]))
pt2 = (int(points[i][0]), int(points[i][1]))
cv2.line(mask, pt1, pt2, (1,), thickness=self.thickness)
cv2.imwrite("masks/" + id + ".png", mask * 255)
ori_height = image.shape[0]
ori_width = image.shape[1]

mask = self.transform_mask(mask, image)
if self.random_transformer is not None:
image, mask = self.random_transformer.random_transform(image, mask)

if self.stretch_and_mean:
mean_band = mean_bands[city]

for band in range(len(mean_band)):
image[..., band] -= mean_band[band]
if self.crop_shape is not None:
crops = 0
tries = 0
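                # Rejection-sample crops: accept ones with enough non-zero pixels, give up after 20 tries.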
while crops < self.crops_per_image:
tries += 1
if self.random_transformer is None:
y_start = (ori_height - self.crop_shape[0]) // 2
x_start = (ori_width - self.crop_shape[1]) // 2
else:
y_start = random.randint(0, ori_height - self.crop_shape[0] - 1)
x_start = random.randint(0, ori_width - self.crop_shape[1] - 1)
y_end = y_start + self.crop_shape[0]
x_end = x_start + self.crop_shape[1]
crop_image = image[y_start:y_end, x_start:x_end, :]
crop_mask = mask[y_start:y_end, x_start:x_end, :]
if self.random_transformer is None:
batch_x.append(crop_image)
batch_y.append(crop_mask)
crops += 1
elif np.count_nonzero(crop_image) > 100 or tries > 20:
batch_x.append(crop_image)
batch_y.append(crop_mask)
crops += 1
else:
batch_x.append(image)
batch_y.append(mask)
batch_x = np.array(batch_x, dtype="float32")
batch_y = np.array(batch_y, dtype="float32")
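        # Final scaling follows the backbone's preprocessing convention:
        # 'caffe' swaps RGB to BGR and zero-centers; 'tf' scales raw ~11-bit values to roughly [-1, 1].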
if self.preprocessing_function == 'caffe':
batch_x_rgb = batch_x[..., :3]
batch_x_bgr = batch_x_rgb[..., ::-1]
batch_x[..., :3] = batch_x_bgr
if not self.stretch_and_mean:
batch_x = batch_x / 8. - 127.5
else:
if self.stretch_and_mean:
batch_x = batch_x / 255
else:
batch_x = batch_x / 1024. - 1
return self.transform_batch_x(batch_x), self.transform_batch_y(batch_y)

def transform_batch_x(self, batch_x):
return batch_x

def next(self):
with self.lock:
index_array = next(self.index_generator)
return self._get_batches_of_transformed_samples(index_array)
1 change: 1 addition & 0 deletions selem_sef-solution/docker-build.sh
@@ -0,0 +1 @@
nvidia-docker build -t selim_sef .
3 changes: 3 additions & 0 deletions selem_sef-solution/docker-remove.sh
@@ -0,0 +1,3 @@
#!/usr/bin/env bash

docker images -q --filter "dangling=true" | xargs docker rmi
3 changes: 3 additions & 0 deletions selem_sef-solution/docker-run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash

nvidia-docker run -v /local_data/SpaceNet_Roads_Dataset:/data -v /local_data/SpaceNet_Roads_Dataset/results/selim_sef:/wdata --rm -ti --ipc=host selim_sef
4 changes: 4 additions & 0 deletions selem_sef-solution/docker-stop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash

docker stop $(docker ps -a -q)
docker rm $(docker ps -a -q)
3 changes: 3 additions & 0 deletions selem_sef-solution/download_models.sh
@@ -0,0 +1,3 @@
mkdir -p trained_models
aws s3 sync s3://spacenet-dataset/SpaceNet_Roads_Competition/Pretrained_Models/04-selim_sef/ trained_models/

57 changes: 57 additions & 0 deletions selem_sef-solution/generate_submission.py
@@ -0,0 +1,57 @@
from multiprocessing.pool import Pool

import cv2
import numpy as np
import os

from params import args
from tools.vectorize import to_line_strings

folders = [
'all_masks/linknet_inception',
'all_masks/inception-unet',
'all_masks/clahe_inception-swish',
'all_masks/clahe_linknet_inception',
'all_masks/clahe_linknet_inception_lite',
'all_masks/clahe_linknet_resnet50'
]


def predict(f):
image_id = f.split('MUL-PanSharpen_')[1].split(".tif")[0]
masks = []
for folder in folders:
masks.append(cv2.imread(os.path.join(folder, f + ".png")) / 255)
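    # Simple ensemble: average the per-model probability masks pixel-wise before vectorizing.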
mask = np.average(np.array(masks), axis=0)
line_strings = to_line_strings(mask, threashold=0.25, sigma=0.5, dilation=1)
result = ""
if len(line_strings) > 0:
for line_string in line_strings:
result += '{image_id},"{line}"\n'.format(image_id=image_id, line=line_string)
else:
result += "{image_id},{line}\n".format(image_id=image_id, line="LINESTRING EMPTY")

return result


def multi_predict(X, predict):
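    # Fan predict() out over four worker processes; pool.map keeps results in input order.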
pool = Pool(4)
results = pool.map(predict, X)
pool.close()
pool.join()
return results


f_submit = open(args.output_file + ".txt", "w")

for city_dir in args.dirs_to_process:
print("ensemble for dir ", city_dir)
pool = Pool(4)

test_dir = os.path.join(city_dir, 'MUL-PanSharpen')
files = sorted(os.listdir(test_dir))
city_results = multi_predict(files, predict)
for line in city_results:
f_submit.write(line)

f_submit.close()