-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinference.py
295 lines (223 loc) · 11.5 KB
/
inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
import os # For interacting with the operating system
import pandas as pd # For data manipulation and analysis
import numpy as np # For numerical computing
import random # For generating random numbers
import cv2 # For computer vision tasks
import albumentations as albu # For image augmentation
import matplotlib.pyplot as plt # For plotting
from sklearn.model_selection import train_test_split # For splitting data into train and test sets
import torch # PyTorch library
import torch.nn as nn # PyTorch's neural network module
import segmentation_models_pytorch as smp # PyTorch segmentation models
SEED = 42 # Setting the random seed for reproducibility
BASE_URL = os.path.join("../input", "airbus-ship-detection") # Defining base URL for dataset
TRAIN_URL = os.path.join(BASE_URL, "train_v2") # URL for training data
TEST_URL = os.path.join(BASE_URL, "test_v2") # URL for test data
def seed_everything(seed=42):
# Setting random seed for various libraries
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
def rle_decode_the_mask(mask_series: pd.Series, shape: tuple=None):
"""
Decode an RLE-encoded mask from a pandas series.
Parameters:
- mask_series (pd.Series): Pandas series containing the mask (0/"0" if empty).
- shape (tuple, optional): Shape of the image in case of resizing.
Returns:
- original_shape_mask (torch.Tensor): Decoded mask with masked pixels being white.
"""
# Extracting the mask array from the series
mask_array = mask_series.values
# Checking if the mask is empty (no ships)
if mask_array == 0 or mask_array == '0':
return torch.zeros(shape) # Returning a zero tensor
else:
mask_array = mask_array[0] # unpacking the mask
# Creating a background mask based on the shape
if shape is None:
bg_mask = torch.zeros(768 * 768, dtype=torch.uint8)
else:
bg_mask = torch.zeros(shape[0] * shape[1], dtype=torch.uint8)
# Splitting the mask array to decode RLE
mask = mask_array.split()
starts, lengths = [np.asarray(x, dtype=int) for x in (mask[0:][::2], mask[1:][::2])] # take each first value for pixel posiiton and each second - for the pixels qty after it
starts -= 1 # Adjusting the start values to 0-indexing
ends = starts + lengths # Calculating the end values based on start and length
# Create a mask with the shape of the original image
original_shape_mask = torch.zeros(768 * 768, dtype=torch.uint8)
# Assign 1 to the pixels based on RLE encoding
for lo, hi in zip(starts, ends):
original_shape_mask[lo:hi] = 1 # mark the "masked" pixels as white
# If the shape is provided and different from the original image's shape, resize the mask
if shape is not None:
# Reshaping the mask to the original image dimensions (just in case)
original_shape_mask = original_shape_mask.view(768, 768)
# Resizing the mask to match the provided shape
original_shape_mask = cv2.resize(original_shape_mask.numpy(), (shape[1], shape[0]), interpolation=cv2.INTER_LINEAR)
# Converting the resized mask back to a PyTorch tensor
original_shape_mask = torch.from_numpy(original_shape_mask)
return original_shape_mask.T # Returning the decoded mask
def encode_the_image(imagePath: str, shape: tuple=None):
"""
Encode an image into a NumPy array with optional resizing.
Parameters:
- imagePath (str): The path to the image file.
- shape (tuple, optional): The desired shape of the output image (height, width).
Returns:
- img (numpy.ndarray): Encoded image as a NumPy array; resized; color scheme: RGB.
"""
# Read the image from the given path
img = cv2.imread(imagePath)
# Convert the color space from BGR to RGB
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Resize the image if a shape is provided
if shape is not None:
img = cv2.resize(img, shape, interpolation=cv2.INTER_LINEAR)
return img # Return the encoded image
def plot_the_image(imagePath: str, axis, shape: tuple=None):
"""
Plot an image on a given axis with optional resizing.
Parameters:
- imagePath (str): The path to the image file.
- axis: The matplotlib axis to plot the image.
- shape (tuple, optional): The desired shape of the output image (height, width).
"""
# Check if the image exists
if not os.path.exists(imagePath):
print("The image", imagePath, "is not found")
return
# Encode the image
img = encode_the_image(imagePath, shape)
# Resize the image if a shape is provided
if shape is not None:
img = cv2.resize(img, shape, interpolation=cv2.INTER_LINEAR)
# Plot the image on the given axis
if axis is not None:
axis.imshow(img)
axis.axis('off')
def plot_the_mask(mask_array: pd.Series, axis=None, shape: tuple=None):
"""
Plot a mask on a given axis with optional resizing.
Parameters:
- mask_array (pd.Series): Pandas series containing the mask.
- axis: The matplotlib axis to plot the mask.
- shape (tuple, optional): The desired shape of the output mask (height, width).
Returns:
- None
"""
# Decode the mask
mask_decoded = rle_decode_the_mask(mask_array, shape)
# Convert mask_decoded to numpy array
mask_decoded_np = mask_decoded.numpy()
# Reshape the mask if shape is provided
if shape is not None:
mask_decoded_resized = cv2.resize(mask_decoded_np, shape[::-1], interpolation=cv2.INTER_LINEAR)
# Plot the mask on the given axis
if axis is not None:
axis.imshow(mask_decoded_resized, cmap='gray')
axis.axis('off')
def apply_threshold(mask: np.ndarray, threshold: float):
"""
Applies a threshold to a mask.
Parameters:
- mask (numpy.ndarray): Input predicted mask array.
- threshold (float): Threshold value.
Returns:
- thresholded_mask (numpy.ndarray): Thresholded mask array.
"""
return (mask > threshold).astype(np.uint8)
def get_prediction(model: nn.Module, image_name: str, shape: tuple=(256,256)):
"""
Get predictions for a given image using the specified model.
Parameters:
- model (nn.Module): Trained model.
- image_name (str): Name of the image file.
- shape (tuple): height and width of the final mask
Returns:
- predicted_mask (numpy.ndarray): Predicted mask array if the image is within the validation data, None otherwise.
"""
# Check if the image is in the validation data
if len(validation_df[validation_df['ImageId'] == image_name]) == 0:
print("The", image_name, "doesn't exist or may be in a training data. We need to assess the quality utilizing the unseen data only")
return None
# Read the image using OpenCV and convert it to RGB color space
image = cv2.imread(os.path.join(TRAIN_URL, image_name))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Resize the image to the desired input size using bilinear interpolation
image = cv2.resize(image, shape, interpolation=cv2.INTER_LINEAR)
# Apply validation augmentations to the image
image = VALID_AUGS(image=image)['image']
# Convert the image to a PyTorch tensor, permute dimensions, add batch dimension, and move to the device
image_tensor = torch.from_numpy(image).permute(2, 0, 1).unsqueeze(0).float().to(device) # from (width, height, color_channel) to (batch_size=1, color_channel, width, height)
# Perform prediction
with torch.no_grad():
model.eval() # Set the model to evaluation mode
predicted_mask = model(image_tensor).sigmoid().squeeze().cpu().numpy() # convert to probabilities, reshape into (width, height), move to cpu and cast to numpy
return predicted_mask
if __name__ == "__main__":
# ensure reproducability across the script (images are still sampled randomly)
seed_everything()
# import data after the EDA
df = pd.read_csv("train_ship_segmentation_post_EDA.csv")
# Splitting the dataframe into training and validation sets, stratifying on "Ships" column
train_df, validation_df = train_test_split(df, test_size=0.2, random_state=SEED, stratify=df['Ships'])
# Resetting the index of the training and validation dataframes (needed for a convenient sampling; specifically after the stratification)
train_df = train_df.reset_index(drop=True)
validation_df = validation_df.reset_index(drop=True)
# select the device to process
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Going to use '{device}' device!")
# define the model architecture
unet_model = smp.Unet("resnet18", classes=1, encoder_weights="imagenet")
# move the model to the device
unet_model = unet_model.to(device)
print("Number of trainable parameters -", sum(p.numel() for p in unet_model.parameters() if p.requires_grad))
# ensure on warnings or errors if working on CPU, then load the best model weights
if device == "cpu":
checkpoint = torch.load(os.path.join("models", "model_2_sratified_unet_semantic_segmentation.pt"), map_location=torch.device('cpu'))
else:
checkpoint = torch.load(os.path.join("models", "model_2_sratified_unet_semantic_segmentation.pt"))
# load imported weights into the model
unet_model.load_state_dict(checkpoint)
# image size to plot and predict (model was trained on (256,256))
INPUT_SIZE = (256,256)
# the threshold that divides predictions into "non-ship pixel" and "ship-pixel"
THRESHOLD = 4e-4
# validation augmentations (same that the model was trained on)
VALID_AUGS = albu.Compose([
albu.Resize(*INPUT_SIZE),
albu.Normalize(),
])
# Filter images by ship count (no ships, 1 ship, multiple ships)
no_ships_images = validation_df[validation_df['Ships'] == 0].sample(3)['ImageId'].tolist()
single_ship_images = validation_df[validation_df['Ships'] == 1].sample(3)['ImageId'].tolist()
multiple_ships_images = validation_df[validation_df['Ships'] > 1].sample(3)['ImageId'].tolist()
# Determine the maximum number of rows needed for the subplot grid
ROWS_PER_IMAGE = max(len(no_ships_images), len(single_ship_images), len(multiple_ships_images)) * 3
# Create a figure and subplots for displaying images and masks
fig = plt.figure(figsize=(20, 35))
axis = fig.subplots(ROWS_PER_IMAGE, 3)
# Function to plot images and masks
def plot_images(image_list, ships, title_suffix):
for i, image_id in enumerate(image_list):
plot_the_image(os.path.join(TRAIN_URL, image_id), axis[i*3 + ships, 0], INPUT_SIZE)
axis[i*3 + ships, 0].set_title(f'Input Image ({title_suffix})')
axis[i*3 + ships, 0].axis('off')
plot_the_mask(validation_df[validation_df['ImageId'] == image_id]['EncodedPixels'], axis[i*3 + ships, 1], INPUT_SIZE)
axis[i*3 + ships, 1].set_title('Ground Truth Mask')
axis[i*3 + ships, 1].axis('off')
predicted_mask = get_prediction(unet_model, image_id, INPUT_SIZE)
predicted_mask = apply_threshold(predicted_mask, THRESHOLD)
axis[i*3 + ships, 2].imshow(predicted_mask, cmap='gray')
axis[i*3 + ships, 2].set_title('Predicted Mask')
axis[i*3 + ships, 2].axis('off')
# Plot images for each category
plot_images(no_ships_images, 0, '0 ships')
plot_images(single_ship_images, 1, '1 ship')
plot_images(multiple_ships_images, 2, '> 1 ship')
plt.tight_layout()
plt.show()