Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Switch to using pickle instead of npz to store intermediate results #54

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions label_maker/images.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint: disable=unused-argument
"""Generate an .npz file containing arrays for training machine learning algorithms"""

import pickle
from os import makedirs, path as op
from random import shuffle

Expand Down Expand Up @@ -32,8 +33,10 @@ def download_images(dest_folder, classes, imagery, ml_type, background_ratio, **
Other properties from CLI config passed as keywords to other utility functions
"""
# open labels file
labels_file = op.join(dest_folder, 'labels.npz')
tiles = np.load(labels_file)
#labels_file = op.join(dest_folder, 'labels.npz')
#tiles = np.load(labels_file)
with open(op.join(dest_folder, 'labels.pkl'), 'rb') as f:
tiles = pickle.load(f)

# create tiles directory
tiles_dir = op.join(dest_folder, 'tiles')
Expand All @@ -50,14 +53,14 @@ def class_test(value):
elif ml_type == 'classification':
return value[0] == 0
return None
class_tiles = [tile for tile in tiles.files if class_test(tiles[tile])]

class_tiles = [key for key, tile in tiles.items() if class_test(tile)]
# for classification problems with a single class, we also get background
# tiles up to len(class_tiles) * config.get('background_ratio')
background_tiles = []
limit = len(class_tiles) * background_ratio
if ml_type == 'classification' and len(classes) == 1:
background_tiles_full = [tile for tile in tiles.files if tile not in class_tiles]
background_tiles_full = [tile for tile in tiles if tile not in class_tiles]
shuffle(background_tiles_full)
background_tiles = background_tiles_full[:limit]

Expand Down
7 changes: 5 additions & 2 deletions label_maker/label.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from subprocess import run, Popen, PIPE
import json
from functools import partial
import pickle

import numpy as np
import mapbox_vector_tile
Expand Down Expand Up @@ -106,9 +107,11 @@ def make_labels(dest_folder, zoom, country, classes, ml_type, bounding_box, spar
print('Using sparse mode; subselected {} background tiles'.format(n_neg_ex))

# write out labels (the tile_results mapping) as a pickle file
labels_file = op.join(dest_folder, 'labels.npz')
labels_file = op.join(dest_folder, 'labels.pkl')
print('Writing out labels to {}'.format(labels_file))
np.savez(labels_file, **tile_results)
#np.savez(labels_file, **tile_results)
with open(labels_file, 'wb') as f:
pickle.dump(tile_results, f)

# write out labels as GeoJSON or PNG
if ml_type == 'classification':
Expand Down
11 changes: 7 additions & 4 deletions label_maker/package.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint: disable=unused-argument
"""Generate an .npz file containing arrays for training machine learning algorithms"""

import pickle
from os import path as op
from urllib.parse import urlparse
import numpy as np
Expand Down Expand Up @@ -35,17 +36,19 @@ def package_directory(dest_folder, classes, imagery, ml_type, seed=False, train_
np.random.seed(seed)

# open labels file, create tile array
labels_file = op.join(dest_folder, 'labels.npz')
labels = np.load(labels_file)
tile_names = [tile for tile in labels.files]
labels_file = op.join(dest_folder, 'labels.pkl')
#labels = np.load(labels_file)
with open(labels_file, 'rb') as f:
labels = pickle.load(f)
tile_names = [tile for tile in labels]
tile_names.sort()
tiles = np.array(tile_names)
np.random.shuffle(tiles)

# find maximum number of features in advance so numpy shapes match
if ml_type == 'object-detection':
max_features = 0
for tile in labels.files:
for tile in labels:
features = len(labels[tile])
if features > max_features:
max_features = features
Expand Down
10 changes: 6 additions & 4 deletions label_maker/preview.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint: disable=unused-argument
"""Produce imagery examples for specified classes"""

import pickle
from os import path as op
from os import makedirs
from urllib.parse import urlparse
Expand Down Expand Up @@ -35,8 +36,10 @@ def preview(dest_folder, number, classes, imagery, ml_type, **kwargs):
Other properties from CLI config passed as keywords to other utility functions
"""
# open labels file
labels_file = op.join(dest_folder, 'labels.npz')
tiles = np.load(labels_file)
labels_file = op.join(dest_folder, 'labels.pkl')
#tiles = np.load(labels_file)
with open(labels_file, 'rb') as f:
tiles = pickle.load(f)

# create example tiles directory
examples_dir = op.join(dest_folder, 'examples')
Expand All @@ -53,8 +56,7 @@ def preview(dest_folder, number, classes, imagery, ml_type, **kwargs):
if not op.isdir(class_dir):
makedirs(class_dir)

class_tiles = (t for t in tiles.files
if class_match(ml_type, tiles[t], i + 1))
class_tiles = (t for t in tiles if class_match(ml_type, tiles[t], i + 1))
print('Downloading at most {} tiles for class {}'.format(number, cl.get('name')))
for n, tile in enumerate(class_tiles):
if n > number:
Expand Down