diff --git a/label_maker/images.py b/label_maker/images.py index 44b94cb..48dff49 100644 --- a/label_maker/images.py +++ b/label_maker/images.py @@ -1,6 +1,7 @@ # pylint: disable=unused-argument """Generate an .npz file containing arrays for training machine learning algorithms""" +import pickle from os import makedirs, path as op from random import shuffle @@ -32,8 +33,10 @@ def download_images(dest_folder, classes, imagery, ml_type, background_ratio, ** Other properties from CLI config passed as keywords to other utility functions """ # open labels file - labels_file = op.join(dest_folder, 'labels.npz') - tiles = np.load(labels_file) + #labels_file = op.join(dest_folder, 'labels.npz') + #tiles = np.load(labels_file) + with open(op.join(dest_folder, 'labels.pkl'), 'rb') as f: + tiles = pickle.load(f) # create tiles directory tiles_dir = op.join(dest_folder, 'tiles') @@ -50,14 +53,14 @@ def class_test(value): elif ml_type == 'classification': return value[0] == 0 return None - class_tiles = [tile for tile in tiles.files if class_test(tiles[tile])] + class_tiles = [key for key, tile in tiles.items() if class_test(tile)] # for classification problems with a single class, we also get background # tiles up to len(class_tiles) * config.get('background_ratio') background_tiles = [] limit = len(class_tiles) * background_ratio if ml_type == 'classification' and len(classes) == 1: - background_tiles_full = [tile for tile in tiles.files if tile not in class_tiles] + background_tiles_full = [tile for tile in tiles if tile not in class_tiles] shuffle(background_tiles_full) background_tiles = background_tiles_full[:limit] diff --git a/label_maker/label.py b/label_maker/label.py index d0edd4e..f9d5d89 100644 --- a/label_maker/label.py +++ b/label_maker/label.py @@ -5,6 +5,7 @@ from subprocess import run, Popen, PIPE import json from functools import partial +import pickle import numpy as np import mapbox_vector_tile @@ -106,9 +107,11 @@ def make_labels(dest_folder, zoom, country, classes, ml_type, bounding_box, spar print('Using sparse mode; subselected {} background tiles'.format(n_neg_ex)) # write out labels as numpy arrays - labels_file = op.join(dest_folder, 'labels.npz') + labels_file = op.join(dest_folder, 'labels.pkl') print('Writing out labels to {}'.format(labels_file)) - np.savez(labels_file, **tile_results) + #np.savez(labels_file, **tile_results) + with open(labels_file, 'wb') as f: + pickle.dump(tile_results, f) # write out labels as GeoJSON or PNG if ml_type == 'classification': diff --git a/label_maker/package.py b/label_maker/package.py index 5be29d2..06c6f48 100644 --- a/label_maker/package.py +++ b/label_maker/package.py @@ -1,6 +1,7 @@ # pylint: disable=unused-argument """Generate an .npz file containing arrays for training machine learning algorithms""" +import pickle from os import path as op from urllib.parse import urlparse import numpy as np @@ -35,9 +36,11 @@ def package_directory(dest_folder, classes, imagery, ml_type, seed=False, train_ np.random.seed(seed) # open labels file, create tile array - labels_file = op.join(dest_folder, 'labels.npz') - labels = np.load(labels_file) - tile_names = [tile for tile in labels.files] + labels_file = op.join(dest_folder, 'labels.pkl') + #labels = np.load(labels_file) + with open(labels_file, 'rb') as f: + labels = pickle.load(f) + tile_names = [tile for tile in labels] tile_names.sort() tiles = np.array(tile_names) np.random.shuffle(tiles) @@ -45,7 +48,7 @@ def package_directory(dest_folder, classes, imagery, ml_type, seed=False, train_ # find maximum number of features in advance so numpy shapes match if ml_type == 'object-detection': max_features = 0 - for tile in labels.files: + for tile in labels: features = len(labels[tile]) if features > max_features: max_features = features diff --git a/label_maker/preview.py b/label_maker/preview.py index c73a5b0..9a3b9b7 100644 --- a/label_maker/preview.py +++ b/label_maker/preview.py @@ -1,6 +1,7 @@ # pylint: disable=unused-argument """Produce imagery examples for specified classes""" +import pickle from os import path as op from os import makedirs from urllib.parse import urlparse @@ -35,8 +36,10 @@ def preview(dest_folder, number, classes, imagery, ml_type, **kwargs): Other properties from CLI config passed as keywords to other utility functions """ # open labels file - labels_file = op.join(dest_folder, 'labels.npz') - tiles = np.load(labels_file) + labels_file = op.join(dest_folder, 'labels.pkl') + #tiles = np.load(labels_file) + with open(labels_file, 'rb') as f: + tiles = pickle.load(f) # create example tiles directory examples_dir = op.join(dest_folder, 'examples') @@ -53,8 +56,7 @@ def preview(dest_folder, number, classes, imagery, ml_type, **kwargs): if not op.isdir(class_dir): makedirs(class_dir) - class_tiles = (t for t in tiles.files - if class_match(ml_type, tiles[t], i + 1)) + class_tiles = (t for t in tiles if class_match(ml_type, tiles[t], i + 1)) print('Downloading at most {} tiles for class {}'.format(number, cl.get('name'))) for n, tile in enumerate(class_tiles): if n > number: