diff --git a/official/.gitignore b/official/.gitignore
new file mode 100644
index 00000000000..8ffec1408f3
--- /dev/null
+++ b/official/.gitignore
@@ -0,0 +1,3 @@
+cnn/data
+MNIST-data
+labels.txt
diff --git a/official/README.md b/official/README.md
new file mode 100644
index 00000000000..23c947eb661
--- /dev/null
+++ b/official/README.md
@@ -0,0 +1,13 @@
+# TensorFlow Official Models
+
+The TensorFlow official models are a collection of example models that use TensorFlow's high-level APIs. They are intended to be well-maintained, tested, and kept up to date with the latest stable TensorFlow API. They should also be reasonably optimized for fast performance while still being easy to read.
+
+Below is the list of models contained in the garden:
+
+[mnist](mnist): A basic model to classify digits from the MNIST dataset.
+
+[resnet](resnet): A deep residual network that can be used to classify images from both the CIFAR-10 dataset and the 1000-class ImageNet dataset.
+
+More models to come!
+
+If you would like to make any fixes or improvements to the models, please [submit a pull request](https://github.com/tensorflow/models/compare).
diff --git a/official/mnist/README.md b/official/mnist/README.md
new file mode 100644
index 00000000000..bb3a0684782
--- /dev/null
+++ b/official/mnist/README.md
@@ -0,0 +1,29 @@
+# MNIST in TensorFlow
+
+This directory builds a convolutional neural net to classify the [MNIST
+dataset](http://yann.lecun.com/exdb/mnist/) using the
+[tf.contrib.data](https://www.tensorflow.org/api_docs/python/tf/contrib/data),
+[tf.estimator.Estimator](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator),
+and
+[tf.layers](https://www.tensorflow.org/api_docs/python/tf/layers)
+APIs.
+
+
+## Setup
+
+To begin, you'll simply need the latest version of TensorFlow installed.
+
+First convert the MNIST data to TFRecord file format by running the following:
+
+```
+python convert_to_records.py
+```
+
+Then to train the model, run the following:
+
+```
+python mnist.py
+```
+
+The model will begin training and will automatically evaluate itself on the
+validation data.
diff --git a/official/mnist/__init__.py b/official/mnist/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/official/mnist/convert_to_records.py b/official/mnist/convert_to_records.py
new file mode 100644
index 00000000000..cf77063f817
--- /dev/null
+++ b/official/mnist/convert_to_records.py
@@ -0,0 +1,92 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + +"""Converts MNIST data to TFRecords file format with Example protos.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os +import sys + +import tensorflow as tf + +from tensorflow.contrib.learn.python.learn.datasets import mnist + +parser = argparse.ArgumentParser() + +parser.add_argument('--directory', type=str, default='/tmp/mnist_data', + help='Directory to download data files and write the ' + 'converted result.') + +parser.add_argument('--validation_size', type=int, default=0, + help='Number of examples to separate from the training ' + 'data for the validation set.') + + +def _int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def convert_to(data_set, name): + """Converts a dataset to TFRecords.""" + images = data_set.images + labels = data_set.labels + num_examples = data_set.num_examples + + if images.shape[0] != num_examples: + raise ValueError('Images size %d does not match label size %d.' % + (images.shape[0], num_examples)) + rows = images.shape[1] + cols = images.shape[2] + depth = images.shape[3] + + filename = os.path.join(FLAGS.directory, name + '.tfrecords') + print('Writing', filename) + writer = tf.python_io.TFRecordWriter(filename) + for index in range(num_examples): + image_raw = images[index].tostring() + example = tf.train.Example(features=tf.train.Features(feature={ + 'height': _int64_feature(rows), + 'width': _int64_feature(cols), + 'depth': _int64_feature(depth), + 'label': _int64_feature(int(labels[index])), + 'image_raw': _bytes_feature(image_raw)})) + writer.write(example.SerializeToString()) + writer.close() + + +def main(unused_argv): + # Get the data. + data_sets = mnist.read_data_sets(FLAGS.directory, + dtype=tf.uint8, + reshape=False, + validation_size=FLAGS.validation_size) + + # Convert to Examples and write the result to TFRecords. + convert_to(data_sets.train, 'train') + convert_to(data_sets.validation, 'validation') + convert_to(data_sets.test, 'test') + + +if __name__ == '__main__': + tf.logging.set_verbosity(tf.logging.INFO) + FLAGS = parser.parse_args() + tf.app.run() diff --git a/official/mnist/mnist.py b/official/mnist/mnist.py new file mode 100644 index 00000000000..b3b29a38935 --- /dev/null +++ b/official/mnist/mnist.py @@ -0,0 +1,226 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Convolutional Neural Network Estimator for MNIST, built with tf.layers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os + +import numpy as np +import tensorflow as tf + +parser = argparse.ArgumentParser() + +# Basic model parameters. 
+parser.add_argument('--batch_size', type=int, default=100, + help='Number of images to process in a batch') + +parser.add_argument('--data_dir', type=str, default='/tmp/mnist_data', + help='Path to the MNIST data directory.') + +parser.add_argument('--model_dir', type=str, default='/tmp/mnist_model', + help='The directory where the model will be stored.') + +parser.add_argument('--steps', type=int, default=20000, + help='Number of steps to train.') + + +def input_fn(mode, batch_size=1): + """A simple input_fn using the contrib.data input pipeline.""" + + def parser(serialized_example): + """Parses a single tf.Example into image and label tensors.""" + features = tf.parse_single_example( + serialized_example, + features={ + 'image_raw': tf.FixedLenFeature([], tf.string), + 'label': tf.FixedLenFeature([], tf.int64), + }) + image = tf.decode_raw(features['image_raw'], tf.uint8) + image.set_shape([28 * 28]) + + # Normalize the values of the image from the range [0, 255] to [-0.5, 0.5] + image = tf.cast(image, tf.float32) / 255 - 0.5 + label = tf.cast(features['label'], tf.int32) + return image, tf.one_hot(label, 10) + + if mode == tf.estimator.ModeKeys.TRAIN: + tfrecords_file = os.path.join(FLAGS.data_dir, 'train.tfrecords') + else: + assert mode == tf.estimator.ModeKeys.EVAL, 'invalid mode' + tfrecords_file = os.path.join(FLAGS.data_dir, 'test.tfrecords') + + assert os.path.exists(tfrecords_file), ('Run convert_to_records.py first to ' + 'convert the MNIST data to TFRecord file format.') + + dataset = tf.contrib.data.TFRecordDataset([tfrecords_file]) + + # For training, repeat the dataset forever + if mode == tf.estimator.ModeKeys.TRAIN: + dataset = dataset.repeat() + + # Map the parser over dataset, and batch results by up to batch_size + dataset = dataset.map(parser, num_threads=1, output_buffer_size=batch_size) + dataset = dataset.batch(batch_size) + images, labels = dataset.make_one_shot_iterator().get_next() + + return images, labels + + +def mnist_model(inputs, mode): + """Takes the MNIST inputs and mode and outputs a tensor of logits.""" + # Input Layer + # Reshape X to 4-D tensor: [batch_size, width, height, channels] + # MNIST images are 28x28 pixels, and have one color channel + inputs = tf.reshape(inputs, [-1, 28, 28, 1]) + data_format = 'channels_last' + + if tf.test.is_built_with_cuda(): + # When running on GPU, transpose the data from channels_last (NHWC) to + # channels_first (NCHW) to improve performance. + data_format = 'channels_first' + inputs = tf.transpose(inputs, [0, 3, 1, 2]) + + # Convolutional Layer #1 + # Computes 32 features using a 5x5 filter with ReLU activation. + # Padding is added to preserve width and height. + # Input Tensor Shape: [batch_size, 28, 28, 1] + # Output Tensor Shape: [batch_size, 28, 28, 32] + conv1 = tf.layers.conv2d( + inputs=inputs, + filters=32, + kernel_size=[5, 5], + padding='same', + activation=tf.nn.relu, + data_format=data_format) + + # Pooling Layer #1 + # First max pooling layer with a 2x2 filter and stride of 2 + # Input Tensor Shape: [batch_size, 28, 28, 32] + # Output Tensor Shape: [batch_size, 14, 14, 32] + pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2, + data_format=data_format) + + # Convolutional Layer #2 + # Computes 64 features using a 5x5 filter. + # Padding is added to preserve width and height. 
+ # Input Tensor Shape: [batch_size, 14, 14, 32] + # Output Tensor Shape: [batch_size, 14, 14, 64] + conv2 = tf.layers.conv2d( + inputs=pool1, + filters=64, + kernel_size=[5, 5], + padding='same', + activation=tf.nn.relu, + data_format=data_format) + + # Pooling Layer #2 + # Second max pooling layer with a 2x2 filter and stride of 2 + # Input Tensor Shape: [batch_size, 14, 14, 64] + # Output Tensor Shape: [batch_size, 7, 7, 64] + pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2, + data_format=data_format) + + # Flatten tensor into a batch of vectors + # Input Tensor Shape: [batch_size, 7, 7, 64] + # Output Tensor Shape: [batch_size, 7 * 7 * 64] + pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64]) + + # Dense Layer + # Densely connected layer with 1024 neurons + # Input Tensor Shape: [batch_size, 7 * 7 * 64] + # Output Tensor Shape: [batch_size, 1024] + dense = tf.layers.dense(inputs=pool2_flat, units=1024, + activation=tf.nn.relu) + + # Add dropout operation; 0.6 probability that element will be kept + dropout = tf.layers.dropout( + inputs=dense, rate=0.4, training=(mode == tf.estimator.ModeKeys.TRAIN)) + + # Logits layer + # Input Tensor Shape: [batch_size, 1024] + # Output Tensor Shape: [batch_size, 10] + logits = tf.layers.dense(inputs=dropout, units=10) + return logits + + +def mnist_model_fn(features, labels, mode): + """Model function for MNIST.""" + logits = mnist_model(features, mode) + + predictions = { + 'classes': tf.argmax(input=logits, axis=1), + 'probabilities': tf.nn.softmax(logits, name='softmax_tensor') + } + + if mode == tf.estimator.ModeKeys.PREDICT: + return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) + + loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits) + + # Configure the training op + if mode == tf.estimator.ModeKeys.TRAIN: + optimizer = tf.train.AdamOptimizer(learning_rate=1e-4) + train_op = optimizer.minimize(loss, tf.train.get_or_create_global_step()) + else: + train_op = None + + accuracy = tf.metrics.accuracy( + tf.argmax(labels, axis=1), predictions['classes']) + metrics = {'accuracy': accuracy} + + # Create a tensor named train_accuracy for logging purposes + tf.identity(accuracy[1], name='train_accuracy') + tf.summary.scalar('train_accuracy', accuracy[1]) + + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + loss=loss, + train_op=train_op, + eval_metric_ops=metrics) + + +def main(unused_argv): + # Create the Estimator + mnist_classifier = tf.estimator.Estimator( + model_fn=mnist_model_fn, model_dir=FLAGS.model_dir) + + # Train the model + tensors_to_log = { + 'train_accuracy': 'train_accuracy' + } + + logging_hook = tf.train.LoggingTensorHook( + tensors=tensors_to_log, every_n_iter=100) + + mnist_classifier.train( + input_fn=lambda: input_fn(tf.estimator.ModeKeys.TRAIN, FLAGS.batch_size), + steps=FLAGS.steps, + hooks=[logging_hook]) + + # Evaluate the model and print results + eval_results = mnist_classifier.evaluate( + input_fn=lambda: input_fn(tf.estimator.ModeKeys.EVAL)) + print() + print('Evaluation results:\n %s' % eval_results) + + +if __name__ == '__main__': + tf.logging.set_verbosity(tf.logging.INFO) + FLAGS = parser.parse_args() + tf.app.run() diff --git a/official/mnist/mnist_test.py b/official/mnist/mnist_test.py new file mode 100644 index 00000000000..36e53bcdd34 --- /dev/null +++ b/official/mnist/mnist_test.py @@ -0,0 +1,68 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+import mnist
+
+tf.logging.set_verbosity(tf.logging.ERROR)
+
+
+class BaseTest(tf.test.TestCase):
+  def input_fn(self):
+    features = tf.random_uniform([55000, 784])
+    # maxval is exclusive, so use 10 to cover all digit classes 0-9.
+    labels = tf.random_uniform([55000], maxval=10, dtype=tf.int32)
+    return features, tf.one_hot(labels, 10)
+
+  def mnist_model_fn_helper(self, mode):
+    features, labels = self.input_fn()
+    image_count = features.shape[0]
+    spec = mnist.mnist_model_fn(features, labels, mode)
+
+    predictions = spec.predictions
+    self.assertAllEqual(predictions['probabilities'].shape, (image_count, 10))
+    self.assertEqual(predictions['probabilities'].dtype, tf.float32)
+    self.assertAllEqual(predictions['classes'].shape, (image_count,))
+    self.assertEqual(predictions['classes'].dtype, tf.int64)
+
+    if mode != tf.estimator.ModeKeys.PREDICT:
+      loss = spec.loss
+      self.assertAllEqual(loss.shape, ())
+      self.assertEqual(loss.dtype, tf.float32)
+
+    if mode == tf.estimator.ModeKeys.EVAL:
+      eval_metric_ops = spec.eval_metric_ops
+      self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ())
+      self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ())
+      self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32)
+      self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32)
+
+  def test_mnist_model_fn_train_mode(self):
+    self.mnist_model_fn_helper(tf.estimator.ModeKeys.TRAIN)
+
+  def test_mnist_model_fn_eval_mode(self):
+    self.mnist_model_fn_helper(tf.estimator.ModeKeys.EVAL)
+
+  def test_mnist_model_fn_predict_mode(self):
+    self.mnist_model_fn_helper(tf.estimator.ModeKeys.PREDICT)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/official/resnet/README.md b/official/resnet/README.md
new file mode 100644
index 00000000000..59b6907c193
--- /dev/null
+++ b/official/resnet/README.md
@@ -0,0 +1,48 @@
+# ResNet in TensorFlow
+
+Deep residual networks, or ResNets for short, introduced the breakthrough idea of identity mappings to enable training of very deep convolutional neural networks. This folder contains a TensorFlow implementation of ResNet for both the CIFAR-10 and ImageNet datasets.
+
+See the following papers for more background:
+
+[Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf) by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.
+
+[Identity Mappings in Deep Residual Networks](https://arxiv.org/pdf/1603.05027.pdf) by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.
+
+Please proceed according to which dataset you would like to train/evaluate on:
+
+
+## CIFAR-10
+
+### Setup
+
+You simply need to have the latest version of TensorFlow installed.
+
+First download and extract the CIFAR-10 data from Alex Krizhevsky's website by running the following (the `--data_dir` flag controls where the data is stored):
+
+```
+python cifar10_download_and_extract.py
+```
+
+Then to train the model, run the following:
+
+```
+python cifar10_main.py
+```
+
+Use `--data_dir` to specify the location of the CIFAR-10 data from the previous step. Additional flags are described in `cifar10_main.py`.
+
+
+## ImageNet
+
+### Setup
+To begin, you will need to download the ImageNet dataset and convert it to TFRecord format. Follow along with the [Inception guide](https://github.com/tensorflow/models/tree/master/inception#getting-started) in order to prepare the dataset.
+
+Once your dataset is ready, you can begin training the model as follows:
+
+```
+python imagenet_main.py --data_dir=/path/to/imagenet
+```
+
+The model will begin training and will automatically evaluate itself on the validation data roughly once per epoch.
+
+Note that there are a number of other options you can specify, including `--model_dir` to choose where to store the model and `--resnet_size` to choose the model size (options include ResNet-18 through ResNet-200). See [`imagenet_main.py`](imagenet_main.py) for the full list of options.
diff --git a/official/resnet/__init__.py b/official/resnet/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/official/resnet/cifar10_download_and_extract.py b/official/resnet/cifar10_download_and_extract.py
new file mode 100644
index 00000000000..5a4d41bd2a2
--- /dev/null
+++ b/official/resnet/cifar10_download_and_extract.py
@@ -0,0 +1,62 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Downloads and extracts the binary version of the CIFAR-10 dataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import os
+import sys
+import tarfile
+
+from six.moves import urllib
+import tensorflow as tf
+
+DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    '--data_dir', type=str, default='/tmp/cifar10_data',
+    help='Directory to download data and extract the tarball')
+
+
+def main(unused_argv):
+  """Download and extract the tarball from Alex's website."""
+  if not os.path.exists(FLAGS.data_dir):
+    os.makedirs(FLAGS.data_dir)
+
+  filename = DATA_URL.split('/')[-1]
+  filepath = os.path.join(FLAGS.data_dir, filename)
+
+  if not os.path.exists(filepath):
+    def _progress(count, block_size, total_size):
+      sys.stdout.write('\r>> Downloading %s %.1f%%' % (
+          filename, 100.0 * count * block_size / total_size))
+      sys.stdout.flush()
+
+    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
+    print()
+    statinfo = os.stat(filepath)
+    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
+
+  tarfile.open(filepath, 'r:gz').extractall(FLAGS.data_dir)
+
+
+if __name__ == '__main__':
+  FLAGS = parser.parse_args()
+  tf.app.run()
diff --git a/official/resnet/cifar10_main.py b/official/resnet/cifar10_main.py
new file mode 100644
index 00000000000..838f48eb5f8
--- /dev/null
+++ b/official/resnet/cifar10_main.py
@@ -0,0 +1,278 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import os
+
+import tensorflow as tf
+
+import resnet_model
+
+HEIGHT = 32
+WIDTH = 32
+DEPTH = 3
+NUM_CLASSES = 10
+NUM_DATA_BATCHES = 5
+NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 10000 * NUM_DATA_BATCHES
+NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
+
+parser = argparse.ArgumentParser()
+
+# Basic model parameters.
+parser.add_argument('--data_dir', type=str, default='/tmp/cifar10_data', + help='The path to the CIFAR-10 data directory.') + +parser.add_argument('--model_dir', type=str, default='/tmp/cifar10_model', + help='The directory where the model will be stored.') + +parser.add_argument('--resnet_size', type=int, default=32, + help='The size of the ResNet model to use.') + +parser.add_argument('--train_steps', type=int, default=100000, + help='The number of batches to train.') + +parser.add_argument('--steps_per_eval', type=int, default=4000, + help='The number of batches to run in between evaluations.') + +parser.add_argument('--batch_size', type=int, default=128, + help='The number of images per batch.') + +FLAGS = parser.parse_args() + +# Scale the learning rate linearly with the batch size. When the batch size is +# 128, the learning rate should be 0.1. +_INITIAL_LEARNING_RATE = 0.1 * FLAGS.batch_size / 128 +_MOMENTUM = 0.9 + +# We use a weight decay of 0.0002, which performs better than the 0.0001 that +# was originally suggested. +_WEIGHT_DECAY = 2e-4 + +_BATCHES_PER_EPOCH = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size + + +def record_dataset(filenames): + """Returns an input pipeline Dataset from `filenames`.""" + record_bytes = HEIGHT * WIDTH * DEPTH + 1 + return tf.contrib.data.FixedLengthRecordDataset(filenames, record_bytes) + + +def filenames(mode): + """Returns a list of filenames based on 'mode'.""" + data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin') + + assert os.path.exists(data_dir), ('Run cifar10_download_and_extract.py first ' + 'to download and extract the CIFAR-10 data.') + + if mode == tf.estimator.ModeKeys.TRAIN: + return [ + os.path.join(data_dir, 'data_batch_%d.bin' % i) + for i in range(1, NUM_DATA_BATCHES + 1) + ] + elif mode == tf.estimator.ModeKeys.EVAL: + return [os.path.join(data_dir, 'test_batch.bin')] + else: + raise ValueError('Invalid mode: %s' % mode) + + +def dataset_parser(value): + """Parse a CIFAR-10 record from value.""" + # Every record consists of a label followed by the image, with a fixed number + # of bytes for each. + label_bytes = 1 + image_bytes = HEIGHT * WIDTH * DEPTH + record_bytes = label_bytes + image_bytes + + # Convert from a string to a vector of uint8 that is record_bytes long. + raw_record = tf.decode_raw(value, tf.uint8) + + # The first byte represents the label, which we convert from uint8 to int32. + label = tf.cast(raw_record[0], tf.int32) + + # The remaining bytes after the label represent the image, which we reshape + # from [depth * height * width] to [depth, height, width]. + depth_major = tf.reshape(raw_record[label_bytes:record_bytes], + [DEPTH, HEIGHT, WIDTH]) + + # Convert from [depth, height, width] to [height, width, depth], and cast as + # float32. + image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32) + + return image, tf.one_hot(label, NUM_CLASSES) + + +def train_preprocess_fn(image, label): + """Preprocess a single training image of layout [height, width, depth].""" + # Resize the image to add four extra pixels on each side. + image = tf.image.resize_image_with_crop_or_pad(image, HEIGHT + 8, WIDTH + 8) + + # Randomly crop a [HEIGHT, WIDTH] section of the image. + image = tf.random_crop(image, [HEIGHT, WIDTH, DEPTH]) + + # Randomly flip the image horizontally. + image = tf.image.random_flip_left_right(image) + + return image, label + + +def input_fn(mode, batch_size): + """Input_fn using the contrib.data input pipeline for CIFAR-10 dataset. 
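+
+  The records are read from the fixed-length binary CIFAR-10 files that
+  cifar10_download_and_extract.py places under --data_dir.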
+
+  Args:
+    mode: Standard names for model modes (tf.estimator.ModeKeys).
+    batch_size: The number of samples per batch of input requested.
+
+  Returns:
+    A tuple of images and labels.
+  """
+  dataset = record_dataset(filenames(mode))
+
+  # For training, repeat the dataset forever.
+  if mode == tf.estimator.ModeKeys.TRAIN:
+    dataset = dataset.repeat()
+
+  dataset = dataset.map(dataset_parser, num_threads=1,
+                        output_buffer_size=2 * batch_size)
+
+  # For training, preprocess the image and shuffle.
+  if mode == tf.estimator.ModeKeys.TRAIN:
+    dataset = dataset.map(train_preprocess_fn, num_threads=1,
+                          output_buffer_size=2 * batch_size)
+
+    # Ensure that the capacity is sufficiently large to provide good random
+    # shuffling.
+    buffer_size = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * 0.4) + 3 * batch_size
+    dataset = dataset.shuffle(buffer_size=buffer_size)
+
+  # Subtract off the mean and divide by the variance of the pixels.
+  dataset = dataset.map(
+      lambda image, label: (tf.image.per_image_standardization(image), label),
+      num_threads=1,
+      output_buffer_size=2 * batch_size)
+
+  # Batch results by up to batch_size, and then fetch the tuple from the
+  # iterator.
+  iterator = dataset.batch(batch_size).make_one_shot_iterator()
+  images, labels = iterator.get_next()
+
+  return images, labels
+
+
+def cifar10_model_fn(features, labels, mode):
+  """Model function for CIFAR-10."""
+  tf.summary.image('images', features, max_outputs=6)
+
+  network = resnet_model.cifar10_resnet_v2_generator(
+      FLAGS.resnet_size, NUM_CLASSES)
+
+  inputs = tf.reshape(features, [-1, HEIGHT, WIDTH, DEPTH])
+  logits = network(inputs, mode == tf.estimator.ModeKeys.TRAIN)
+
+  predictions = {
+      'classes': tf.argmax(logits, axis=1),
+      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
+  }
+
+  if mode == tf.estimator.ModeKeys.PREDICT:
+    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
+
+  # Calculate loss, which includes softmax cross entropy and L2 regularization.
+  cross_entropy = tf.losses.softmax_cross_entropy(
+      logits=logits, onehot_labels=labels)
+
+  # Create a tensor named cross_entropy for logging purposes.
+  tf.identity(cross_entropy, name='cross_entropy')
+  tf.summary.scalar('cross_entropy', cross_entropy)
+
+  # Add weight decay to the loss.
+  loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
+      [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
+
+  if mode == tf.estimator.ModeKeys.TRAIN:
+    global_step = tf.train.get_or_create_global_step()
+
+    # Multiply the learning rate by 0.1 at 100, 150, and 200 epochs.
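+    # With the default batch_size of 128, _BATCHES_PER_EPOCH is
+    # 50000 / 128 = 390.625, so these boundaries fall at roughly steps
+    # 39k, 59k, and 78k. `values` has one more entry than `boundaries`:
+    # one learning rate for each interval.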
+    boundaries = [int(_BATCHES_PER_EPOCH * epoch) for epoch in [100, 150, 200]]
+    values = [_INITIAL_LEARNING_RATE * decay for decay in [1, 0.1, 0.01, 0.001]]
+    learning_rate = tf.train.piecewise_constant(
+        tf.cast(global_step, tf.int32), boundaries, values)
+
+    # Create a tensor named learning_rate for logging purposes
+    tf.identity(learning_rate, name='learning_rate')
+    tf.summary.scalar('learning_rate', learning_rate)
+
+    optimizer = tf.train.MomentumOptimizer(
+        learning_rate=learning_rate,
+        momentum=_MOMENTUM)
+
+    # Batch norm requires update ops to be added as a dependency to the train_op
+    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+    with tf.control_dependencies(update_ops):
+      train_op = optimizer.minimize(loss, global_step)
+  else:
+    train_op = None
+
+  accuracy = tf.metrics.accuracy(
+      tf.argmax(labels, axis=1), predictions['classes'])
+  metrics = {'accuracy': accuracy}
+
+  # Create a tensor named train_accuracy for logging purposes
+  tf.identity(accuracy[1], name='train_accuracy')
+  tf.summary.scalar('train_accuracy', accuracy[1])
+
+  return tf.estimator.EstimatorSpec(
+      mode=mode,
+      predictions=predictions,
+      loss=loss,
+      train_op=train_op,
+      eval_metric_ops=metrics)
+
+
+def main(unused_argv):
+  # Using the Winograd non-fused algorithms provides a small performance boost.
+  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
+
+  cifar_classifier = tf.estimator.Estimator(
+      model_fn=cifar10_model_fn, model_dir=FLAGS.model_dir)
+
+  for cycle in range(FLAGS.train_steps // FLAGS.steps_per_eval):
+    tensors_to_log = {
+        'learning_rate': 'learning_rate',
+        'cross_entropy': 'cross_entropy',
+        'train_accuracy': 'train_accuracy'
+    }
+
+    logging_hook = tf.train.LoggingTensorHook(
+        tensors=tensors_to_log, every_n_iter=100)
+
+    cifar_classifier.train(
+        input_fn=lambda: input_fn(tf.estimator.ModeKeys.TRAIN,
+                                  batch_size=FLAGS.batch_size),
+        steps=FLAGS.steps_per_eval,
+        hooks=[logging_hook])
+
+    # Evaluate the model and print results
+    eval_results = cifar_classifier.evaluate(
+        input_fn=lambda: input_fn(tf.estimator.ModeKeys.EVAL,
+                                  batch_size=FLAGS.batch_size))
+    print(eval_results)
+
+
+if __name__ == '__main__':
+  tf.logging.set_verbosity(tf.logging.INFO)
+  tf.app.run()
diff --git a/official/resnet/cifar10_test.py b/official/resnet/cifar10_test.py
new file mode 100644
index 00000000000..ffe1e5fcbb5
--- /dev/null
+++ b/official/resnet/cifar10_test.py
@@ -0,0 +1,104 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tempfile import mkstemp
+
+import numpy as np
+import tensorflow as tf
+
+import cifar10_main
+
+tf.logging.set_verbosity(tf.logging.ERROR)
+
+
+class BaseTest(tf.test.TestCase):
+  def test_dataset_input_fn(self):
+    fake_data = bytearray()
+    fake_data.append(7)
+    for i in range(3):
+      for _ in range(1024):
+        fake_data.append(i)
+
+    _, filename = mkstemp(dir=self.get_temp_dir())
+    with open(filename, 'wb') as f:
+      f.write(fake_data)
+
+    fake_dataset = cifar10_main.record_dataset(filename)
+    fake_dataset = fake_dataset.map(cifar10_main.dataset_parser)
+    image, label = fake_dataset.make_one_shot_iterator().get_next()
+
+    self.assertEqual(label.get_shape().as_list(), [10])
+    self.assertEqual(image.get_shape().as_list(), [32, 32, 3])
+
+    with self.test_session() as sess:
+      image, label = sess.run([image, label])
+
+    self.assertAllEqual(label, np.array([int(i == 7) for i in range(10)]))
+
+    for row in image:
+      for pixel in row:
+        self.assertAllEqual(pixel, np.array([0, 1, 2]))
+
+  def input_fn(self):
+    features = tf.random_uniform([FLAGS.batch_size, 32, 32, 3])
+    # maxval is exclusive, so use 10 to cover all classes 0-9.
+    labels = tf.random_uniform(
+        [FLAGS.batch_size], maxval=10, dtype=tf.int32)
+    return features, tf.one_hot(labels, 10)
+
+  def cifar10_model_fn_helper(self, mode):
+    features, labels = self.input_fn()
+    spec = cifar10_main.cifar10_model_fn(features, labels, mode)
+
+    predictions = spec.predictions
+    self.assertAllEqual(predictions['probabilities'].shape,
+                        (FLAGS.batch_size, 10))
+    self.assertEqual(predictions['probabilities'].dtype, tf.float32)
+    self.assertAllEqual(predictions['classes'].shape, (FLAGS.batch_size,))
+    self.assertEqual(predictions['classes'].dtype, tf.int64)
+
+    if mode != tf.estimator.ModeKeys.PREDICT:
+      loss = spec.loss
+      self.assertAllEqual(loss.shape, ())
+      self.assertEqual(loss.dtype, tf.float32)
+
+    if mode == tf.estimator.ModeKeys.EVAL:
+      eval_metric_ops = spec.eval_metric_ops
+      self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ())
+      self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ())
+      self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32)
+      self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32)
+
+  def test_cifar10_model_fn_train_mode(self):
+    self.cifar10_model_fn_helper(tf.estimator.ModeKeys.TRAIN)
+
+  def test_cifar10_model_fn_eval_mode(self):
+    self.cifar10_model_fn_helper(tf.estimator.ModeKeys.EVAL)
+
+  def test_cifar10_model_fn_predict_mode(self):
+    self.cifar10_model_fn_helper(tf.estimator.ModeKeys.PREDICT)
+
+
+if __name__ == '__main__':
+  FLAGS = cifar10_main.parser.parse_args()
+  cifar10_main.FLAGS = FLAGS
+  tf.test.main()
diff --git a/official/resnet/imagenet.py b/official/resnet/imagenet.py
new file mode 100644
index 00000000000..3f4ff062d9e
--- /dev/null
+++ b/official/resnet/imagenet.py
@@ -0,0 +1,194 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes.
+
+Some images have one or more bounding boxes associated with the label of the
+image. See details here: http://image-net.org/download-bboxes
+
+ImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use
+"WordNet ID" (wnid), which is a concatenation of POS (i.e., part of speech)
+and the SYNSET OFFSET of WordNet. For more information, please refer to the
+WordNet documentation [http://wordnet.princeton.edu/wordnet/documentation/].
+
+"There are bounding boxes for over 3000 popular synsets available.
+For each synset, there are on average 150 images with bounding boxes."
+
+WARNING: Don't use this data for object detection; in this dataset, all the
+bounding boxes of an image belong to just one class.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+from six.moves import urllib
+import tensorflow as tf
+
+import imagenet_dataset_utils
+
+slim = tf.contrib.slim
+
+# TODO(nsilberman): Add tfrecord file type once the script is updated.
+_FILE_PATTERN = '%s-*'
+
+_SPLITS_TO_SIZES = {
+    'train': 1281167,
+    'validation': 50000,
+}
+
+_ITEMS_TO_DESCRIPTIONS = {
+    'image': 'A color image of varying height and width.',
+    'label': 'The label id of the image, integer between 0 and 999',
+    'label_text': 'The text of the label.',
+    'object/bbox': 'A list of bounding boxes.',
+    'object/label': 'A list of labels, one per object.',
+}
+
+_NUM_CLASSES = 1001
+
+
+def create_readable_names_for_imagenet_labels():
+  """Create a dict mapping label id to human readable string.
+
+  Returns:
+    labels_to_names: dictionary where keys are integers from 0 to 1000
+    and values are human-readable names.
+
+  We retrieve a synset file, which contains a list of valid synset labels used
+  by the ILSVRC competition. There is one synset per line, e.g.
+  #   n01440764
+  #   n01443537
+  We also retrieve a synset_to_human_file, which contains a mapping from
+  synsets to human-readable names for every synset in ImageNet. These are
+  stored in a tsv format, as follows:
+  #   n02119247    black fox
+  #   n02119359    silver fox
+  We assign each synset (in alphabetical order) an integer, starting from 1
+  (since 0 is reserved for the background class).
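+  For example, the alphabetically first synset, n01440764 ('tench'), is
+  assigned label 1.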
+
+  Code is based on
+  https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463
+  """
+
+  # pylint: disable=g-line-too-long
+  base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data'
+  synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url)
+  synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url)
+
+  filename, _ = urllib.request.urlretrieve(synset_url)
+  synset_list = [s.strip() for s in open(filename).readlines()]
+  num_synsets_in_ilsvrc = len(synset_list)
+  assert num_synsets_in_ilsvrc == 1000
+
+  filename, _ = urllib.request.urlretrieve(synset_to_human_url)
+  synset_to_human_list = open(filename).readlines()
+  num_synsets_in_all_imagenet = len(synset_to_human_list)
+  assert num_synsets_in_all_imagenet == 21842
+
+  synset_to_human = {}
+  for s in synset_to_human_list:
+    parts = s.strip().split('\t')
+    assert len(parts) == 2
+    synset = parts[0]
+    human = parts[1]
+    synset_to_human[synset] = human
+
+  label_index = 1
+  labels_to_names = {0: 'background'}
+  for synset in synset_list:
+    name = synset_to_human[synset]
+    labels_to_names[label_index] = name
+    label_index += 1
+
+  return labels_to_names
+
+
+def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
+  """Gets a dataset tuple with instructions for reading ImageNet.
+
+  Args:
+    split_name: A train/test split name.
+    dataset_dir: The base directory of the dataset sources.
+    file_pattern: The file pattern to use when matching the dataset sources.
+      It is assumed that the pattern contains a '%s' string so that the split
+      name can be inserted.
+    reader: The TensorFlow reader type.
+
+  Returns:
+    A `Dataset` namedtuple.
+
+  Raises:
+    ValueError: if `split_name` is not a valid train/test split.
+  """
+  if split_name not in _SPLITS_TO_SIZES:
+    raise ValueError('split name %s was not recognized.' % split_name)
+
+  if not file_pattern:
+    file_pattern = _FILE_PATTERN
+  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
+
+  # Allowing None in the signature so that dataset_factory can use the default.
+ if reader is None: + reader = tf.TFRecordReader + + keys_to_features = { + 'image/encoded': tf.FixedLenFeature( + (), tf.string, default_value=''), + 'image/format': tf.FixedLenFeature( + (), tf.string, default_value='jpeg'), + 'image/class/label': tf.FixedLenFeature( + [], dtype=tf.int64, default_value=-1), + 'image/class/text': tf.FixedLenFeature( + [], dtype=tf.string, default_value=''), + 'image/object/bbox/xmin': tf.VarLenFeature( + dtype=tf.float32), + 'image/object/bbox/ymin': tf.VarLenFeature( + dtype=tf.float32), + 'image/object/bbox/xmax': tf.VarLenFeature( + dtype=tf.float32), + 'image/object/bbox/ymax': tf.VarLenFeature( + dtype=tf.float32), + 'image/object/class/label': tf.VarLenFeature( + dtype=tf.int64), + } + + items_to_handlers = { + 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), + 'label': slim.tfexample_decoder.Tensor('image/class/label'), + 'label_text': slim.tfexample_decoder.Tensor('image/class/text'), + 'object/bbox': slim.tfexample_decoder.BoundingBox( + ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), + 'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'), + } + + decoder = slim.tfexample_decoder.TFExampleDecoder( + keys_to_features, items_to_handlers) + + labels_to_names = None + if imagenet_dataset_utils.has_labels(dataset_dir): + labels_to_names = imagenet_dataset_utils.read_label_file(dataset_dir) + else: + labels_to_names = create_readable_names_for_imagenet_labels() + imagenet_dataset_utils.write_label_file(labels_to_names, dataset_dir) + + return slim.dataset.Dataset( + data_sources=file_pattern, + reader=reader, + decoder=decoder, + num_samples=_SPLITS_TO_SIZES[split_name], + items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, + num_classes=_NUM_CLASSES, + labels_to_names=labels_to_names) diff --git a/official/resnet/imagenet_dataset_utils.py b/official/resnet/imagenet_dataset_utils.py new file mode 100644 index 00000000000..5ffc065b7e6 --- /dev/null +++ b/official/resnet/imagenet_dataset_utils.py @@ -0,0 +1,137 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains utilities for downloading and converting datasets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import tarfile + +from six.moves import urllib +import tensorflow as tf + +LABELS_FILENAME = 'labels.txt' + + +def int64_feature(values): + """Returns a TF-Feature of int64s. + + Args: + values: A scalar or list of values. + + Returns: + a TF-Feature. + """ + if not isinstance(values, (tuple, list)): + values = [values] + return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) + + +def bytes_feature(values): + """Returns a TF-Feature of bytes. + + Args: + values: A string. + + Returns: + a TF-Feature. 
+  """
+  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))
+
+
+def image_to_tfexample(image_data, image_format, height, width, class_id):
+  return tf.train.Example(features=tf.train.Features(feature={
+      'image/encoded': bytes_feature(image_data),
+      'image/format': bytes_feature(image_format),
+      'image/class/label': int64_feature(class_id),
+      'image/height': int64_feature(height),
+      'image/width': int64_feature(width),
+  }))
+
+
+def download_and_uncompress_tarball(tarball_url, dataset_dir):
+  """Downloads the `tarball_url` and uncompresses it locally.
+
+  Args:
+    tarball_url: The URL of a tarball file.
+    dataset_dir: The directory where the temporary files are stored.
+  """
+  filename = tarball_url.split('/')[-1]
+  filepath = os.path.join(dataset_dir, filename)
+
+  def _progress(count, block_size, total_size):
+    sys.stdout.write('\r>> Downloading %s %.1f%%' % (
+        filename, float(count * block_size) / float(total_size) * 100.0))
+    sys.stdout.flush()
+  filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)
+  print()
+  statinfo = os.stat(filepath)
+  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
+  tarfile.open(filepath, 'r:gz').extractall(dataset_dir)
+
+
+def write_label_file(labels_to_class_names, dataset_dir,
+                     filename=LABELS_FILENAME):
+  """Writes a file with the list of class names.
+
+  Args:
+    labels_to_class_names: A map of (integer) labels to class names.
+    dataset_dir: The directory in which the labels file should be written.
+    filename: The filename where the class names are written.
+  """
+  labels_filename = os.path.join(dataset_dir, filename)
+  with tf.gfile.Open(labels_filename, 'w') as f:
+    for label in labels_to_class_names:
+      class_name = labels_to_class_names[label]
+      f.write('%d:%s\n' % (label, class_name))
+
+
+def has_labels(dataset_dir, filename=LABELS_FILENAME):
+  """Specifies whether or not the dataset directory contains a label map file.
+
+  Args:
+    dataset_dir: The directory in which the labels file is found.
+    filename: The filename where the class names are written.
+
+  Returns:
+    `True` if the labels file exists and `False` otherwise.
+  """
+  return tf.gfile.Exists(os.path.join(dataset_dir, filename))
+
+
+def read_label_file(dataset_dir, filename=LABELS_FILENAME):
+  """Reads the labels file and returns a mapping from ID to class name.
+
+  Args:
+    dataset_dir: The directory in which the labels file is found.
+    filename: The filename where the class names are written.
+
+  Returns:
+    A map from a label (integer) to class name.
+  """
+  labels_filename = os.path.join(dataset_dir, filename)
+  # Open in binary mode and decode so this works under both Python 2 and 3.
+  with tf.gfile.Open(labels_filename, 'rb') as f:
+    lines = f.read().decode()
+  lines = lines.split('\n')
+  lines = filter(None, lines)
+
+  labels_to_class_names = {}
+  for line in lines:
+    index = line.index(':')
+    labels_to_class_names[int(line[:index])] = line[index+1:]
+  return labels_to_class_names
diff --git a/official/resnet/imagenet_main.py b/official/resnet/imagenet_main.py
new file mode 100644
index 00000000000..ae8f8723f11
--- /dev/null
+++ b/official/resnet/imagenet_main.py
@@ -0,0 +1,214 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os + +import tensorflow as tf + +import imagenet +import resnet_model +import vgg_preprocessing + +parser = argparse.ArgumentParser() + +parser.add_argument( + '--data_dir', type=str, default='', + help='The directory where the ImageNet input data is stored.') + +parser.add_argument( + '--model_dir', type=str, default='/tmp/resnet_model', + help='The directory where the model will be stored.') + +parser.add_argument( + '--resnet_size', type=int, default=50, choices=[18, 34, 50, 101, 152, 200], + help='The size of the ResNet model to use.') + +parser.add_argument( + '--train_steps', type=int, default=6400000, + help='The number of steps to use for training.') + +parser.add_argument( + '--steps_per_eval', type=int, default=40000, + help='The number of training steps to run between evaluations.') + +parser.add_argument( + '--train_batch_size', type=int, default=32, help='Batch size for training.') + +parser.add_argument( + '--eval_batch_size', type=int, default=100, + help='Batch size for evaluation.') + +parser.add_argument( + '--first_cycle_steps', type=int, default=None, + help='The number of steps to run before the first evaluation. Useful if ' + 'you have stopped partway through a training cycle.') + +FLAGS = parser.parse_args() +_EVAL_STEPS = 50000 // FLAGS.eval_batch_size + +# Scale the learning rate linearly with the batch size. When the batch size is +# 256, the learning rate should be 0.1. 
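+# For example, the default --train_batch_size of 32 yields an initial learning
+# rate of 0.1 * 32 / 256 = 0.0125.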
+_INITIAL_LEARNING_RATE = 0.1 * FLAGS.train_batch_size / 256 + +_MOMENTUM = 0.9 +_WEIGHT_DECAY = 1e-4 + +train_dataset = imagenet.get_split('train', FLAGS.data_dir) +eval_dataset = imagenet.get_split('validation', FLAGS.data_dir) + +image_preprocessing_fn = vgg_preprocessing.preprocess_image +network = resnet_model.resnet_v2( + resnet_size=FLAGS.resnet_size, num_classes=train_dataset.num_classes) + +batches_per_epoch = train_dataset.num_samples / FLAGS.train_batch_size + + +def input_fn(is_training): + """Input function which provides a single batch for train or eval.""" + batch_size = FLAGS.train_batch_size if is_training else FLAGS.eval_batch_size + dataset = train_dataset if is_training else eval_dataset + capacity_multiplier = 20 if is_training else 2 + min_multiplier = 10 if is_training else 1 + + provider = tf.contrib.slim.dataset_data_provider.DatasetDataProvider( + dataset=dataset, + num_readers=4, + common_queue_capacity=capacity_multiplier * batch_size, + common_queue_min=min_multiplier * batch_size) + + image, label = provider.get(['image', 'label']) + + image = image_preprocessing_fn(image=image, + output_height=network.default_image_size, + output_width=network.default_image_size, + is_training=is_training) + + images, labels = tf.train.batch(tensors=[image, label], + batch_size=batch_size, + num_threads=4, + capacity=5 * batch_size) + + labels = tf.one_hot(labels, imagenet._NUM_CLASSES) + return images, labels + + +def resnet_model_fn(features, labels, mode): + """ Our model_fn for ResNet to be used with our Estimator.""" + tf.summary.image('images', features, max_outputs=6) + + logits = network( + inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN)) + + predictions = { + 'classes': tf.argmax(logits, axis=1), + 'probabilities': tf.nn.softmax(logits, name='softmax_tensor') + } + + if mode == tf.estimator.ModeKeys.PREDICT: + return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) + + # Calculate loss, which includes softmax cross entropy and L2 regularization. + cross_entropy = tf.losses.softmax_cross_entropy( + logits=logits, onehot_labels=labels) + + # Create a tensor named cross_entropy for logging purposes. + tf.identity(cross_entropy, name='cross_entropy') + tf.summary.scalar('cross_entropy', cross_entropy) + + # Add weight decay to the loss. We perform weight decay on all trainable + # variables, which includes batch norm beta and gamma variables. + loss = cross_entropy + _WEIGHT_DECAY * tf.add_n( + [tf.nn.l2_loss(v) for v in tf.trainable_variables()]) + + if mode == tf.estimator.ModeKeys.TRAIN: + global_step = tf.train.get_or_create_global_step() + + # Multiply the learning rate by 0.1 at 30, 60, 120, and 150 epochs. + boundaries = [ + int(batches_per_epoch * epoch) for epoch in [30, 60, 120, 150]] + values = [ + _INITIAL_LEARNING_RATE * decay for decay in [1, 0.1, 0.01, 1e-3, 1e-4]] + learning_rate = tf.train.piecewise_constant( + tf.cast(global_step, tf.int32), boundaries, values) + + # Create a tensor named learning_rate for logging purposes. + tf.identity(learning_rate, name='learning_rate') + tf.summary.scalar('learning_rate', learning_rate) + + optimizer = tf.train.MomentumOptimizer( + learning_rate=learning_rate, + momentum=_MOMENTUM) + + # Batch norm requires update_ops to be added as a train_op dependency. 
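+    # Without this dependency, the moving mean and variance that batch
+    # normalization uses at eval time would never be updated.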
+    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+    with tf.control_dependencies(update_ops):
+      train_op = optimizer.minimize(loss, global_step)
+  else:
+    train_op = None
+
+  accuracy = tf.metrics.accuracy(
+      tf.argmax(labels, axis=1), predictions['classes'])
+  metrics = {'accuracy': accuracy}
+
+  # Create a tensor named train_accuracy for logging purposes.
+  tf.identity(accuracy[1], name='train_accuracy')
+  tf.summary.scalar('train_accuracy', accuracy[1])
+
+  return tf.estimator.EstimatorSpec(
+      mode=mode,
+      predictions=predictions,
+      loss=loss,
+      train_op=train_op,
+      eval_metric_ops=metrics)
+
+
+def main(unused_argv):
+  # Using the Winograd non-fused algorithms provides a small performance boost.
+  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
+
+  resnet_classifier = tf.estimator.Estimator(
+      model_fn=resnet_model_fn, model_dir=FLAGS.model_dir)
+
+  for cycle in range(FLAGS.train_steps // FLAGS.steps_per_eval):
+    tensors_to_log = {
+        'learning_rate': 'learning_rate',
+        'cross_entropy': 'cross_entropy',
+        'train_accuracy': 'train_accuracy'
+    }
+
+    logging_hook = tf.train.LoggingTensorHook(
+        tensors=tensors_to_log, every_n_iter=100)
+
+    print('Starting a training cycle.')
+    resnet_classifier.train(
+        input_fn=lambda: input_fn(True),
+        steps=FLAGS.first_cycle_steps or FLAGS.steps_per_eval,
+        hooks=[logging_hook])
+    FLAGS.first_cycle_steps = None
+
+    print('Starting to evaluate.')
+    eval_results = resnet_classifier.evaluate(
+        input_fn=lambda: input_fn(False), steps=_EVAL_STEPS)
+    print(eval_results)
+
+
+if __name__ == '__main__':
+  tf.logging.set_verbosity(tf.logging.INFO)
+  tf.app.run()
diff --git a/official/resnet/imagenet_test.py b/official/resnet/imagenet_test.py
new file mode 100644
index 00000000000..8aca8a4c06d
--- /dev/null
+++ b/official/resnet/imagenet_test.py
@@ -0,0 +1,177 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+
+import tensorflow as tf
+
+import imagenet_main
+import resnet_model
+
+tf.logging.set_verbosity(tf.logging.ERROR)
+
+_LABEL_CLASSES = 1001
+
+
+class BaseTest(tf.test.TestCase):
+
+  def tensor_shapes_helper(self, resnet_size, with_gpu=False):
+    """Checks the tensor shapes after each phase of the ResNet model."""
+    def reshape(shape):
+      """Returns the expected dimensions depending on whether a GPU is used.
+
+      If a GPU is used for the test, the shape is returned as-is (already in
+      NCHW form). When no GPU is used, the shape is converted to NHWC.
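+      For example, (1, 64, 112, 112) is returned unchanged when a GPU is
+      used, and becomes (1, 112, 112, 64) otherwise.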
+ """ + if with_gpu: + return shape + return shape[0], shape[2], shape[3], shape[1] + + graph = tf.Graph() + + with graph.as_default(), self.test_session( + use_gpu=with_gpu, force_gpu=with_gpu): + model = resnet_model.resnet_v2( + resnet_size, 456, + data_format='channels_first' if with_gpu else 'channels_last') + inputs = tf.random_uniform([1, 224, 224, 3]) + output = model(inputs, is_training=True) + + initial_conv = graph.get_tensor_by_name('initial_conv:0') + max_pool = graph.get_tensor_by_name('initial_max_pool:0') + block_layer1 = graph.get_tensor_by_name('block_layer1:0') + block_layer2 = graph.get_tensor_by_name('block_layer2:0') + block_layer3 = graph.get_tensor_by_name('block_layer3:0') + block_layer4 = graph.get_tensor_by_name('block_layer4:0') + avg_pool = graph.get_tensor_by_name('final_avg_pool:0') + dense = graph.get_tensor_by_name('final_dense:0') + + self.assertAllEqual(initial_conv.shape, reshape((1, 64, 112, 112))) + self.assertAllEqual(max_pool.shape, reshape((1, 64, 56, 56))) + + # The number of channels after each block depends on whether we're + # using the building_block or the bottleneck_block. + if resnet_size < 50: + self.assertAllEqual(block_layer1.shape, reshape((1, 64, 56, 56))) + self.assertAllEqual(block_layer2.shape, reshape((1, 128, 28, 28))) + self.assertAllEqual(block_layer3.shape, reshape((1, 256, 14, 14))) + self.assertAllEqual(block_layer4.shape, reshape((1, 512, 7, 7))) + self.assertAllEqual(avg_pool.shape, reshape((1, 512, 1, 1))) + else: + self.assertAllEqual(block_layer1.shape, reshape((1, 256, 56, 56))) + self.assertAllEqual(block_layer2.shape, reshape((1, 512, 28, 28))) + self.assertAllEqual(block_layer3.shape, reshape((1, 1024, 14, 14))) + self.assertAllEqual(block_layer4.shape, reshape((1, 2048, 7, 7))) + self.assertAllEqual(avg_pool.shape, reshape((1, 2048, 1, 1))) + + self.assertAllEqual(dense.shape, (1, 456)) + self.assertAllEqual(output.shape, (1, 456)) + + def test_tensor_shapes_resnet_18(self): + self.tensor_shapes_helper(18) + + def test_tensor_shapes_resnet_34(self): + self.tensor_shapes_helper(34) + + def test_tensor_shapes_resnet_50(self): + self.tensor_shapes_helper(50) + + def test_tensor_shapes_resnet_101(self): + self.tensor_shapes_helper(101) + + def test_tensor_shapes_resnet_152(self): + self.tensor_shapes_helper(152) + + def test_tensor_shapes_resnet_200(self): + self.tensor_shapes_helper(200) + + @unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU') + def test_tensor_shapes_resnet_18_with_gpu(self): + self.tensor_shapes_helper(18, True) + + @unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU') + def test_tensor_shapes_resnet_34_with_gpu(self): + self.tensor_shapes_helper(34, True) + + @unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU') + def test_tensor_shapes_resnet_50_with_gpu(self): + self.tensor_shapes_helper(50, True) + + @unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU') + def test_tensor_shapes_resnet_101_with_gpu(self): + self.tensor_shapes_helper(101, True) + + @unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU') + def test_tensor_shapes_resnet_152_with_gpu(self): + self.tensor_shapes_helper(152, True) + + @unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU') + def test_tensor_shapes_resnet_200_with_gpu(self): + self.tensor_shapes_helper(200, True) + + def input_fn(self): + """Provides random features and labels.""" + features = tf.random_uniform([FLAGS.train_batch_size, 224, 224, 3]) + labels = tf.one_hot( + tf.random_uniform( + 
[FLAGS.train_batch_size], maxval=_LABEL_CLASSES - 1, + dtype=tf.int32), + _LABEL_CLASSES) + + return features, labels + + def resnet_model_fn_helper(self, mode): + """Tests that the EstimatorSpec is given the appropriate arguments.""" + tf.train.create_global_step() + + features, labels = self.input_fn() + spec = imagenet_main.resnet_model_fn(features, labels, mode) + + predictions = spec.predictions + self.assertAllEqual(predictions['probabilities'].shape, + (FLAGS.train_batch_size, _LABEL_CLASSES)) + self.assertEqual(predictions['probabilities'].dtype, tf.float32) + self.assertAllEqual(predictions['classes'].shape, (FLAGS.train_batch_size,)) + self.assertEqual(predictions['classes'].dtype, tf.int64) + + if mode != tf.estimator.ModeKeys.PREDICT: + loss = spec.loss + self.assertAllEqual(loss.shape, ()) + self.assertEqual(loss.dtype, tf.float32) + + if mode == tf.estimator.ModeKeys.EVAL: + eval_metric_ops = spec.eval_metric_ops + self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ()) + self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ()) + self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32) + self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32) + + def test_resnet_model_fn_train_mode(self): + self.resnet_model_fn_helper(tf.estimator.ModeKeys.TRAIN) + + def test_resnet_model_fn_eval_mode(self): + self.resnet_model_fn_helper(tf.estimator.ModeKeys.EVAL) + + def test_resnet_model_fn_predict_mode(self): + self.resnet_model_fn_helper(tf.estimator.ModeKeys.PREDICT) + + +if __name__ == '__main__': + FLAGS = imagenet_main.FLAGS + tf.test.main() diff --git a/official/resnet/resnet_model.py b/official/resnet/resnet_model.py new file mode 100644 index 00000000000..170664d30ee --- /dev/null +++ b/official/resnet/resnet_model.py @@ -0,0 +1,361 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains definitions for the preactivation form of Residual Networks. + +Residual networks (ResNets) were originally proposed in: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 + +The full preactivation 'v2' ResNet variant implemented in this module was +introduced by: +[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 + +The key difference of the full preactivation 'v2' variant compared to the +'v1' variant in [1] is the use of batch normalization before every weight layer +rather than after. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +_BATCH_NORM_DECAY = 0.997 +_BATCH_NORM_EPSILON = 1e-5 + + +def batch_norm_relu(inputs, is_training, data_format): + """Performs a batch normalization followed by a ReLU.""" + # We set fused=True for a significant performance boost. 
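+ # axis selects the channels dimension of the input: index 1 for NCHW + # ('channels_first') and index 3 for NHWC ('channels_last').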
+ inputs = tf.layers.batch_normalization( + inputs=inputs, axis=1 if data_format == 'channels_first' else 3, + momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True, + scale=True, training=is_training, fused=True) + inputs = tf.nn.relu(inputs) + return inputs + + +def fixed_padding(inputs, kernel_size, data_format): + """Pads the input along the spatial dimensions independently of input size. + + Args: + inputs: A tensor of size [batch, channels, height_in, width_in] or + [batch, height_in, width_in, channels] depending on data_format. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + Should be a positive integer. + data_format: The input format ('channels_last' or 'channels_first'). + + Returns: + A tensor with the same format as the input with the data either intact + (if kernel_size == 1) or padded (if kernel_size > 1). + """ + pad_total = kernel_size - 1 + pad_beg = pad_total // 2 + pad_end = pad_total - pad_beg + + if data_format == 'channels_first': + padded_inputs = tf.pad(inputs, [[0, 0], [0, 0], + [pad_beg, pad_end], [pad_beg, pad_end]]) + else: + padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], + [pad_beg, pad_end], [0, 0]]) + return padded_inputs + + +def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format): + """Strided 2-D convolution with explicit padding. + + The padding is consistent and is based only on `kernel_size`, not on the + dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone). + """ + if strides > 1: + inputs = fixed_padding(inputs, kernel_size, data_format) + + return tf.layers.conv2d( + inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, + padding=('SAME' if strides == 1 else 'VALID'), use_bias=False, + kernel_initializer=tf.variance_scaling_initializer(), + data_format=data_format) + + +def building_block(inputs, filters, is_training, projection_shortcut, strides, + data_format): + """Standard building block for residual networks with BN before convolutions. + + Args: + inputs: A tensor of size [batch, channels, height_in, width_in] or + [batch, height_in, width_in, channels] depending on data_format. + filters: The number of filters for the convolutions. + is_training: A Boolean for whether the model is in training or inference + mode. Needed for batch normalization. + projection_shortcut: The function to use for projection shortcuts (typically + a 1x1 convolution when downsampling the input). + strides: The block's stride. If greater than 1, this block will ultimately + downsample the input. + data_format: The input format ('channels_last' or 'channels_first'). + + Returns: + The output tensor of the block. + """ + shortcut = inputs + inputs = batch_norm_relu(inputs, is_training, data_format) + + # The projection shortcut should come after the first batch norm and ReLU + # since it performs a 1x1 convolution. + if projection_shortcut is not None: + shortcut = projection_shortcut(inputs) + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=filters, kernel_size=3, strides=strides, + data_format=data_format) + + inputs = batch_norm_relu(inputs, is_training, data_format) + inputs = conv2d_fixed_padding( + inputs=inputs, filters=filters, kernel_size=3, strides=1, + data_format=data_format) + + return inputs + shortcut + + +def bottleneck_block(inputs, filters, is_training, projection_shortcut, + strides, data_format): + """Bottleneck block variant for residual networks with BN before convolutions. 
+ + Args: + inputs: A tensor of size [batch, channels, height_in, width_in] or + [batch, height_in, width_in, channels] depending on data_format. + filters: The number of filters for the first two convolutions. Note that the + third and final convolution will use 4 times as many filters. + is_training: A Boolean for whether the model is in training or inference + mode. Needed for batch normalization. + projection_shortcut: The function to use for projection shortcuts (typically + a 1x1 convolution when downsampling the input). + strides: The block's stride. If greater than 1, this block will ultimately + downsample the input. + data_format: The input format ('channels_last' or 'channels_first'). + + Returns: + The output tensor of the block. + """ + shortcut = inputs + inputs = batch_norm_relu(inputs, is_training, data_format) + + # The projection shortcut should come after the first batch norm and ReLU + # since it performs a 1x1 convolution. + if projection_shortcut is not None: + shortcut = projection_shortcut(inputs) + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=filters, kernel_size=1, strides=1, + data_format=data_format) + + inputs = batch_norm_relu(inputs, is_training, data_format) + inputs = conv2d_fixed_padding( + inputs=inputs, filters=filters, kernel_size=3, strides=strides, + data_format=data_format) + + inputs = batch_norm_relu(inputs, is_training, data_format) + inputs = conv2d_fixed_padding( + inputs=inputs, filters=4 * filters, kernel_size=1, strides=1, + data_format=data_format) + + return inputs + shortcut + + +def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name, + data_format): + """Creates one layer of blocks for the ResNet model. + + Args: + inputs: A tensor of size [batch, channels, height_in, width_in] or + [batch, height_in, width_in, channels] depending on data_format. + filters: The number of filters for the first convolution of the layer. + block_fn: The block to use within the model, either `building_block` or + `bottleneck_block`. + blocks: The number of blocks contained in the layer. + strides: The stride to use for the first convolution of the layer. If + greater than 1, this layer will ultimately downsample the input. + is_training: Either True or False, whether we are currently training the + model. Needed for batch norm. + name: A string name for the tensor output of the block layer. + data_format: The input format ('channels_last' or 'channels_first'). + + Returns: + The output tensor of the block layer. + """ + # Bottleneck blocks end with 4x the number of filters as they start with + filters_out = 4 * filters if block_fn is bottleneck_block else filters + + def projection_shortcut(inputs): + return conv2d_fixed_padding( + inputs=inputs, filters=filters_out, kernel_size=1, strides=strides, + data_format=data_format) + + # Only the first block per block_layer uses projection_shortcut and strides + inputs = block_fn(inputs, filters, is_training, projection_shortcut, strides, + data_format) + + for i in range(1, blocks): + inputs = block_fn(inputs, filters, is_training, None, 1, data_format) + + return tf.identity(inputs, name) + + +def cifar10_resnet_v2_generator(resnet_size, num_classes, data_format=None): + """Generator for CIFAR-10 ResNet v2 models. + + Args: + resnet_size: A single integer for the size of the ResNet model. + num_classes: The number of possible classes for image classification. + data_format: The input format ('channels_last', 'channels_first', or None). 
+ If set to None, the format is dependent on whether TensorFlow was + built with CUDA (GPU) support. + + Returns: + The model function that takes in `inputs` and `is_training` and + returns the output tensor of the ResNet model. + """ + if resnet_size % 6 != 2: + raise ValueError('resnet_size must be 6n + 2:', resnet_size) + + num_blocks = (resnet_size - 2) // 6 + + if data_format is None: + data_format = 'channels_first' if tf.test.is_built_with_cuda() else 'channels_last' + + def model(inputs, is_training): + if data_format == 'channels_first': + # Convert from channels_last (NHWC) to channels_first (NCHW). This + # provides a large performance boost on GPU. + inputs = tf.transpose(inputs, [0, 3, 1, 2]) + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=16, kernel_size=3, strides=1, + data_format=data_format) + inputs = tf.identity(inputs, 'initial_conv') + + inputs = block_layer( + inputs=inputs, filters=16, block_fn=building_block, blocks=num_blocks, + strides=1, is_training=is_training, name='block_layer1', + data_format=data_format) + inputs = block_layer( + inputs=inputs, filters=32, block_fn=building_block, blocks=num_blocks, + strides=2, is_training=is_training, name='block_layer2', + data_format=data_format) + inputs = block_layer( + inputs=inputs, filters=64, block_fn=building_block, blocks=num_blocks, + strides=2, is_training=is_training, name='block_layer3', + data_format=data_format) + + inputs = batch_norm_relu(inputs, is_training, data_format) + + inputs = tf.layers.average_pooling2d( + inputs=inputs, pool_size=8, strides=1, padding='VALID', + data_format=data_format) + inputs = tf.identity(inputs, 'final_avg_pool') + inputs = tf.reshape(inputs, [-1, 64]) + inputs = tf.layers.dense( + inputs=inputs, units=num_classes) + inputs = tf.identity(inputs, 'final_dense') + return inputs + + model.default_image_size = 32 + return model + + +def imagenet_resnet_v2_generator(block_fn, layers, num_classes, + data_format=None): + """Generator for ImageNet ResNet v2 models. + + Args: + block_fn: The block to use within the model, either `building_block` or + `bottleneck_block`. + layers: A length-4 array denoting the number of blocks to include in each + layer. Each layer consists of blocks that take inputs of the same size. + num_classes: The number of possible classes for image classification. + data_format: The input format ('channels_last', 'channels_first', or None). + If set to None, the format is dependent on whether TensorFlow was + built with CUDA (GPU) support. + + Returns: + The model function that takes in `inputs` and `is_training` and + returns the output tensor of the ResNet model. + """ + if data_format is None: + data_format = 'channels_first' if tf.test.is_built_with_cuda() else 'channels_last' + + def model(inputs, is_training): + if data_format == 'channels_first': + # Convert from channels_last (NHWC) to channels_first (NCHW). This + # provides a large performance boost on GPU.
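+ # The permutation [0, 3, 1, 2] maps [batch, height, width, channels] to + # [batch, channels, height, width].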
+ inputs = tf.transpose(inputs, [0, 3, 1, 2]) + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=64, kernel_size=7, strides=2, + data_format=data_format) + inputs = tf.identity(inputs, 'initial_conv') + inputs = tf.layers.max_pooling2d( + inputs=inputs, pool_size=3, strides=2, padding='SAME', + data_format=data_format) + inputs = tf.identity(inputs, 'initial_max_pool') + + inputs = block_layer( + inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0], + strides=1, is_training=is_training, name='block_layer1', + data_format=data_format) + inputs = block_layer( + inputs=inputs, filters=128, block_fn=block_fn, blocks=layers[1], + strides=2, is_training=is_training, name='block_layer2', + data_format=data_format) + inputs = block_layer( + inputs=inputs, filters=256, block_fn=block_fn, blocks=layers[2], + strides=2, is_training=is_training, name='block_layer3', + data_format=data_format) + inputs = block_layer( + inputs=inputs, filters=512, block_fn=block_fn, blocks=layers[3], + strides=2, is_training=is_training, name='block_layer4', + data_format=data_format) + + inputs = batch_norm_relu(inputs, is_training, data_format) + inputs = tf.layers.average_pooling2d( + inputs=inputs, pool_size=7, strides=1, padding='VALID', + data_format=data_format) + inputs = tf.identity(inputs, 'final_avg_pool') + inputs = tf.reshape(inputs, [inputs.get_shape()[0].value, -1]) + inputs = tf.layers.dense(inputs=inputs, units=num_classes) + inputs = tf.identity(inputs, 'final_dense') + return inputs + + model.default_image_size = 224 + return model + + +def resnet_v2(resnet_size, num_classes, data_format=None): + """Returns the ResNet model for a given size and number of output classes.""" + model_params = { + 18: {'block': building_block, 'layers': [2, 2, 2, 2]}, + 34: {'block': building_block, 'layers': [3, 4, 6, 3]}, + 50: {'block': bottleneck_block, 'layers': [3, 4, 6, 3]}, + 101: {'block': bottleneck_block, 'layers': [3, 4, 23, 3]}, + 152: {'block': bottleneck_block, 'layers': [3, 8, 36, 3]}, + 200: {'block': bottleneck_block, 'layers': [3, 24, 36, 3]} + } + + if resnet_size not in model_params: + raise ValueError('Not a valid resnet_size:', resnet_size) + + params = model_params[resnet_size] + return imagenet_resnet_v2_generator( + params['block'], params['layers'], num_classes, data_format) diff --git a/official/resnet/vgg_preprocessing.py b/official/resnet/vgg_preprocessing.py new file mode 100644 index 00000000000..585fc1beca1 --- /dev/null +++ b/official/resnet/vgg_preprocessing.py @@ -0,0 +1,363 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Provides utilities to preprocess images. 
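+ +In short: images are resized so that their smallest side matches a target +size (preserving the aspect ratio), cropped to the output size (randomly +during training, centrally during evaluation), and the per-channel VGG means +are subtracted.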
+ +The preprocessing steps for VGG were introduced in the following technical +report: + + Very Deep Convolutional Networks For Large-Scale Image Recognition + Karen Simonyan and Andrew Zisserman + arXiv technical report, 2015 + PDF: http://arxiv.org/pdf/1409.1556.pdf + ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf + CC-BY-4.0 + +More information can be obtained from the VGG website: +www.robots.ox.ac.uk/~vgg/research/very_deep/ +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +_R_MEAN = 123.68 +_G_MEAN = 116.78 +_B_MEAN = 103.94 + +_RESIZE_SIDE_MIN = 256 +_RESIZE_SIDE_MAX = 512 + + +def _crop(image, offset_height, offset_width, crop_height, crop_width): + """Crops the given image using the provided offsets and sizes. + + Note that the method doesn't assume we know the input image size but it does + assume we know the input image rank. + + Args: + image: an image of shape [height, width, channels]. + offset_height: a scalar tensor indicating the height offset. + offset_width: a scalar tensor indicating the width offset. + crop_height: the height of the cropped image. + crop_width: the width of the cropped image. + + Returns: + the cropped image. + + Raises: + InvalidArgumentError: if the rank is not 3 or if the image dimensions are + less than the crop size. + """ + original_shape = tf.shape(image) + + rank_assertion = tf.Assert( + tf.equal(tf.rank(image), 3), + ['Rank of image must be equal to 3.']) + with tf.control_dependencies([rank_assertion]): + cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]]) + + size_assertion = tf.Assert( + tf.logical_and( + tf.greater_equal(original_shape[0], crop_height), + tf.greater_equal(original_shape[1], crop_width)), + ['Crop size greater than the image size.']) + + offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) + + # Use tf.slice instead of crop_to_bounding_box as it accepts tensors to + # define the crop size. + with tf.control_dependencies([size_assertion]): + image = tf.slice(image, offsets, cropped_shape) + return tf.reshape(image, cropped_shape) + + +def _random_crop(image_list, crop_height, crop_width): + """Crops the given list of images. + + The function applies the same crop to each image in the list. This can be + effectively applied when there are multiple image inputs of the same + dimension such as: + + image, depths, normals = _random_crop([image, depths, normals], 120, 150) + + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the new height. + crop_width: the new width. + + Returns: + the image_list with cropped images. + + Raises: + ValueError: if there are multiple image inputs provided with different + sizes or the images are smaller than the crop dimensions. + """ + if not image_list: + raise ValueError('Empty image_list.') + + # Compute the rank assertions.
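+ # Each assertion checks that an image has rank 3, i.e. shape + # [height, width, channels]; the assertions are threaded through + # control_dependencies below so they run before any shape values are read.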
+ rank_assertions = [] + for i in range(len(image_list)): + image_rank = tf.rank(image_list[i]) + rank_assert = tf.Assert( + tf.equal(image_rank, 3), + ['Wrong rank for tensor %s [expected] [actual]', + image_list[i].name, 3, image_rank]) + rank_assertions.append(rank_assert) + + with tf.control_dependencies([rank_assertions[0]]): + image_shape = tf.shape(image_list[0]) + image_height = image_shape[0] + image_width = image_shape[1] + crop_size_assert = tf.Assert( + tf.logical_and( + tf.greater_equal(image_height, crop_height), + tf.greater_equal(image_width, crop_width)), + ['Crop size greater than the image size.']) + + asserts = [rank_assertions[0], crop_size_assert] + + for i in range(1, len(image_list)): + image = image_list[i] + asserts.append(rank_assertions[i]) + with tf.control_dependencies([rank_assertions[i]]): + shape = tf.shape(image) + height = shape[0] + width = shape[1] + + height_assert = tf.Assert( + tf.equal(height, image_height), + ['Wrong height for tensor %s [expected][actual]', + image.name, height, image_height]) + width_assert = tf.Assert( + tf.equal(width, image_width), + ['Wrong width for tensor %s [expected][actual]', + image.name, width, image_width]) + asserts.extend([height_assert, width_assert]) + + # Create a random bounding box. + # + # Use tf.random_uniform and not numpy.random.rand as doing the former would + # generate random numbers at graph eval time, unlike the latter which + # generates random numbers at graph definition time. + with tf.control_dependencies(asserts): + max_offset_height = tf.reshape(image_height - crop_height + 1, []) + with tf.control_dependencies(asserts): + max_offset_width = tf.reshape(image_width - crop_width + 1, []) + offset_height = tf.random_uniform( + [], maxval=max_offset_height, dtype=tf.int32) + offset_width = tf.random_uniform( + [], maxval=max_offset_width, dtype=tf.int32) + + return [_crop(image, offset_height, offset_width, + crop_height, crop_width) for image in image_list] + + +def _central_crop(image_list, crop_height, crop_width): + """Performs central crops of the given image list. + + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the height of the image following the crop. + crop_width: the width of the image following the crop. + + Returns: + the list of cropped images. + """ + outputs = [] + for image in image_list: + image_height = tf.shape(image)[0] + image_width = tf.shape(image)[1] + + offset_height = (image_height - crop_height) / 2 + offset_width = (image_width - crop_width) / 2 + + outputs.append(_crop(image, offset_height, offset_width, + crop_height, crop_width)) + return outputs + + +def _mean_image_subtraction(image, means): + """Subtracts the given means from each image channel. + + For example: + means = [123.68, 116.779, 103.939] + image = _mean_image_subtraction(image, means) + + Note that the rank of `image` must be known. + + Args: + image: a tensor of size [height, width, C]. + means: a C-vector of values to subtract from each channel. + + Returns: + the centered image. + + Raises: + ValueError: If the rank of `image` is unknown, if `image` has a rank other + than three or if the number of channels in `image` doesn't match the + number of values in `means`. 
+ """ + if image.get_shape().ndims != 3: + raise ValueError('Input must be of size [height, width, C>0]') + num_channels = image.get_shape().as_list()[-1] + if len(means) != num_channels: + raise ValueError('len(means) must match the number of channels') + + channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) + for i in range(num_channels): + channels[i] -= means[i] + return tf.concat(axis=2, values=channels) + + +def _smallest_size_at_least(height, width, smallest_side): + """Computes new shape with the smallest side equal to `smallest_side`. + + Computes new shape with the smallest side equal to `smallest_side` while + preserving the original aspect ratio. + + Args: + height: an int32 scalar tensor indicating the current height. + width: an int32 scalar tensor indicating the current width. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. + + Returns: + new_height: an int32 scalar tensor indicating the new height. + new_width: and int32 scalar tensor indicating the new width. + """ + smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) + + height = tf.to_float(height) + width = tf.to_float(width) + smallest_side = tf.to_float(smallest_side) + + scale = tf.cond(tf.greater(height, width), + lambda: smallest_side / width, + lambda: smallest_side / height) + new_height = tf.to_int32(height * scale) + new_width = tf.to_int32(width * scale) + return new_height, new_width + + +def _aspect_preserving_resize(image, smallest_side): + """Resize images preserving the original aspect ratio. + + Args: + image: A 3-D image `Tensor`. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. + + Returns: + resized_image: A 3-D tensor containing the resized image. + """ + smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) + + shape = tf.shape(image) + height = shape[0] + width = shape[1] + new_height, new_width = _smallest_size_at_least(height, width, smallest_side) + image = tf.expand_dims(image, 0) + resized_image = tf.image.resize_bilinear(image, [new_height, new_width], + align_corners=False) + resized_image = tf.squeeze(resized_image) + resized_image.set_shape([None, None, 3]) + return resized_image + + +def preprocess_for_train(image, + output_height, + output_width, + resize_side_min=_RESIZE_SIDE_MIN, + resize_side_max=_RESIZE_SIDE_MAX): + """Preprocesses the given image for training. + + Note that the actual resizing scale is sampled from + [`resize_size_min`, `resize_size_max`]. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. + + Returns: + A preprocessed image. 
+ """ + resize_side = tf.random_uniform( + [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32) + + image = _aspect_preserving_resize(image, resize_side) + image = _random_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.to_float(image) + image = tf.image.random_flip_left_right(image) + return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + + +def preprocess_for_eval(image, output_height, output_width, resize_side): + """Preprocesses the given image for evaluation. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + resize_side: The smallest side of the image for aspect-preserving resizing. + + Returns: + A preprocessed image. + """ + image = _aspect_preserving_resize(image, resize_side) + image = _central_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.to_float(image) + return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + + +def preprocess_image(image, output_height, output_width, is_training=False, + resize_side_min=_RESIZE_SIDE_MIN, + resize_side_max=_RESIZE_SIDE_MAX): + """Preprocesses the given image. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + is_training: `True` if we're preprocessing the image for training and + `False` otherwise. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, then this value + is used for rescaling. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, this value is + ignored. Otherwise, the resize side is sampled from + [resize_size_min, resize_size_max]. + + Returns: + A preprocessed image. + """ + if is_training: + return preprocess_for_train(image, output_height, output_width, + resize_side_min, resize_side_max) + else: + return preprocess_for_eval(image, output_height, output_width, + resize_side_min) diff --git a/official/testing/docker_test.sh b/official/testing/docker_test.sh new file mode 100755 index 00000000000..231ae69f155 --- /dev/null +++ b/official/testing/docker_test.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# DO NOT MODIFY THIS FILE. Add tests to be executed in test_models.sh +# Usage: docker_test.sh [--docker-image ] +# +# DOCKERFILE_IMG_NAME: (Optional) The tensorflow docker container version +# If this optional value is not supplied (via the +# --docker-image flag), the default latest tensorflow docker +# will be used. 
+# + + +# SETUP +# Default exit status +EXIT=0 + +# Get current directory path to mount +export WORKSPACE=${PWD} + +DOCKER_BINARY="docker" + +# Decide docker image and tag +if [[ "$1" == "--docker-image" ]]; then + DOCKER_IMG_NAME="$2" + echo "Using specified docker tensorflow image and tag: ${DOCKER_IMG_NAME}" + shift 2 +else + DOCKER_IMG_NAME="tensorflow/tensorflow:1.3.0" + echo "Using the default docker tensorflow image and tag: ${DOCKER_IMG_NAME}" +fi + +# Specify which test is to be run +COMMAND="./testing/test_models.sh" + +# RUN +${DOCKER_BINARY} run \ -v ${WORKSPACE}:/workspace \ -w /workspace \ -t \ ${DOCKER_IMG_NAME} \ ${COMMAND} \ || EXIT=$? + + +# TEARDOWN +${DOCKER_BINARY} rmi \ -f \ ${DOCKER_IMG_NAME} + +git clean -dfx + +# Return exit status +exit ${EXIT} \ No newline at end of file diff --git a/official/testing/test_models.sh b/official/testing/test_models.sh new file mode 100755 index 00000000000..a4abadc9f0e --- /dev/null +++ b/official/testing/test_models.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# DO NOT MODIFY THIS FILE. Add tests with file name "*_test.py" to your model's +# directory. +# +# For each individual model in the garden, if a test file is found in that +# directory, it will be run in a docker container. +# +# Usage: This file will be invoked in a docker container by docker_test.sh. + +# Default exit status +EXIT=0 + +# Increase stack size 8x +ulimit -s 65532 + +# Testing all of the models with a valid unit test +echo -e "Testing all models\n" + +# Install coverage +pip install coverage + +for test_file in `find . -name "*_test.py" -print`; do + echo "Running $test_file." + coverage run $test_file + test_status=$? + if [ ${test_status} -eq 0 ]; then + coverage report + echo -e "TEST PASSED\n" + else + EXIT=${test_status} + echo -e "TEST FAILED\n" + fi +done + +# Return exit status +exit ${EXIT} \ No newline at end of file