"""
A simple 6-layer Convolutional Neural Network to recognize handwritten digits.
On the MNIST dataset (http://yann.lecun.com/exdb/mnist/), this model achieves a 0.5% test error.
This is meant as a tutorial/introduction to TensorFlow and is based on the MNIST model example in TensorFlow
(https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/image/mnist/convolutional.py).
Read the accompanying README for more details.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import getpass
import sys
import time
import gzip
import numpy
import tensorflow as tf
import argparse
from six.moves import xrange
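# Note: this script uses the original graph-mode TensorFlow API (tf.placeholder, tf.Session,
# tf.initialize_all_variables); under TensorFlow 2.x it would need tf.compat.v1 or a rewrite.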
class Config(object):
"""Holds model hyperparams and data information.
The config class is used to store various hyperparameters and dataset
information parameters. Model objects are passed a Config() object at
instantiation.
"""
IMAGE_SIZE = 28
NUM_CHANNELS = 1
PIXEL_DEPTH = 255
NUM_LABELS = 10
VALIDATION_SIZE = 5000
SEED = 66478 # Set to None for random seed.
BATCH_SIZE = 64
NUM_EPOCHS = 20
EVAL_BATCH_SIZE = 64
EVAL_FREQUENCY = 100
L2_REG = 5e-4
BASE_LR = 0.01
DROPOUT = 0.5
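# Note: DROPOUT above is the *keep* probability passed to tf.nn.dropout, i.e. each activation
# is retained with probability 0.5 during training.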
class DigitRecognizer(object):
"""
A Convolutional Neural Network for digit recognition. The model architecture is:
[conv1 - relu1 - pool1] - [conv2 - relu2 - pool2] - [conv3 - relu3] - dropout - fc1 - dropout - fc2
"""
def extract_data(self, filename, num_images):
"""
Extract the images into a 4-D tensor of shape [num images, image height, image width, num channels].
Values are rescaled from [0, 255] down to [-0.5, 0.5].
"""
with gzip.open(filename) as bytestream:
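# The IDX image file starts with a 16-byte header (magic number, image count, row count,
# column count; 4 bytes each), which is skipped before reading the raw pixel bytes.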
bytestream.read(16)
buf = bytestream.read(self.config.IMAGE_SIZE * self.config.IMAGE_SIZE * num_images)
data = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.float32)
data = (data - (self.config.PIXEL_DEPTH / 2.0)) / self.config.PIXEL_DEPTH
data = data.reshape(num_images, self.config.IMAGE_SIZE, self.config.IMAGE_SIZE, 1)
return data
def extract_labels(self, filename, num_images):
"""
Extract the labels into a vector of label IDs.
"""
with gzip.open(filename) as bytestream:
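# The IDX label file starts with an 8-byte header (magic number and item count),
# which is skipped before reading the raw label bytes.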
bytestream.read(8)
buf = bytestream.read(1 * num_images)
labels = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.int64)
return labels
def error_rate(self, predictions, labels):
"""
Return the error rate (percentage misclassified) given a batch of predicted class probabilities and the corresponding integer label IDs.
"""
return 100.0 - (100.0 * numpy.sum(numpy.argmax(predictions, 1) == labels) / predictions.shape[0])
def add_placeholders(self):
"""
Adds placeholder variables. These placeholder nodes are fed a batch of data at each training
or evaluation step.
"""
self.train_data_node = tf.placeholder(tf.float32, shape=(self.config.BATCH_SIZE, self.config.IMAGE_SIZE, self.config.IMAGE_SIZE, self.config.NUM_CHANNELS))
self.train_labels_node = tf.placeholder(tf.int64, shape=(self.config.BATCH_SIZE,))
self.eval_data = tf.placeholder(tf.float32, shape=(self.config.EVAL_BATCH_SIZE, self.config.IMAGE_SIZE, self.config.IMAGE_SIZE, self.config.NUM_CHANNELS))
def create_feed_dict(self, input_data, input_labels=None):
"""Creates the feed_dict. A feed_dict takes the form of:
feed_dict = {
<placeholder>: <tensor of values to be passed for placeholder>,
....
}
Args:
input_data: A batch of input data.
input_labels: A batch of label data.
"""
feed_dict = {}
if input_labels is not None:
feed_dict[self.train_data_node] = input_data
feed_dict[self.train_labels_node] = input_labels
else:
feed_dict[self.eval_data] = input_data
return feed_dict
def add_model_vars(self):
"""
The variables below hold all the trainable weights. Each is given an initial value that is
assigned when the initializer op returned by tf.initialize_all_variables() is run.
"""
# conv1: 5x5 filter, depth 32.
self.conv1_weights = tf.Variable(tf.truncated_normal([5, 5, self.config.NUM_CHANNELS, 32], stddev=0.1, seed=self.config.SEED))
self.conv1_biases = tf.Variable(tf.zeros([32]))
#conv2: 5x5 filter, depth 64
self.conv2_weights = tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=self.config.SEED))
self.conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
#conv3: 5x5 filter, depth 64
self.conv3_weights = tf.Variable(tf.truncated_normal([5, 5, 64, 64], stddev=0.1, seed=self.config.SEED))
self.conv3_biases = tf.Variable(tf.constant(0.1, shape=[64]))
#fc1: 512 hidden dims
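# After two 2x2 max-pools a 28x28 input is reduced to 7x7 with 64 channels, so fc1 takes
# (IMAGE_SIZE // 4) * (IMAGE_SIZE // 4) * 64 = 3136 inputs.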
self.fc1_weights = tf.Variable(tf.truncated_normal([(self.config.IMAGE_SIZE // 4) * (self.config.IMAGE_SIZE // 4) * 64, 512], stddev=0.1, seed=self.config.SEED))
self.fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
#fc2
self.fc2_weights = tf.Variable(tf.truncated_normal([512, self.config.NUM_LABELS], stddev=0.1, seed=self.config.SEED))
self.fc2_biases = tf.Variable(tf.constant(0.1, shape=[self.config.NUM_LABELS]))
def add_model(self, data, train=False, return_hidden=False):
"""
Construct the computational graph that defines our CNN model
"""
#conv. layer 1
conv1 = tf.nn.conv2d(data, self.conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
relu1 = tf.nn.relu(tf.nn.bias_add(conv1, self.conv1_biases))
pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
#conv. layer 2
conv2 = tf.nn.conv2d(pool1, self.conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
relu2 = tf.nn.relu(tf.nn.bias_add(conv2, self.conv2_biases))
pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
#conv. layer 3 with dropout
conv3 = tf.nn.conv2d(pool2, self.conv3_weights, strides=[1, 1, 1, 1], padding='SAME')
relu3 = tf.nn.relu(tf.nn.bias_add(conv3, self.conv3_biases))
if train:
relu3 = tf.nn.dropout(relu3, self.config.DROPOUT, seed=self.config.SEED)
# Reshape the feature map cuboid to feed it to the fully connected layers.
relu_shape = relu3.get_shape().as_list()
reshaped = tf.reshape(
relu3,
[relu_shape[0], relu_shape[1] * relu_shape[2] * relu_shape[3]])
#fully connected layer 1 with dropout
hidden = tf.nn.relu(tf.matmul(reshaped, self.fc1_weights) + self.fc1_biases)
if train:
hidden = tf.nn.dropout(hidden, self.config.DROPOUT, seed=self.config.SEED)
#add regularization loss
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
tf.nn.l2_loss(self.conv1_weights) + tf.nn.l2_loss(self.conv2_weights) +
tf.nn.l2_loss(self.conv3_weights) + tf.nn.l2_loss(self.fc1_weights) +
tf.nn.l2_loss(self.fc2_weights))
#fully connected layer 2 (output layer)
if return_hidden:
return hidden
else:
return tf.matmul(hidden, self.fc2_weights) + self.fc2_biases
def add_loss_op(self, logits):
"""Adds cross_entropy_loss ops to the computational graph.
Args:
logits: A tensor of shape (batch_size, n_classes)
Returns:
loss: A 0-d tensor (scalar)
"""
#data loss
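# sparse_softmax_cross_entropy_with_logits expects integer class IDs rather than one-hot
# vectors, which matches the int64 train_labels_node placeholder.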
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.train_labels_node))
#reg. loss
loss += self.config.L2_REG * tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)[0]
return loss
def add_training_op(self, loss, train_size):
"""
Sets up the training Ops.
Creates an optimizer and applies the gradients to all trainable variables.
"""
batch = tf.Variable(0)
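# 'batch' acts as the global step counter; optimizer.minimize() below increments it once per minibatch.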
learning_rate = tf.train.exponential_decay(
self.config.BASE_LR,
batch * self.config.BATCH_SIZE,
train_size,
0.95,
staircase=True)
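# With decay_steps equal to train_size and staircase=True, the learning rate is multiplied
# by 0.95 once per epoch (batch * BATCH_SIZE counts the examples seen so far).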
optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
train_op = optimizer.minimize(loss, global_step=batch)
return train_op
def __init__(self, config):
"""Constructs the network using the helper functions defined above."""
#config
self.config = config
#extract data
self.train_data = self.extract_data('data/train-images-idx3-ubyte.gz', 60000)
self.train_labels = self.extract_labels('data/train-labels-idx1-ubyte.gz', 60000)
self.test_data = self.extract_data('data/t10k-images-idx3-ubyte.gz', 10000)
self.test_labels = self.extract_labels('data/t10k-labels-idx1-ubyte.gz', 10000)
self.add_placeholders()
self.add_model_vars()
logits = self.add_model(self.train_data_node, train=True)
self.loss = self.add_loss_op(logits)
self.train_op = self.add_training_op(self.loss, self.train_labels.shape[0])
# Predictions for the current training minibatch.
self.train_prediction = tf.nn.softmax(logits)
# Predictions for the test and validation, which we'll compute less often.
self.eval_prediction = tf.nn.softmax(self.add_model(self.eval_data, train=False))
#feature extraction for t-SNE viz.
self.featurize = self.add_model(self.eval_data, train=False, return_hidden=True)
def eval_in_batches(self, data, session):
"""
Get all predictions for a dataset by running it in small batches.
"""
size = data.shape[0]
eval_batch_size = self.config.EVAL_BATCH_SIZE
if size < eval_batch_size:
raise ValueError("batch size for evals larger than dataset: %d" % size)
predictions = numpy.ndarray(shape=(size, self.config.NUM_LABELS), dtype=numpy.float32)
for begin in xrange(0, size, eval_batch_size):
end = begin + eval_batch_size
if end <= size:
predictions[begin:end, :] = session.run(
self.eval_prediction,
feed_dict=self.create_feed_dict(data[begin:end, ...]))
else:
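# The eval placeholder has a fixed batch size, so for the final partial batch run the last
# full window of the data and keep only the rows that have not been filled yet.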
batch_predictions = session.run(
self.eval_prediction,
feed_dict=self.create_feed_dict(data[-eval_batch_size:, ...]))
predictions[begin:, :] = batch_predictions[begin - size:, :]
return predictions
def featurize_in_batches(self, session, sample_size):
"""
Get fully connected layer 1 (fc1) features for the training data by running it in small batches.
"""
batch_size = self.config.EVAL_BATCH_SIZE
if sample_size < batch_size:
raise ValueError("batch size for evals larger than dataset: %d" % size)
features = numpy.ndarray(shape=(sample_size, 512), dtype=numpy.float32)
labels = numpy.ndarray(shape=(sample_size,),dtype=numpy.int64)
for begin in xrange(0, sample_size, batch_size):
end = begin + batch_size
if end <= sample_size:
features[begin:end, :] = session.run(
self.featurize,
feed_dict=self.create_feed_dict(self.train_data[begin:end, ...]))
labels[begin:end] = self.train_labels[begin:end]
else:
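# Handle a final partial batch the same way as eval_in_batches: featurize the last full
# window of the sample and keep only the rows that have not been filled yet.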
batch_features = session.run(
self.featurize,
feed_dict=self.create_feed_dict(self.train_data[sample_size-batch_size:sample_size, ...]))
features[begin:, :] = batch_features[begin-sample_size:, :]
labels[begin:] = self.train_labels[begin:sample_size]
return features, labels
def run_training(self, session):
num_epochs = self.config.NUM_EPOCHS
train_size = self.train_labels.shape[0]
batch_size = self.config.BATCH_SIZE
best_error = 2.0
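# Checkpoint the model only once the test error drops below 2%, then again whenever a new best is reached.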
saver = tf.train.Saver()
for step in xrange(int(num_epochs * train_size) // batch_size):
# Compute the offset of the current minibatch in the data.
# Note that we could use better randomization across epochs.
offset = (step * batch_size) % (train_size - batch_size)
batch_data = self.train_data[offset:(offset + batch_size), ...]
batch_labels = self.train_labels[offset:(offset + batch_size)]
#create feed dictionary
feed_dict = self.create_feed_dict(batch_data, batch_labels)
# Run the graph and fetch some of the nodes.
_, loss, predictions = session.run(
[self.train_op, self.loss, self.train_prediction],
feed_dict=feed_dict)
if step % self.config.EVAL_FREQUENCY == 0:
print('Step %d (epoch %.2f)' %
(step, float(step) * batch_size / train_size))
print('Minibatch loss: %.3f' % (loss))
train_error = self.error_rate(predictions, batch_labels)
print('Minibatch error: %.1f%%' % train_error)
validation_predictions = self.eval_in_batches(self.test_data, session)
validation_error = self.error_rate(validation_predictions, self.test_labels)
print('Test error: %.1f%%' % validation_error)
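# Note: the MNIST test set is used here both for progress monitoring and for model selection;
# Config.VALIDATION_SIZE is never used to carve out a separate validation split.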
if validation_error < best_error:
saver.save(session, 'model_weights/weights')
print("New best model saved")
best_error = validation_error
sys.stdout.flush()
def extract_features():
"""
Extract features for t-SNE visualization
"""
config = Config()
config.EVAL_BATCH_SIZE = 50
with tf.Graph().as_default():
model = DigitRecognizer(config)
saver = tf.train.Saver()
with tf.Session() as session:
saver.restore(session, 'model_weights/weights')
features, labels = model.featurize_in_batches(session, 10000)
#save
numpy.save('tsne_features.npy', features)
numpy.save('tsne_labels.npy', labels)
def classify():
"""
Test Digit Recognition model.
"""
# Create a local session to run the training.
config = Config()
with tf.Graph().as_default():
model = DigitRecognizer(config)
init = tf.initialize_all_variables()
with tf.Session() as session:
print('Initializing...')
session.run(init)
print('Training...')
model.run_training(session)
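# Usage (per the flags defined below): 'python mnist.py -train' trains the model and saves the
# best weights to model_weights/; 'python mnist.py -features' restores those weights and dumps
# fc1 features and labels for t-SNE visualization.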
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-features', action='store_true')
parser.add_argument('-train', action='store_true')
options = parser.parse_args()
if options.train:
classify()
if options.features:
extract_features()