[WIP] Support for stacking dataset #3

Status: Open
Wants to merge 235 commits into base: new_datasets

Commits (235)
0567b92
[WIP] Changes to add support for stacking dataset in micro search
j-varun Jul 2, 2018
cc446f7
[WIP] Bug Fixes
j-varun Jul 6, 2018
32e0820
Minor Fixes
j-varun Jul 6, 2018
e74d467
Merge pull request #3 from j-varun/good_fixes
ahundt Jul 6, 2018
1ae4b6c
Added missing files
j-varun Jul 6, 2018
6281d10
Merge pull request #3 from j-varun/good_fixes
ahundt Jul 6, 2018
b648cfe
block_stacking_reader.py better reading test loop
ahundt Jul 6, 2018
a4acbbc
add costar stacking search script
ahundt Jul 8, 2018
d4757ac
[WIP] Fixes for generator errors and other modifications to support s…
j-varun Jul 9, 2018
85a95c0
Merge commit 'd4757ac5df3e1d4a7e100dfcbabc9101d3d6372b' into stacking…
ahundt Jul 10, 2018
85fe169
Merge branch 'stacking_search' of github.com:ahundt/enas into stackin…
ahundt Jul 10, 2018
799282a
val_test_size to 200 for stacking
ahundt Jul 10, 2018
5a4975c
block stacking search batch size and num epochs updated
ahundt Jul 10, 2018
4982c72
Merge commit 'b704f400aa11c580b5a6d432a55840870c898d0e' into stacking…
ahundt Jul 10, 2018
3e2b432
[WIP] Changed Loss and Accuracy fn for regression task
j-varun Jul 11, 2018
72bf0d5
[WIP] Added more metrics and arguments + minor bug fixes
j-varun Jul 12, 2018
af4d86d
Minor Fixes
j-varun Jul 12, 2018
b774b87
[WIP] Minor changes to micro_child
j-varun Jul 12, 2018
faf5646
Merge commit 'b774b87beae87c0af469d234e4ac4041f403185f' into stacking…
ahundt Jul 13, 2018
fb9af5f
models.py pep8
ahundt Jul 13, 2018
2c53c5e
[WIP] Fixed hardcoded shapes
j-varun Jul 13, 2018
45dc0ca
[WIP] Minor Bug Fixes
j-varun Jul 13, 2018
0387a3a
Merged with stacking_search
j-varun Jul 13, 2018
2f11da0
correct validation size
ahundt Jul 13, 2018
a5f6352
bug fix
j-varun Jul 13, 2018
cf44641
Merge commit 'a5f63525eb0bba501dc4ab4d194f0239984857aa' into stacking…
ahundt Jul 13, 2018
e2d29c9
models.py reduce images per example to train on for temp bug workaround.
ahundt Jul 13, 2018
5658c27
models.py hack to reduce time in a batch
ahundt Jul 14, 2018
51609a7
cifar10 main.py add option for default child and controller optimizers
ahundt Jul 20, 2018
2b5611a
Miscellaneous Bug Fixes
j-varun Jul 21, 2018
9e3dc29
[WIP] Changes to support micro search based on loss and other fixes
j-varun Jul 21, 2018
114619a
[WIP] Changes for using validation loss as reward for controller
j-varun Jul 23, 2018
c96ce47
Merge commit '9e3dc292dd485d65bf053e587d4b486190c553ed' into stacking…
ahundt Jul 24, 2018
3b26af0
models.py indentation fix
ahundt Jul 24, 2018
bab03cf
models.py fix indentation
ahundt Jul 24, 2018
72c9b9a
Merge branch 'stacking_search' of https://github.com/ahundt/enas into…
j-varun Jul 24, 2018
ca3b58d
[WIP] Minor Fix for initializing child class
j-varun Jul 24, 2018
e1ac057
[WIP] Removed unnecessary print statements
j-varun Jul 24, 2018
7471d32
increase training # of workers and max queue size
ahundt Jul 25, 2018
291e982
costar_block_stacking_search.sh attempt at tuning parameters
ahundt Jul 25, 2018
60ab83a
costar_block_stacking_search.sh attempt at tuning parameters
ahundt Jul 25, 2018
ead2194
global_avg_pool -> global_max_pool
ahundt Jul 25, 2018
edb73f1
costar_block_stacking_search.sh reduce batch size due to memory const…
ahundt Jul 25, 2018
d5c87c5
lower child out filters to fit in memory
ahundt Jul 25, 2018
5857b05
[WIP] Fixes for fashion_mnist and stacking dataset
j-varun Jul 26, 2018
821874d
Minor bug fix
j-varun Jul 26, 2018
200a61d
Merge branch 'stacking_search' of https://github.com/ahundt/enas into…
j-varun Jul 26, 2018
cad34c7
models.py artificially lower number of steps in an epoch
ahundt Jul 28, 2018
c111f9e
micro_child.py lambda to def
ahundt Jul 28, 2018
d06b58f
models.py make stacking epoch longer by batch_size
ahundt Jul 28, 2018
585e6e9
Added additional evaluation metrics
j-varun Jul 31, 2018
463d143
[WIP] Minor bug fix
j-varun Jul 31, 2018
11f5108
[WIP] Minor fixes
j-varun Jul 31, 2018
f74d71d
Merge branch 'stacking_search' of https://github.com/ahundt/enas into…
j-varun Jul 31, 2018
5245c81
option for translation only training added
j-varun Aug 5, 2018
f465bce
[WIP] Minor Fixes
j-varun Aug 6, 2018
8a5e22c
Fixes for translation only run
j-varun Aug 7, 2018
f969e3f
Added Metrics for eval
j-varun Aug 7, 2018
0ca1112
Bug Fix
j-varun Aug 7, 2018
de02eeb
Fix for fashion-mnist
j-varun Aug 8, 2018
e955322
Added option to switch to alternate reward for stacking dataset
j-varun Aug 8, 2018
6ef5ae7
Added flag for reward
j-varun Aug 8, 2018
6721f97
Changes to support training of only the rotation component of the sta…
j-varun Aug 8, 2018
0480bb9
Minor Fixes
j-varun Aug 8, 2018
b0d56c2
Merge pull request #5 from j-varun/good_fixes
ahundt Aug 8, 2018
f0f440a
Minor Bug Fix
j-varun Aug 8, 2018
ffe9435
configure search parameters for rotation and translation only search
ahundt Aug 8, 2018
c2c1bc0
Merge commit 'f0f440a58a225f17c6b2457f1d0b0d763c47e802' into stacking…
ahundt Aug 8, 2018
62b9ede
Added Configurable max loss for reward
j-varun Aug 8, 2018
0aebeb4
Merge branch 'stacking_search' of https://github.com/ahundt/enas into…
j-varun Aug 8, 2018
b6bf3b5
Miscellaneous Fixes
j-varun Aug 8, 2018
79ac6bb
Merge pull request #6 from j-varun/good_fixes
ahundt Aug 8, 2018
050b7c7
costar_block_stacking_translation_search.sh last training run only fo…
ahundt Aug 9, 2018
aacd81c
models.py increase estimated images per example from 1 to 15
ahundt Aug 10, 2018
cf1c7fa
models.py estimated images per example = 16
ahundt Aug 10, 2018
79c27cc
costar_block_stacking_rotation_search.sh configure to match translati…
ahundt Aug 10, 2018
dc5e0f7
rotation search try increasing batch size to 32
ahundt Aug 10, 2018
da64b09
configure new search directories
ahundt Aug 10, 2018
cfce2b2
rotation search batch size 16
ahundt Aug 10, 2018
0cb5e68
costar stacking lower estimated images per example to 4
ahundt Aug 10, 2018
7be1e2b
models.py lower estimated_images_per_example to 2
ahundt Aug 10, 2018
6e232e5
Fixes for accuracy values being printed
j-varun Aug 11, 2018
a4a1087
batch_norm -> group_norm with exception of masked batch norm...
ahundt Aug 12, 2018
d0d309d
set max_loss to 2 to avoid negative losses
ahundt Aug 12, 2018
8e7f8e2
Merge commit '6e232e573515826ce172a3d0bb45154193c409ff' into stacking…
ahundt Aug 12, 2018
54a1cda
image_ops.py attempt group_norm fixes
ahundt Aug 12, 2018
861694f
group_norm try to fix shape problem
ahundt Aug 12, 2018
28f7764
image_ops.py group_norm attempt to fix shape
ahundt Aug 12, 2018
b2dc520
image_ops.py group_norm attempt at shape fix
ahundt Aug 12, 2018
9b0912e
image_ops.py more attempts at group norm fix
ahundt Aug 12, 2018
21673e7
image_ops.py group_norm fix attempt
ahundt Aug 12, 2018
69ed6a6
image_ops.py group_norm another shape fix attempt
ahundt Aug 12, 2018
8cc60ba
image_ops.py group_norm another shape fix attempt
ahundt Aug 12, 2018
1a803c8
image_ops.py group_norm another shape fix attempt
ahundt Aug 12, 2018
3f65d52
[1, C, 1, 1]
ahundt Aug 12, 2018
c4d2490
image_ops.py debug shape
ahundt Aug 12, 2018
f80a78a
image_ops.py group_norm another shape fix attempt
ahundt Aug 12, 2018
d147251
image_ops.py group_norm another shape fix attempt
ahundt Aug 12, 2018
9c6062c
image_ops.py group_norm another shape fix attempt
ahundt Aug 12, 2018
a3a69a6
micro_child.py fixes to is_training parameters
ahundt Aug 12, 2018
d367e52
image_ops.py get some additional debug info
ahundt Aug 12, 2018
2a7c846
image_ops.py group_norm another shape fix attempt
ahundt Aug 12, 2018
14c10c9
image_ops.py norm prints traceback if verbose
ahundt Aug 12, 2018
c049898
typo fix
ahundt Aug 12, 2018
cb1f5e8
image_ops.py group_norm another shape fix attempt
ahundt Aug 12, 2018
bdbef1e
micro_child.py add shape asserts
ahundt Aug 12, 2018
7467a7c
micro_child.py more explicit params
ahundt Aug 12, 2018
9dbeffc
image_ops.py print clearer separator
ahundt Aug 12, 2018
3d2a1d6
micro_child.py clearly set is training as named parameter
ahundt Aug 12, 2018
c986190
micro_child.py better setting of is_training
ahundt Aug 12, 2018
01b5f0d
micro_child.py typo fix
ahundt Aug 12, 2018
400a174
micro_child.py fix some hard coded variables
ahundt Aug 12, 2018
9622384
micro_child.py typo fix
ahundt Aug 12, 2018
edfbd00
micro_child.py typo fix
ahundt Aug 12, 2018
2d06cc8
micro_child.py add missing import
ahundt Aug 12, 2018
b4b183f
micro_child.py add more shape debugging
ahundt Aug 12, 2018
83c2056
micro_child.py more shape debugging
ahundt Aug 12, 2018
e14fb57
micro_child.py more shape debug output
ahundt Aug 12, 2018
1dc79a0
micro_child.py more shape debug output
ahundt Aug 12, 2018
a409e46
micro_child.py more shape debug outputs
ahundt Aug 12, 2018
b21f70e
micro_child.py try using integer shape components where possible
ahundt Aug 12, 2018
b73e176
micro_child.py typo fix
ahundt Aug 12, 2018
09006ad
image_ops.py micro_child.py set verbose shape debug output to 0
ahundt Aug 12, 2018
995593c
models.py CRITICAL BUGFIX: TRAIN GENERATOR WAS BEING USED FOR VAL + T…
ahundt Aug 13, 2018
3c970f6
micro_child.py fix is_training parameters
ahundt Aug 13, 2018
ce4fe15
cifar10 parameterize pool distance
ahundt Aug 13, 2018
07b82df
double num_layers so receptive field size is increased substantially
ahundt Aug 13, 2018
a6a748b
main.py set pool distance based on flag
ahundt Aug 13, 2018
60fddb5
block_stacking_reader.py merge code from costar_plan
ahundt Aug 13, 2018
bd46afc
block_stacking_reader.py incorporate upstream changes and add new inp…
ahundt Aug 13, 2018
2aaad4c
micro search add missing flags and an error message.
ahundt Aug 13, 2018
60d9c81
models.py translation_only add xy grid and remove input rotations
ahundt Aug 13, 2018
686c0db
micro_child.py change aux head training config
ahundt Aug 13, 2018
0cd3844
costar_block_stacking_translation_search.sh 64 filters
ahundt Aug 13, 2018
6638067
general_child.py, micro_child.py ALL RELU -> ELU
ahundt Aug 13, 2018
de1ed0f
micro_child.py typo fix
ahundt Aug 13, 2018
43909f8
block_stacking_reader.py fix major bugs in data loading code for data…
ahundt Aug 13, 2018
787e5c2
block_stacking_reader.py fix assert bug
ahundt Aug 13, 2018
8c9283c
block_stacking_reader.py disable debug code
ahundt Aug 13, 2018
5ebb658
rotation_search.sh batch_size=16
ahundt Aug 13, 2018
83b74cd
Changed output for test and validation to include arc
j-varun Aug 13, 2018
22308b5
Added new case with reward estimates in block_stacking_reader.py
j-varun Aug 14, 2018
54f7c3a
Merge commit '83b74cd0530fa2ce3cc58f98aae25b809944e965' into stacking…
ahundt Aug 14, 2018
676f082
Added support for reward_estimates training case
j-varun Aug 14, 2018
59d7d90
main.py typo fix
ahundt Aug 14, 2018
3fd3dc9
set batch size and filters to work on titan xp
ahundt Aug 14, 2018
bf8506b
add requirements.txt
ahundt Aug 14, 2018
71265b2
Merge commit '676f082dbdf7d5055a74df996e097f53a944d458' into stacking…
ahundt Aug 14, 2018
2a81e99
Merge commit 'bf8506b94c7eeda1f42d5d0ba2888c13b036c3ac' into stacking…
ahundt Aug 14, 2018
455dc9c
Add reward critic search script
ahundt Aug 14, 2018
9b7e3fd
set reward critic output directory
ahundt Aug 14, 2018
874494b
reward_estimate -> stacking_reward so it can be told apart from the e…
ahundt Aug 14, 2018
ed69b79
TRANSLATION SEARCH MAJOR REWARD CHANGE TO 1/MSE
ahundt Aug 14, 2018
19b4504
block_stacking_reader.py only check for length of goal ids if it is l…
ahundt Aug 14, 2018
b63bda2
rename stacking reward variable
ahundt Aug 14, 2018
6dfb557
rotation search reduce batch size for GTX 1080 Ti
ahundt Aug 14, 2018
23a590a
micro rotation search partial reversion of settings (filters & input …
ahundt Aug 15, 2018
1f12b22
attempt 2 at printing eval architecture
ahundt Aug 16, 2018
49004a4
micro_child.py convert arc data to tensor so it will work in fixed an…
ahundt Aug 16, 2018
de49c43
build_valid() is_training = True
ahundt Aug 16, 2018
b16bbbb
arc print fix
ahundt Aug 16, 2018
73cc9bb
micro_child.py disable printing arcs continuously, but make it easy t…
ahundt Aug 16, 2018
a5e7228
add and enable random augmentation for translation search only
ahundt Aug 16, 2018
c55e4d1
typo fix
ahundt Aug 16, 2018
0d30145
main.py add random_augmentation parameter to child class call
ahundt Aug 16, 2018
313c963
create rotation final script
ahundt Aug 20, 2018
6babeec
rotation final fix nchw -> nhwc
ahundt Aug 20, 2018
96afa21
rotation search and final command fixes
ahundt Aug 20, 2018
f557120
Temporary fix for group_norm issues
j-varun Aug 21, 2018
ca67c1a
Another temporary fix for norm issues
j-varun Aug 21, 2018
4dd5515
Switch to batch_norm until group_norm is fixed
j-varun Aug 21, 2018
bef61b2
Changes to hyperparameters for rotation
j-varun Aug 22, 2018
283bcb4
Fix Typo
j-varun Aug 22, 2018
4821dba
Changes in rotation search script
j-varun Aug 31, 2018
649795f
Changes for costar_block_stacking_v0.3, load files from train-test-va…
j-varun Sep 5, 2018
39a58eb
Metrics being saved to csv
j-varun Sep 5, 2018
cbb1f1a
Changes in script for costar_block_stacking_v0.3
j-varun Sep 5, 2018
9bf5b11
updated names of text files
j-varun Sep 5, 2018
ced06db
block_stacking_reader.py expand user path to filename
ahundt Sep 5, 2018
1997ec6
Fix in csv file write mode for Python2 and minor changes in scripts
j-varun Sep 6, 2018
4475bbf
Minor bug fix
j-varun Sep 6, 2018
91cd02b
fix for csv file handling
j-varun Sep 6, 2018
c9ef761
micro_child.py attempt to add printing of arc during eval and test
ahundt Sep 6, 2018
661db8e
Bug fixes
j-varun Sep 6, 2018
6bf5009
Merge commit '91cd02b5660ad3d85602faa685cc89084ac1faaf' into stacking…
ahundt Sep 6, 2018
d8ed25f
Merge commit '661db8e89ee9609f04b4001b6588c44c11939161' into stacking…
ahundt Sep 6, 2018
6c4040c
micro_child.py print_arc handle feed dict None case
ahundt Sep 6, 2018
b2010b5
Additional metrics + Arc print for eval
j-varun Sep 7, 2018
6b76317
Changed rotation_weight to 1 for encoding and decoding in grasp_metri…
j-varun Sep 7, 2018
bbc37d8
Merge commit '6b7631796bc5d3c5c96205af1374f924e90f722a' into stacking…
ahundt Sep 7, 2018
c97b145
Fixes for typos
j-varun Sep 7, 2018
eb27128
Merge commit 'c97b14544ff6d316fbaac3071ee16eba6a58616f' into stacking…
ahundt Sep 7, 2018
237a080
More typo fixes
j-varun Sep 7, 2018
e2336be
Commenting unsuccessful printing of arcs
j-varun Sep 7, 2018
ee8c0ba
Merge commit 'e2336be9d9320dcfbba9fbb67671bf31f1a1edfa' into stacking…
ahundt Sep 8, 2018
c46e27d
Added support for inference in reader
j-varun Sep 9, 2018
a53d1fd
Merge commit 'c46e27d346185ec98f603a5d4176f15182929545' into stacking…
ahundt Sep 9, 2018
d284789
Bug fixes for metrics
j-varun Sep 9, 2018
ba8b18f
Bug fixes for metrics
j-varun Sep 9, 2018
78f6b1e
inference mode function integrated into generator in block_stacking_r…
j-varun Sep 9, 2018
53d73c5
Option to switch to one hot encoding in block_stacking_reader.py and …
j-varun Sep 10, 2018
b19806d
Added ELU activation to convolutions in new root
j-varun Sep 10, 2018
fdd1a08
Merge branch 'stacking_search' of https://github.com/ahundt/enas into…
j-varun Sep 10, 2018
6628d6b
Merge commit 'b19806d129e9dd0a696d9fcd96a3e70c4e070a52' into stacking…
ahundt Sep 10, 2018
5eb698b
Commented print_arc in eval
j-varun Sep 10, 2018
08190d7
Merge commit '5eb698b676b435d98112fdb80027e46fe6d1b581' into stacking…
ahundt Sep 10, 2018
0cb8aa6
Bug fix and updated scripts
j-varun Sep 10, 2018
bfa984b
Merge commit '0cb8aa65e2676418f90a7617bc3ae3bde93edc8e' into stacking…
ahundt Sep 10, 2018
2e84b1e
merge changes from costar_plan
ahundt Sep 10, 2018
0ed01f3
block_stacking_reader.py CRITICAL MERGE OF SPLIT OUT ENCODING API
ahundt Sep 10, 2018
1a2fa82
Changes in stem_conv
j-varun Sep 10, 2018
f749a00
Merge branch 'stacking_search' of https://github.com/ahundt/enas into…
j-varun Sep 10, 2018
92010cd
set new rotation final run, make sure val and test is collected
ahundt Sep 12, 2018
6ac9176
Create block stacking translation final script
ahundt Sep 12, 2018
2217e64
Merge pull request #8 from j-varun/stacking_search
ahundt Sep 12, 2018
cf735c9
Option to use MSLE as primary loss function
j-varun Sep 13, 2018
fe0c13b
Merge branch 'stacking_search' of https://github.com/ahundt/enas into…
j-varun Sep 13, 2018
9f35cf4
Changes in rotation final run
j-varun Sep 13, 2018
bcff854
Path fixes
j-varun Sep 13, 2018
5270e30
Additional condition check in reader
j-varun Sep 14, 2018
c224322
Batch size change in rotation final script
j-varun Sep 14, 2018
e5b93bb
Use model batch with low angular error and 100% acc
ahundt Sep 14, 2018
e2f50ff
enas/cifar10/block_stacking_reader.py fix bad merge
ahundt Sep 14, 2018
1bc0155
Fix for redundant condition
j-varun Sep 14, 2018
a40fcfe
enas/cifar10/block_stacking_reader.py fix another merge problem
ahundt Sep 14, 2018
3d81da8
Merge branch 'stacking_search' into good_fixes
ahundt Sep 14, 2018
c378936
Merge pull request #9 from j-varun/good_fixes
ahundt Sep 14, 2018
b03832c
Fix some details for starting the run
ahundt Sep 14, 2018
40d3508
batch size 32
ahundt Sep 14, 2018
08abed4
fix csv path
ahundt Sep 14, 2018
5398e8c
fix for running with msle
j-varun Sep 14, 2018
ff91ed7
Typo fix
j-varun Sep 14, 2018
bf5edbc
final rotation search settings, add no root search scripts
ahundt Sep 15, 2018
61bbfd7
New scripts for final run and added headers for csv
j-varun Sep 17, 2018
b156c95
Timestamp update in scripts
j-varun Sep 17, 2018
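
Note on the reward commits above (e955322, 62b9ede, d0d309d, ed69b79): for the regression tasks the controller reward is derived from validation loss rather than accuracy, first as a reward bounded by a configurable max loss, later as 1/MSE for the translation search. A minimal sketch of both mappings; the function and parameter names here are hypothetical, not taken from the code:

def reward_from_loss(valid_loss, max_loss=2.0, use_inverse=False, epsilon=1e-6):
    """Map a validation loss to a positive controller reward (hypothetical helper).

    max_loss=2.0 mirrors commit d0d309d ("set max_loss to 2 to avoid negative
    losses"); use_inverse=True mirrors commit ed69b79 ("TRANSLATION SEARCH
    MAJOR REWARD CHANGE TO 1/MSE").
    """
    if use_inverse:
        # Inverse-MSE reward: smaller loss means larger reward, kept finite.
        return 1.0 / max(valid_loss, epsilon)
    # Clipped-difference reward: stays non-negative even for large losses.
    return max(max_loss - valid_loss, 0.0)
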
815 changes: 815 additions & 0 deletions enas/cifar10/block_stacking_reader.py

Large diffs are not rendered by default.
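
Since block_stacking_reader.py is too large for GitHub to render inline, here is a heavily reduced sketch of its overall shape, inferred only from the commit messages (a batch generator over the CoSTAR stacking attempts, with an inference mode and optional one-hot encoding) and assuming HDF5 files, which is how the CoSTAR block stacking data is distributed. The class name and dataset keys are assumptions, not the file's actual API:

import h5py
import numpy as np

class BlockStackingSequence(object):
    """Minimal sketch of an HDF5-backed batch generator (hypothetical API).

    The real block_stacking_reader.py layers pose encoding, augmentation,
    inference mode, and reward estimates on top of this basic pattern.
    """
    def __init__(self, filenames, batch_size=32, inference_mode=False):
        self.filenames = filenames
        self.batch_size = batch_size
        self.inference_mode = inference_mode

    def __len__(self):
        return int(np.ceil(len(self.filenames) / float(self.batch_size)))

    def __getitem__(self, index):
        batch = self.filenames[index * self.batch_size:(index + 1) * self.batch_size]
        images, labels = [], []
        for name in batch:
            with h5py.File(name, 'r') as data:
                images.append(np.asarray(data['image'][0]))       # assumed dataset key
                if not self.inference_mode:
                    labels.append(np.asarray(data['pose'][0]))    # assumed dataset key
        if self.inference_mode:
            return np.array(images)
        return np.array(images), np.array(labels)
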

4 changes: 4 additions & 0 deletions enas/cifar10/data_utils.py
@@ -53,6 +53,7 @@ def _read_fmnist_data(data_path):
   labels["train"] = np.array(data.train.labels, dtype = np.int32)
   labels["test"] = np.array(data.test.labels, dtype = np.int32)
   print("Read and processed data..")
+  print(labels["test"])

   return images, labels

@@ -80,6 +81,9 @@ def read_data(data_path, num_valids=5000, dataset = "cifar"):
     images, labels = valid_split_data(images, labels, num_valids)
     return images, labels

+  if dataset == "stacking":
+    images["path"] = data_path
+    return images, labels
   else:
     train_files = [
       "data_batch_1",
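
With the addition above, the stacking dataset takes a short-circuit path through read_data: it only records the dataset location and defers all actual loading to the stacking reader. A hypothetical call site (the path is illustrative):

from enas.cifar10.data_utils import read_data

# No image arrays are loaded here; images merely carries the dataset path.
images, labels = read_data("/path/to/costar_block_stacking", dataset="stacking")
print(images["path"])  # -> "/path/to/costar_block_stacking"
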
53 changes: 29 additions & 24 deletions enas/cifar10/general_child.py
@@ -12,10 +12,11 @@
 from enas.cifar10.image_ops import conv
 from enas.cifar10.image_ops import fully_connected
 from enas.cifar10.image_ops import batch_norm
+from enas.cifar10.image_ops import norm
 from enas.cifar10.image_ops import batch_norm_with_mask
 from enas.cifar10.image_ops import relu
 from enas.cifar10.image_ops import max_pool
-from enas.cifar10.image_ops import global_avg_pool
+from enas.cifar10.image_ops import global_max_pool

 from enas.utils import count_model_params
 from enas.utils import get_train_ops
@@ -101,8 +102,10 @@ def _get_C(self, x):
       x: tensor of shape [N, H, W, C] or [N, C, H, W]
     """
     if self.data_format == "NHWC":
+      assert x.get_shape().as_list()[3] is not None
       return x.get_shape()[3].value
     elif self.data_format == "NCHW":
+      assert x.get_shape().as_list()[1] is not None
       return x.get_shape()[1].value
     else:
       raise ValueError("Unknown data_format '{0}'".format(self.data_format))
@@ -112,6 +115,7 @@ def _get_HW(self, x):
     Args:
       x: tensor of shape [N, H, W, C] or [N, C, H, W]
     """
+    assert x.get_shape().as_list()[2] is not None
     return x.get_shape()[2].value

   def _get_strides(self, stride):
@@ -136,7 +140,7 @@ def _factorized_reduction(self, x, out_filters, stride, is_training):
         w = create_weight("w", [1, 1, inp_c, out_filters])
         x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                          data_format=self.data_format)
-        x = batch_norm(x, is_training, data_format=self.data_format)
+        x = norm(x, is_training, data_format=self.data_format)
         return x

     stride_spec = self._get_strides(stride)
stride_spec = self._get_strides(stride)
Expand Down Expand Up @@ -171,7 +175,7 @@ def _factorized_reduction(self, x, out_filters, stride, is_training):

# Concat and apply BN
final_path = tf.concat(values=[path1, path2], axis=concat_axis)
final_path = batch_norm(final_path, is_training,
final_path = norm(final_path, is_training,
data_format=self.data_format)

return final_path
@@ -194,11 +198,11 @@ def _model(self, images, is_training, reuse=False):
       layers = []

       out_filters = self.out_filters
-      C = self._get_C(images)
+      C = self._get_C(images)
       with tf.variable_scope("stem_conv"):
         w = create_weight("w", [C, C, C, out_filters])
         x = tf.nn.conv2d(images, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
-        x = batch_norm(x, is_training, data_format=self.data_format)
+        x = norm(x, is_training, data_format=self.data_format)
         layers.append(x)

       if self.whole_channels:
if self.whole_channels:
Expand Down Expand Up @@ -229,7 +233,7 @@ def _model(self, images, is_training, reuse=False):
start_idx += 2 * self.num_branches + layer_id
print(layers[-1])

x = global_avg_pool(x, data_format=self.data_format)
x = global_max_pool(x, data_format=self.data_format)
if is_training:
x = tf.nn.dropout(x, self.keep_prob)
with tf.variable_scope("fc"):
@@ -351,8 +355,8 @@ def _enas_layer(self, layer_id, prev_layers, start_idx, out_filters, is_training
         branches = tf.reshape(branches, [N, -1, H, W])
       out = tf.nn.conv2d(
         branches, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
-      out = batch_norm(out, is_training, data_format=self.data_format)
-      out = tf.nn.relu(out)
+      out = norm(out, is_training, data_format=self.data_format)
+      out = tf.nn.elu(out)

     if layer_id > 0:
       if self.whole_channels:
@@ -368,7 +372,7 @@
               lambda: tf.zeros_like(prev_layers[i])))
         res_layers.append(out)
         out = tf.add_n(res_layers)
-        out = batch_norm(out, is_training, data_format=self.data_format)
+        out = norm(out, is_training, data_format=self.data_format)

     return out

@@ -396,17 +400,17 @@ def _fixed_layer(
       filter_size = size[count]
       with tf.variable_scope("conv_1x1"):
         w = create_weight("w", [1, 1, inp_c, out_filters])
-        out = tf.nn.relu(inputs)
+        out = tf.nn.elu(inputs)
         out = tf.nn.conv2d(out, w, [1, 1, 1, 1], "SAME",
                            data_format=self.data_format)
-        out = batch_norm(out, is_training, data_format=self.data_format)
+        out = norm(out, is_training, data_format=self.data_format)

       with tf.variable_scope("conv_{0}x{0}".format(filter_size)):
         w = create_weight("w", [filter_size, filter_size, out_filters, out_filters])
-        out = tf.nn.relu(out)
+        out = tf.nn.elu(out)
         out = tf.nn.conv2d(out, w, [1, 1, 1, 1], "SAME",
                            data_format=self.data_format)
-        out = batch_norm(out, is_training, data_format=self.data_format)
+        out = norm(out, is_training, data_format=self.data_format)
     elif count == 4:
       pass
     elif count == 5:
@@ -449,10 +453,10 @@ def _fixed_layer(
         branches = tf.concat(branches, axis=3)
       elif self.data_format == "NCHW":
         branches = tf.concat(branches, axis=1)
-      out = tf.nn.relu(branches)
+      out = tf.nn.elu(branches)
       out = tf.nn.conv2d(out, w, [1, 1, 1, 1], "SAME",
                          data_format=self.data_format)
-      out = batch_norm(out, is_training, data_format=self.data_format)
+      out = norm(out, is_training, data_format=self.data_format)

     if layer_id > 0:
       if self.whole_channels:
@@ -477,10 +481,10 @@
       with tf.variable_scope("skip"):
         w = create_weight(
           "w", [1, 1, total_skip_channels * out_filters, out_filters])
-        out = tf.nn.relu(out)
+        out = tf.nn.elu(out)
         out = tf.nn.conv2d(
           out, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
-        out = batch_norm(out, is_training, data_format=self.data_format)
+        out = norm(out, is_training, data_format=self.data_format)

     return out

@@ -504,8 +508,8 @@ def _conv_branch(self, inputs, filter_size, is_training, count, out_filters,
     with tf.variable_scope("inp_conv_1"):
       w = create_weight("w", [1, 1, inp_c, out_filters])
       x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
-      x = batch_norm(x, is_training, data_format=self.data_format)
-      x = tf.nn.relu(x)
+      x = norm(x, is_training, data_format=self.data_format)
+      x = tf.nn.elu(x)

     with tf.variable_scope("out_conv_{}".format(filter_size)):
       if start_idx is None:
@@ -515,12 +519,13 @@
         w_point = create_weight("w_point", [1, 1, out_filters * ch_mul, count])
         x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1],
                                    padding="SAME", data_format=self.data_format)
-        x = batch_norm(x, is_training, data_format=self.data_format)
+        x = norm(x, is_training, data_format=self.data_format)
       else:
         w = create_weight("w", [filter_size, filter_size, inp_c, count])
         x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
-        x = batch_norm(x, is_training, data_format=self.data_format)
+        x = norm(x, is_training, data_format=self.data_format)
     else:
+      print('TODO(ahundt) batch_norm_with_mask is definitely called... make a group norm version!')
       if separable:
         w_depth = create_weight("w_depth", [filter_size, filter_size, out_filters, ch_mul])
         w_point = create_weight("w_point", [out_filters, out_filters * ch_mul])
@@ -544,7 +549,7 @@
         mask = tf.logical_and(start_idx <= mask, mask < start_idx + count)
         x = batch_norm_with_mask(
           x, is_training, mask, out_filters, data_format=self.data_format)
-        x = tf.nn.relu(x)
+        x = tf.nn.elu(x)
     return x

   def _pool_branch(self, inputs, is_training, count, avg_or_max, start_idx=None):
@@ -566,8 +571,8 @@ def _pool_branch(self, inputs, is_training, count, avg_or_max, start_idx=None):
     with tf.variable_scope("conv_1"):
       w = create_weight("w", [1, 1, inp_c, self.out_filters])
       x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
-      x = batch_norm(x, is_training, data_format=self.data_format)
-      x = tf.nn.relu(x)
+      x = norm(x, is_training, data_format=self.data_format)
+      x = tf.nn.elu(x)

     with tf.variable_scope("pool"):
       if self.data_format == "NHWC":
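
The norm() calls introduced throughout this diff dispatch to group normalization (commit a4a1087, "batch_norm -> group_norm with exception of masked batch norm"), and a long run of image_ops.py commits above wrestles with its parameter shapes, including the [1, C, 1, 1] form from commit 3f65d52. Below is a minimal TF1-style group_norm sketch under those assumptions; it illustrates the technique, not the repository's actual implementation:

import tensorflow as tf

def group_norm(x, groups=32, eps=1e-5, data_format="NHWC", scope="group_norm"):
    """Minimal group normalization sketch (assumed semantics, TF1 API)."""
    with tf.variable_scope(scope):
        if data_format == "NHWC":
            # Normalize in NCHW, then transpose back at the end.
            x = tf.transpose(x, [0, 3, 1, 2])
        _, c, h, w = x.get_shape().as_list()
        g = min(groups, c)
        # Split channels into g groups and normalize within each group.
        x = tf.reshape(x, [-1, g, c // g, h, w])
        mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True)
        x = (x - mean) / tf.sqrt(var + eps)
        x = tf.reshape(x, [-1, c, h, w])
        # Learned scale and shift, shaped [1, C, 1, 1] as in the commit log.
        gamma = tf.get_variable("gamma", [1, c, 1, 1], initializer=tf.ones_initializer())
        beta = tf.get_variable("beta", [1, c, 1, 1], initializer=tf.zeros_initializer())
        x = x * gamma + beta
        if data_format == "NHWC":
            x = tf.transpose(x, [0, 2, 3, 1])
    return x

The group count must divide the channel count; the min(groups, c) guard is one simple way to keep narrow stem layers from failing, which matches the kind of shape errors the fix attempts above were chasing.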