diff --git a/enas/cifar10/block_stacking_reader.py b/enas/cifar10/block_stacking_reader.py new file mode 100644 index 0000000..738ba71 --- /dev/null +++ b/enas/cifar10/block_stacking_reader.py @@ -0,0 +1,815 @@ + +import h5py +import os +import io +import sys +import glob +import traceback +from PIL import Image +from skimage.transform import resize + +import numpy as np +from numpy.random import RandomState +import json +import keras +from keras.utils import Sequence +from keras.utils import OrderedEnqueuer +import tensorflow as tf +import grasp_metrics +import keras_applications +import keras_preprocessing + + +def random_eraser(input_img, p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255, pixel_level=True): + """ Cutout and random erasing algorithms for data augmentation + + source: + https://github.com/yu4u/cutout-random-erasing/blob/master/random_eraser.py + """ + img_h, img_w, img_c = input_img.shape + p_1 = np.random.rand() + + if p_1 > p: + return input_img + + while True: + s = np.random.uniform(s_l, s_h) * img_h * img_w + r = np.random.uniform(r_1, r_2) + w = int(np.sqrt(s / r)) + h = int(np.sqrt(s * r)) + left = np.random.randint(0, img_w) + top = np.random.randint(0, img_h) + + if left + w <= img_w and top + h <= img_h: + break + + if pixel_level: + c = np.random.uniform(v_l, v_h, (h, w, img_c)) + else: + c = np.random.uniform(v_l, v_h) + + input_img[top:top + h, left:left + w, :] = c + + return input_img + + +def tile_vector_as_image_channels_np(vector_op, image_shape): + """ + Takes a vector of length n and an image shape BHWC, + and repeat the vector as channels at each pixel. + + # Params + + vector_op: A tensor vector to tile. + image_shape: A list of integers [width, height] with the desired dimensions. + """ + # input vector shape + ivs = np.shape(vector_op) + # reshape the vector into a single pixel + vector_pixel_shape = [ivs[0], 1, 1, ivs[1]] + vector_op = np.reshape(vector_op, vector_pixel_shape) + # tile the pixel into a full image + tile_dimensions = [1, image_shape[1], image_shape[2], 1] + vector_op = np.tile(vector_op, tile_dimensions) + # if K.backend() is 'tensorflow': + # output_shape = [ivs[0], image_shape[1], image_shape[2], ivs[1]] + # vector_op.set_shape(output_shape) + return vector_op + + +def concat_images_with_tiled_vector_np(images, vector): + """Combine a set of images with a vector, tiling the vector at each pixel in the images and concatenating on the channel axis. + + # Params + + images: list of images with the same dimensions + vector: vector to tile on each image. If you have + more than one vector, simply concatenate them + all before calling this function. + + # Returns + + """ + if not isinstance(images, list): + images = [images] + image_shape = np.shape(images[0]) + tiled_vector = tile_vector_as_image_channels_np(vector, image_shape) + images.append(tiled_vector) + combined = np.concatenate(images, axis=-1) + + return combined + + +def concat_unit_meshgrid_np(tensor): + """ Concat unit meshgrid onto the tensor. + + This is roughly equivalent to the input in uber's coordconv. + TODO(ahundt) concat_unit_meshgrid_np is untested. 
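+
+    # Example
+
+    A rough shape sketch (untested, the batch size of 4 is arbitrary):
+
+        batch = np.zeros((4, 224, 224, 3))
+        with_grid = concat_unit_meshgrid_np(batch)
+        # with_grid.shape == (4, 224, 224, 5); the two extra channels
+        # hold the normalized y and x pixel coordinates.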
+ """ + assert len(tensor.shape) == 4 + # print('tensor shape: ' + str(tensor.shape)) + y_size = tensor.shape[1] + x_size = tensor.shape[2] + max_value = max(x_size, y_size) + y, x = np.meshgrid(np.arange(y_size), + np.arange(x_size), + indexing='ij') + assert y.size == x.size and y.size == tensor.shape[1] * tensor.shape[2] + # print('x shape: ' + str(x.shape) + ' y shape: ' + str(y.shape)) + # rescale data and reshape to have the same dimension as the tensor + y = np.reshape(y / max_value, [1, y.shape[0], y.shape[1], 1]) + x = np.reshape(x / max_value, [1, x.shape[0], x.shape[1], 1]) + + # need to have a meshgrid for each example in the batch, + # so tile along batch axis + tile_dimensions = [tensor.shape[0], 1, 1, 1] + y = np.tile(y, tile_dimensions) + x = np.tile(x, tile_dimensions) + combined = np.concatenate([tensor, y, x], axis=-1) + return combined + + +def blend_images_np(image, image2, alpha=0.5): + """Draws image2 on an image. + Args: + image: uint8 numpy array with shape (img_height, img_height, 3) + image2: a uint8 numpy array of shape (img_height, img_height) with + values between either 0 or 1. + color: color to draw the keypoints with. Default is red. + alpha: transparency value between 0 and 1. (default: 0.4) + Raises: + ValueError: On incorrect data type for image or image2s. + """ + if image.dtype != np.uint8: + raise ValueError('`image` not of type np.uint8') + if image2.dtype != np.uint8: + raise ValueError('`image2` not of type np.uint8') + if image.shape[:2] != image2.shape[:2]: + raise ValueError('The image has spatial dimensions %s but the image2 has ' + 'dimensions %s' % (image.shape[:2], image2.shape[:2])) + pil_image = Image.fromarray(image) + pil_image2 = Image.fromarray(image2) + + pil_image = Image.blend(pil_image, pil_image2, alpha) + np.copyto(image, np.array(pil_image.convert('RGB'))) + return image + + +def blend_image_sequence(images, alpha=0.5, verbose=0): + """ Blend past goal images + """ + blended_image = images[0] + if len(images) > 1: + for image in images[1:]: + if verbose > 1: + print('image type: ' + str(type(image)) + ' dtype: ' + str(image.dtype)) + blended_image = blend_images_np(blended_image, image) + return blended_image + + +def get_past_goal_indices(current_robot_time_index, goal_indices, filename='', verbose=0): + """ get past goal image indices, including the initial image + + # Arguments + + current_robot_time_index: the index of the current "robot time" being simulated + goal_indices: a list of goal time indices for every robot time + + # Returns + + A list of indices representing all the goal time steps + """ + image_indices = [0] + total_goal_indices = len(goal_indices) + if verbose: + print('total images: ' + str(total_goal_indices)) + image_index = 1 + while image_index < current_robot_time_index and image_index < total_goal_indices: + if verbose > 0: + print('image_index: ' + str(image_index)) + goal_image_index = goal_indices[image_index] + if goal_image_index < current_robot_time_index and goal_image_index < total_goal_indices: + if verbose > 0: + print('goal_indices[goal_image_index]: ' + str(goal_indices[goal_image_index])) + image_indices += [goal_image_index] + if goal_image_index <= goal_indices[goal_image_index]: + image_index += 1 + # TODO(ahundt) understand the cause of the warning below, modify the preprocessing script to correct it + elif goal_image_index >= total_goal_indices and verbose > 0: + print('block_stacking_reader.py::get_past_goal_indices(): warning, goal index equals ' + 'or exceeds total_goal_indices. 
filename: ' + str(filename) + + ' goal_image_index: ' + str(goal_image_index) + + ' total_goal_indices: ' + str(total_goal_indices)) + image_index = goal_image_index + return image_indices + + +def encode_label(label_features_to_extract, y, action_successes=None, random_augmentation=None, current_stacking_reward=None): + """ Encode a label based on the features that need to be extracted from the pose y. + + y: list of poses in [[x, y, z, qx, qy, qz, qw]] format + action_successes: list of labels with successful actions + """ + # determine the label + if label_features_to_extract is None or 'grasp_goal_xyz_3' in label_features_to_extract: + # regression to translation case, see semantic_translation_regression in cornell_grasp_train.py + y = grasp_metrics.batch_encode_xyz_qxyzw_to_xyz_aaxyz_nsc(y, random_augmentation=random_augmentation) + y = y[:, :3] + elif label_features_to_extract is None or 'grasp_goal_aaxyz_nsc_5' in label_features_to_extract: + # regression to rotation case, see semantic_rotation_regression in cornell_grasp_train.py + y = grasp_metrics.batch_encode_xyz_qxyzw_to_xyz_aaxyz_nsc(y, random_augmentation=random_augmentation) + y = y[:, 3:] + elif label_features_to_extract is None or 'grasp_goal_xyz_aaxyz_nsc_8' in label_features_to_extract: + # default, regression label case + y = grasp_metrics.batch_encode_xyz_qxyzw_to_xyz_aaxyz_nsc(y, random_augmentation=random_augmentation) + elif 'grasp_success' in label_features_to_extract or 'action_success' in label_features_to_extract: + if action_successes is None: + raise ValueError( + 'encode_label() was not provided with action_successes, ' + 'which should contain data about the future outcome of the action.') + # classification label case + y = action_successes + elif 'stacking_reward' in label_features_to_extract: + y = current_stacking_reward + else: + raise ValueError('Unsupported label_features_to_extract: ' + str(label_features_to_extract)) + return y + + +def encode_action_and_images( + data_features_to_extract, + poses, + action_labels, + init_images, + current_images, + y=None, + random_augmentation=None, + encoded_goal_pose=None, + epsilon=1e-3): + """ Given an action and images, return the combined input object performing prediction with keras. + + data_features_to_extract: A string identifier for the encoding to use for the actions and images. + Options include: 'image_0_image_n_vec_xyz_aaxyz_nsc_15', 'image_0_image_n_vec_xyz_10', + 'current_xyz_aaxyz_nsc_8', 'current_xyz_3', 'proposed_goal_xyz_aaxyz_nsc_8'. + action_labels: batch of already one-hot or floating point encoded action label + init_images: batch of clear view images, the initial in the time series. + These should already be the appropriate size and rgb values in the range [0, 255]. + current_images: batch of current image in the time series. + These should already be the appropriate size and rgb values in the range [0, 255]. + y: labels, particularly useful when classifying the quality of a regressed action. + random_augmentation: None has no effect, if given a float from 0 to 1 + it will modify the poses with a small amount of translation and rotation + with the probablity specified by the provided floating point number. + encoded_goal_pose: A pre-encoded goal pose for use in actor/critic classification of proposals. 
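+
+    # Example
+
+    A rough call sketch; the variable names are placeholders for batches
+    prepared as described above:
+
+        X = encode_action_and_images(
+            data_features_to_extract=['current_xyz_aaxyz_nsc_8'],
+            poses=poses, action_labels=action_labels,
+            init_images=init_images, current_images=current_images)
+        # X is a list: [preprocessed init images, preprocessed current
+        # images, action_poses_vec]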
+ """ + + action_labels = np.array(action_labels) + init_images = keras_applications.imagenet_utils._preprocess_numpy_input( + np.array(init_images, dtype=np.float32), + data_format='channels_last', mode='tf') + current_images = keras_applications.imagenet_utils._preprocess_numpy_input( + np.array(current_images, dtype=np.float32), + data_format='channels_last', mode='tf') + poses = np.array(poses) + + # print('poses shape: ' + str(poses.shape)) + encoded_poses = grasp_metrics.batch_encode_xyz_qxyzw_to_xyz_aaxyz_nsc( + poses, random_augmentation=random_augmentation) + + if np.any(encoded_poses < 0 - epsilon) or np.any(encoded_poses > 1 + epsilon): + raise ValueError('An encoded pose was outside the [0,1] range! Update your encoding. poses: ' + + str(poses) + ' encoded poses: ' + str(encoded_poses)) + + if (data_features_to_extract is None or + 'current_xyz_3' in data_features_to_extract or + 'image_0_image_n_vec_xyz_10' in data_features_to_extract or + 'image_0_image_n_vec_xyz_nxygrid_12' in data_features_to_extract): + # regression input case for translation only + action_poses_vec = np.concatenate([encoded_poses[:, :3], action_labels], axis=-1) + X = [init_images, current_images, action_poses_vec] + elif (data_features_to_extract is None or + 'current_xyz_aaxyz_nsc_8' in data_features_to_extract or + 'image_0_image_n_vec_xyz_aaxyz_nsc_15' in data_features_to_extract or + 'image_0_image_n_vec_xyz_aaxyz_nsc_nxygrid_17' in data_features_to_extract): + # default, regression input case for translation and rotation + action_poses_vec = np.concatenate([encoded_poses, action_labels], axis=-1) + X = [init_images, current_images, action_poses_vec] + elif(data_features_to_extract is None or 'image_0_image_n_vec_0_vec_n_xyz_aaxyz_nsc_nxygrid_25' in data_features_to_extract): + # this is for classification of actions + action_poses_vec = np.concatenate([encoded_poses, encoded_goal_pose, action_labels], axis=-1) + X = [init_images, current_images, action_poses_vec] + elif 'proposed_goal_xyz_aaxyz_nsc_8' in data_features_to_extract: + # classification input case + proposed_and_current_action_vec = np.concatenate([encoded_poses, action_labels, y], axis=-1) + X = [init_images, current_images, proposed_and_current_action_vec] + + else: + raise ValueError('Unsupported data input: ' + str(data_features_to_extract)) + + if (data_features_to_extract is not None and + ('image_0_image_n_vec_xyz_10' in data_features_to_extract or + 'image_0_image_n_vec_xyz_aaxyz_nsc_15' in data_features_to_extract or + 'image_0_image_n_vec_xyz_nxygrid_12' in data_features_to_extract or + 'image_0_image_n_vec_xyz_aaxyz_nsc_nxygrid_17' in data_features_to_extract or + 'image_0_image_n_vec_0_vec_n_xyz_aaxyz_nsc_nxygrid_25' in data_features_to_extract)): + # make the giant data cube if it is requested + vec = np.squeeze(X[2:]) + assert len(vec.shape) == 2, 'we only support a 2D input vector for now but found shape:' + str(vec.shape) + X = concat_images_with_tiled_vector_np(X[:2], vec) + + + # check if any of the data features expect nxygrid normalized x, y coordinate grid values + grid_labels = [s for s in data_features_to_extract if 'nxygrid' in s] + # print('grid labels: ' + str(grid_labels)) + if (data_features_to_extract is not None and grid_labels): + X = concat_unit_meshgrid_np(X) + return X + + +def inference_mode_gen(file_names): + """ Generate data for all time steps in a single example. 
+ """ + file_list_updated = [] + # print(len(file_names)) + for f_name in file_names: + with h5py.File(f_name, 'r') as data: + file_len = len(data['gripper_action_goal_idx']) - 1 + # print(file_len) + list_id = [f_name] * file_len + file_list_updated = file_list_updated + list_id + return file_list_updated + + +class CostarBlockStackingSequence(Sequence): + '''Generates a batch of data from the stacking dataset. + + # TODO(ahundt) match the preprocessing /augmentation apis of cornell & google dataset + ''' + def __init__(self, list_example_filenames, + label_features_to_extract=None, data_features_to_extract=None, + total_actions_available=41, + batch_size=32, shuffle=False, seed=0, + random_state=None, + is_training=True, random_augmentation=None, + random_shift=False, + output_shape=None, + blend_previous_goal_images=False, + estimated_time_steps_per_example=250, verbose=0, inference_mode=False, one_hot_encoding=False): + '''Initialization + + # Arguments + + list_Ids: a list of file paths to be read + batch_size: specifies the size of each batch + shuffle: boolean to specify shuffle after each epoch + seed: a random seed to use. If seed is None it will be in order! + random_state: A numpy RandomState object, if not provided one will be generated from the seed. + Used exclusively for example data ordering and the indices to visit within an example. + # TODO(ahundt) better notes about the two parameters below. See choose_features_and_metrics() in cornell_grasp_trin.py. + label_features_to_extract: defaults to regression options, classification options are also available + data_features_to_extract: defaults to regression options, classification options are also available + Options include 'image_0_image_n_vec_xyz_aaxyz_nsc_15' which is a giant NHWC cube of image and pose data, + 'current_xyz_aaxyz_nsc_8' a vector with the current pose, + 'proposed_goal_xyz_aaxyz_nsc_8' a pose at the end of the current action (for classification cases), + 'image_0_image_n_vec_xyz_nxygrid_12' another giant cube without rotation and with explicit normalized xy coordinates, + 'image_0_image_n_vec_xyz_aaxyz_nsc_nxygrid_17' another giant cube with rotation and explicit normalized xy coordinates. + random_augmentation: None or a float value between 0 and 1 indiciating how frequently random augmentation should be applied. + estimated_time_steps_per_example: The number of images in each example varies, + so we simply sample in proportion to an estimated number of images per example. + Due to random sampling, there is no guarantee that every image will be visited once! + However, the images can be visited in a fixed order, particularly when is_training=False. + one_hot_encoding flag triggers one hot encoding and thus numbers at the end of labels might not correspond to the actual size. + + # Explanation of abbreviations: + + aaxyz_nsc: is an axis and angle in xyz order, where the angle is defined by a normalized sin(theta) cos(theta). + nxygrid: at each pixel, concatenate two additional channels containing the pixel coordinate x and y as values between 0 and 1. + This is similar to uber's "coordconv" paper. 
+ ''' + if random_state is None: + random_state = RandomState(seed) + self.batch_size = batch_size + self.list_example_filenames = list_example_filenames + self.shuffle = shuffle + self.seed = seed + self.random_state = random_state + self.output_shape = output_shape + self.is_training = is_training + self.verbose = verbose + self.on_epoch_end() + if isinstance(label_features_to_extract, str): + label_features_to_extract = [label_features_to_extract] + self.label_features_to_extract = label_features_to_extract + # TODO(ahundt) total_actions_available can probably be extracted from the example hdf5 files and doesn't need to be a param + if isinstance(data_features_to_extract, str): + data_features_to_extract = [data_features_to_extract] + self.data_features_to_extract = data_features_to_extract + self.total_actions_available = total_actions_available + self.random_augmentation = random_augmentation + self.random_shift = random_shift + self.inference_mode = inference_mode + self.infer_index = 0 + self.one_hot_encoding = one_hot_encoding + + self.blend = blend_previous_goal_images + self.estimated_time_steps_per_example = estimated_time_steps_per_example + if self.inference_mode is True: + self.list_example_filenames = inference_mode_gen(self.list_example_filenames) + # if crop_shape is None: + # # height width 3 + # crop_shape = (224, 224, 3) + # self.crop_shape = crop_shape + + def __len__(self): + """Denotes the number of batches per epoch + """ + return int(np.floor(len(self.list_example_filenames) / self.batch_size)) + + def __getitem__(self, index): + """Generate one batch of data + """ + # Generate indexes of the batch + indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size] + if self.verbose > 0: + print("batch getitem indices:" + str(indexes)) + # Find list of example_filenames + list_example_filenames_temp = [self.list_example_filenames[k] for k in indexes] + # Generate data + self.infer_index = self.infer_index + 1 + X, y = self.__data_generation(list_example_filenames_temp, self.infer_index) + + return X, y + + def get_estimated_time_steps_per_example(self): + """ Get the estimated images per example, + + Run extra steps in proportion to this if you want to get close to visiting every image. + """ + return self.estimated_time_steps_per_example + + def on_epoch_end(self): + """ Updates indexes after each epoch + """ + if self.seed is not None and not self.is_training: + # repeat the same order if we're validating or testing + # continue the large random sequence for training + self.random_state.seed(self.seed) + self.indexes = np.arange(len(self.list_example_filenames)) + if self.shuffle is True: + self.random_state.shuffle(self.indexes) + + def __data_generation(self, list_Ids, images_index): + """ Generates data containing batch_size samples + + # Arguments + + list_Ids: a list of file paths to be read + """ + + def JpegToNumpy(jpeg): + stream = io.BytesIO(jpeg) + im = np.asarray(Image.open(stream)) + try: + return im.astype(np.uint8) + except(TypeError) as exception: + print("Failed to convert PIL image type", exception) + print("type ", type(im), "len ", len(im)) + + def ConvertImageListToNumpy(data, format='numpy', data_format='NHWC'): + """ Convert a list of binary jpeg or png files to numpy format. 
+ + # Arguments + + data: a list of binary jpeg images to convert + format: default 'numpy' returns a 4d numpy array, + 'list' returns a list of 3d numpy arrays + """ + length = len(data) + imgs = [] + for raw in data: + img = JpegToNumpy(raw) + if data_format == 'NCHW': + img = np.transpose(img, [2, 0, 1]) + imgs.append(img) + if format == 'numpy': + imgs = np.array(imgs) + return imgs + try: + # Initialization + if self.verbose > 0: + print("generating batch: " + str(list_Ids)) + X = [] + init_images = [] + current_images = [] + poses = [] + goal_pose = [] + y = [] + action_labels = [] + action_successes = [] + example_filename = '' + if isinstance(list_Ids, int): + # if it is just a single int + # make it a list so we can iterate + list_Ids = [list_Ids] + + # Generate data + for i, example_filename in enumerate(list_Ids): + example_filename = os.path.expanduser(example_filename) + if self.verbose > 0: + print('reading: ' + str(i) + ' path: ' + str(example_filename)) + # Store sample + # X[i,] = np.load('data/' + example_filename + '.npy') + x = () + try: + if not os.path.isfile(example_filename): + raise ValueError('CostarBlockStackingSequence: Trying to open something which is not a file: ' + str(example_filename)) + with h5py.File(example_filename, 'r') as data: + if 'gripper_action_goal_idx' not in data or 'gripper_action_label' not in data: + raise ValueError('block_stacking_reader.py: You need to run preprocessing before this will work! \n' + + ' python2 ctp_integration/scripts/view_convert_dataset.py --path ~/.keras/datasets/costar_block_stacking_dataset_v0.4 --preprocess_inplace gripper_action --write' + '\n File with error: ' + str(example_filename)) + # indices = [0] + # len of goal indexes is the same as the number of images, so this saves loading all the images + all_goal_ids = np.array(data['gripper_action_goal_idx']) + if('stacking_reward' in self.label_features_to_extract): + # TODO(ahundt) move this check out of the stacking reward case after files have been updated + if all_goal_ids[-1] > len(all_goal_ids): + raise ValueError(' File contains goal id greater than total number of frames ' + str(example_filename)) + if len(all_goal_ids) < 2: + print('block_stacking_reader.py: ' + str(len(all_goal_ids)) + ' goal indices in this file, skipping: ' + example_filename) + if 'success' in example_filename: + label_constant = 1 + else: + label_constant = 0 + stacking_reward = np.arange(len(all_goal_ids)) + stacking_reward = 0.999 * stacking_reward * label_constant + # print("reward estimates", stacking_reward) + + if self.seed is not None: + rand_max = len(all_goal_ids) - 1 + if rand_max <= 1: + print('CostarBlockStackingSequence: not enough goal ids: ' + str(all_goal_ids) + ' file: ' + str(rand_max)) + image_indices = self.random_state.randint(1, rand_max, 1) + else: + raise NotImplementedError + indices = [0] + list(image_indices) + + if self.blend: + img_indices = get_past_goal_indices(image_indices, all_goal_ids, filename=example_filename) + else: + img_indices = indices + if self.inference_mode is True: + if images_index >= len(data['gripper_action_goal_idx']): + self.infer_index = 1 + image_idx = 1 + # image_idx = (images_index % (len(data['gripper_action_goal_idx']) - 1)) + 1 + else: + image_idx = images_index + + img_indices = [0, image_idx] + # print("image_index", image_idx) + # print("image_true", images_index, len(data['gripper_action_goal_idx'])) + # print("new_indices-----", image_idx) + if self.verbose > 0: + print("Indices --", indices) + print('img_indices: ' + 
str(img_indices)) + rgb_images = list(data['image'][img_indices]) + rgb_images = ConvertImageListToNumpy(rgb_images, format='numpy') + + if self.blend: + # TODO(ahundt) move this to after the resize loop for a speedup + blended_image = blend_image_sequence(rgb_images) + rgb_images = [rgb_images[0], blended_image] + # resize using skimage + rgb_images_resized = [] + for k, images in enumerate(rgb_images): + if (self.is_training and self.random_augmentation is not None and + self.random_shift and np.random.random() > self.random_augmentation): + # apply random shift to the images before resizing + images = keras_preprocessing.image.random_shift( + images, + # height, width + 1./(48. * 2.), 1./(64. * 2.), + row_axis=0, col_axis=1, channel_axis=2) + # TODO(ahundt) improve crop/resize to match cornell_grasp_dataset_reader + if self.output_shape is not None: + resized_image = resize(images, self.output_shape, mode='constant', preserve_range=True) + else: + resized_image = images + if self.is_training and self.random_augmentation: + # do some image augmentation with random erasing & cutout + resized_image = random_eraser(resized_image) + rgb_images_resized.append(resized_image) + + init_images.append(rgb_images_resized[0]) + current_images.append(rgb_images_resized[1]) + poses.append(np.array(data['pose'][indices[1:]])[0]) + if(self.data_features_to_extract is not None and 'image_0_image_n_vec_0_vec_n_xyz_aaxyz_nsc_nxygrid_25' in self.data_features_to_extract): + next_goal_idx = all_goal_ids[indices[1:][0]] + goal_pose.append(np.array(data['pose'][next_goal_idx])) + print("final pose added", goal_pose) + current_stacking_reward = stacking_reward[indices[1]] + print("reward estimate", current_stacking_reward) + # x = x + tuple([rgb_images[indices]]) + # x = x + tuple([np.array(data['pose'])[indices]]) + + if (self.data_features_to_extract is not None and + ('image_0_image_n_vec_xyz_aaxyz_nsc_15' in self.data_features_to_extract or + 'image_0_image_n_vec_xyz_nxygrid_12' in self.data_features_to_extract or + 'image_0_image_n_vec_xyz_aaxyz_nsc_nxygrid_17' in self.data_features_to_extract or + 'image_0_image_n_vec_0_vec_n_xyz_aaxyz_nsc_nxygrid_25' in self.data_features_to_extract) and not self.one_hot_encoding): + # normalized floating point encoding of action vector + # from 0 to 1 in a single float which still becomes + # a 2d array of dimension batch_size x 1 + # np.expand_dims(data['gripper_action_label'][indices[1:]], axis=-1) / self.total_actions_available + for j in indices[1:]: + action = [float(data['gripper_action_label'][j] / self.total_actions_available)] + action_labels.append(action) + else: + # one hot encoding + for j in indices[1:]: + # generate the action label one-hot encoding + action = np.zeros(self.total_actions_available) + action[data['gripper_action_label'][j]] = 1 + action_labels.append(action) + # action_labels = np.array(action_labels) + + # print(action_labels) + # x = x + tuple([action_labels]) + # X.append(x) + # action_labels = np.unique(data['gripper_action_label']) + # print(np.array(data['labels_to_name']).shape) + # X.append(np.array(data['pose'])[indices]) + + # Store class + label = () + # change to goals computed + index1 = indices[1] + goal_ids = all_goal_ids[index1] + # print(index1) + label = np.array(data['pose'])[goal_ids] + # print(type(label)) + # for items in list(data['all_tf2_frames_from_base_link_vec_quat_xyzxyzw_json'][indices]): + # json_data = json.loads(items.decode('UTF-8')) + # label = label + tuple([json_data['gripper_center']]) + # 
print(np.array(json_data['gripper_center']))
+                        # print(json_data.keys())
+                        # y.append(np.array(json_data['camera_rgb_frame']))
+                        if self.label_features_to_extract is not None and 'stacking_reward' in self.label_features_to_extract:
+                            # print(y)
+                            y.append(current_stacking_reward)
+                        else:
+                            y.append(label)
+                        if 'success' in example_filename:
+                            action_successes = action_successes + [1]
+                        else:
+                            action_successes = action_successes + [0]
+                    # print("y = ", y)
+                except IOError as ex:
+                    print('Error: Skipping file due to IO error when opening ' +
+                          example_filename + ': ' + str(ex) + ' using the last example twice for batch')
+
+            action_labels = np.array(action_labels)
+            init_images = keras_applications.imagenet_utils._preprocess_numpy_input(
+                np.array(init_images, dtype=np.float32),
+                data_format='channels_last', mode='tf')
+            current_images = keras_applications.imagenet_utils._preprocess_numpy_input(
+                np.array(current_images, dtype=np.float32),
+                data_format='channels_last', mode='tf')
+            poses = np.array(poses)
+
+            # print('poses shape: ' + str(poses.shape))
+            encoded_poses = grasp_metrics.batch_encode_xyz_qxyzw_to_xyz_aaxyz_nsc(
+                poses, random_augmentation=self.random_augmentation)
+            if self.data_features_to_extract is None or 'image_0_image_n_vec_0_vec_n_xyz_aaxyz_nsc_nxygrid_25' in self.data_features_to_extract:
+                # TODO(ahundt) encoded_goal_pose is currently computed from poses; goal_pose is collected above but not yet encoded or used
+                encoded_goal_pose = grasp_metrics.batch_encode_xyz_qxyzw_to_xyz_aaxyz_nsc(
+                    poses, random_augmentation=self.random_augmentation)
+                # encoded_poses = np.array([encoded_poses, encoded_goal_pose])
+
+            # print('encoded poses shape: ' + str(encoded_poses.shape))
+            # print('action labels shape: ' + str(action_labels.shape))
+            # print('encoded poses vec shape: ' + str(action_poses_vec.shape))
+            # print("---", init_images.shape)
+            # init_images = tf.image.resize_images(init_images, [224, 224])
+            # current_images = tf.image.resize_images(current_images, [224, 224])
+            # print("---", init_images.shape)
+            # X = init_images
+            X = encode_action_and_images(
+                data_features_to_extract=self.data_features_to_extract,
+                poses=poses, action_labels=action_labels,
+                init_images=init_images, current_images=current_images,
+                # only apply random augmentation during training
+                y=y, random_augmentation=self.random_augmentation if self.is_training else None)
+
+            # print("type=======", type(X))
+            # print("shape=====", X.shape)
+
+            # determine the label
+            if self.label_features_to_extract is not None and 'stacking_reward' in self.label_features_to_extract:
+                y = encode_label(self.label_features_to_extract, y, action_successes, self.random_augmentation, current_stacking_reward)
+            else:
+                y = encode_label(self.label_features_to_extract, y, action_successes, self.random_augmentation, None)
+
+            # Debugging checks
+            if X is None:
+                raise ValueError('Unsupported input data for X: ' + str(x))
+            if y is None:
+                raise ValueError('Unsupported input data for y: ' + str(x))
+
+            # Assemble the data batch
+            batch = (X, y)
+
+            if self.verbose > 0:
+                # diff should be nonzero for most timesteps except just before the gripper closes! 
+                print('encoded current poses: ' + str(poses) + ' labels: ' + str(y))
+                # commented next line due to dimension issue
+                # + ' diff: ' + str(poses - y))
+                print("generated batch: " + str(list_Ids))
+        except Exception as ex:
+            print('CostarBlockStackingSequence: Keras will often swallow exceptions without a stack trace, '
+                  'so we are printing the stack trace here before re-raising the error.')
+            ex_type, ex, tb = sys.exc_info()
+            traceback.print_tb(tb)
+            # deletion must be explicit to prevent leaks
+            # https://stackoverflow.com/a/16946886/99379
+            del tb
+            raise
+
+        return batch
+
+
+def block_stacking_generator(sequence):
+
+    # training_generator = CostarBlockStackingSequence(filenames, batch_size=1)
+    epoch_size = len(sequence)
+    step = 0
+    while True:
+        if step > epoch_size:
+            step = 0
+            sequence.on_epoch_end()
+        batch = sequence.__getitem__(step)
+        # debug statements, note they stop the program after the first batch:
+        # print(np.array(batch).shape)
+        # print(np.array(batch[0][0]).shape)
+        # exit()
+        step += 1
+        yield batch
+
+
+if __name__ == "__main__":
+    visualize = False
+    output_shape = (224, 224, 3)
+    # output_shape = None
+    tf.enable_eager_execution()
+    filenames = glob.glob(os.path.expanduser('~/.keras/datasets/costar_block_stacking_dataset_v0.4/*success.h5f'))
+    # print(filenames)
+    # filenames_new = inference_mode_gen(filenames)
+    training_generator = CostarBlockStackingSequence(
+        filenames, batch_size=1, verbose=1,
+        output_shape=output_shape,
+        label_features_to_extract='grasp_goal_xyz_aaxyz_nsc_8',
+        data_features_to_extract=['current_xyz_aaxyz_nsc_8'],
+        blend_previous_goal_images=False, inference_mode=False)
+    num_batches = len(training_generator)
+    print(num_batches)
+    # print(len(filenames_new))
+
+    bsg = block_stacking_generator(training_generator)
+    iter(bsg)
+    from tqdm import tqdm
+    progress = tqdm(range(num_batches))
+    for i in progress:
+        data = next(bsg)
+        progress.set_description('step: ' + str(i) + ' data type: ' + str(type(data)))
+
+        if visualize:
+            import matplotlib
+            import matplotlib.pyplot as plt
+            # clear view image
+            plt.imshow((np.squeeze(data[0][0]) / 2.0) + 0.5)
+            plt.draw()
+            plt.pause(0.25)
+            # current timestep image
+            plt.imshow((np.squeeze(data[0][1]) / 2.0) + 0.5)
+            plt.draw()
+            plt.pause(0.25)
+            # uncomment the following line to wait for
+            # one window to be closed before showing the next
+            # plt.show()
+    # a = next(training_generator)
+    enqueuer = OrderedEnqueuer(
+        training_generator,
+        use_multiprocessing=False,
+        shuffle=True)
+    enqueuer.start(workers=1, max_queue_size=1)
+    generator = iter(enqueuer.get())
+    print("-------------------")
+    generator_output = next(generator)
+    print("-------------------op")
+    x, y = generator_output
+    print("x-shape-----------", x.shape)
+    print("y-shape-----------", y.shape)
+
+    # X, y = training_generator.__getitem__(1)
+    # print(X.keys())
+    # print(X[0].shape)
+    # print(X[0].shape)
+    # print(y[0])
diff --git a/enas/cifar10/data_utils.py b/enas/cifar10/data_utils.py
index 1f8d615..f727f50 100644
--- a/enas/cifar10/data_utils.py
+++ b/enas/cifar10/data_utils.py
@@ -53,6 +53,7 @@ def _read_fmnist_data(data_path):
   labels["train"] = np.array(data.train.labels, dtype = np.int32)
   labels["test"] = np.array(data.test.labels, dtype = np.int32)
   print("Read and processed data..")
+  print(labels["test"])
   return images, labels
 
@@ -80,6 +81,9 @@ def read_data(data_path, num_valids=5000, dataset = "cifar"):
     images, labels = valid_split_data(images, labels, num_valids)
     return images, labels
+  if dataset == "stacking":
+    images["path"] = data_path
+    return images, labels
   else:
     train_files = [
"data_batch_1", diff --git a/enas/cifar10/general_child.py b/enas/cifar10/general_child.py index 9896c7f..cd4f598 100644 --- a/enas/cifar10/general_child.py +++ b/enas/cifar10/general_child.py @@ -12,10 +12,11 @@ from enas.cifar10.image_ops import conv from enas.cifar10.image_ops import fully_connected from enas.cifar10.image_ops import batch_norm +from enas.cifar10.image_ops import norm from enas.cifar10.image_ops import batch_norm_with_mask from enas.cifar10.image_ops import relu from enas.cifar10.image_ops import max_pool -from enas.cifar10.image_ops import global_avg_pool +from enas.cifar10.image_ops import global_max_pool from enas.utils import count_model_params from enas.utils import get_train_ops @@ -101,8 +102,10 @@ def _get_C(self, x): x: tensor of shape [N, H, W, C] or [N, C, H, W] """ if self.data_format == "NHWC": + assert x.get_shape().as_list()[3] is not None return x.get_shape()[3].value elif self.data_format == "NCHW": + assert x.get_shape().as_list()[1] is not None return x.get_shape()[1].value else: raise ValueError("Unknown data_format '{0}'".format(self.data_format)) @@ -112,6 +115,7 @@ def _get_HW(self, x): Args: x: tensor of shape [N, H, W, C] or [N, C, H, W] """ + assert x.get_shape().as_list()[2] is not None return x.get_shape()[2].value def _get_strides(self, stride): @@ -136,7 +140,7 @@ def _factorized_reduction(self, x, out_filters, stride, is_training): w = create_weight("w", [1, 1, inp_c, out_filters]) x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) + x = norm(x, is_training, data_format=self.data_format) return x stride_spec = self._get_strides(stride) @@ -171,7 +175,7 @@ def _factorized_reduction(self, x, out_filters, stride, is_training): # Concat and apply BN final_path = tf.concat(values=[path1, path2], axis=concat_axis) - final_path = batch_norm(final_path, is_training, + final_path = norm(final_path, is_training, data_format=self.data_format) return final_path @@ -194,11 +198,11 @@ def _model(self, images, is_training, reuse=False): layers = [] out_filters = self.out_filters - C = self._get_C(images) + C = self._get_C(images) with tf.variable_scope("stem_conv"): w = create_weight("w", [C, C, C, out_filters]) x = tf.nn.conv2d(images, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) + x = norm(x, is_training, data_format=self.data_format) layers.append(x) if self.whole_channels: @@ -229,7 +233,7 @@ def _model(self, images, is_training, reuse=False): start_idx += 2 * self.num_branches + layer_id print(layers[-1]) - x = global_avg_pool(x, data_format=self.data_format) + x = global_max_pool(x, data_format=self.data_format) if is_training: x = tf.nn.dropout(x, self.keep_prob) with tf.variable_scope("fc"): @@ -351,8 +355,8 @@ def _enas_layer(self, layer_id, prev_layers, start_idx, out_filters, is_training branches = tf.reshape(branches, [N, -1, H, W]) out = tf.nn.conv2d( branches, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - out = batch_norm(out, is_training, data_format=self.data_format) - out = tf.nn.relu(out) + out = norm(out, is_training, data_format=self.data_format) + out = tf.nn.elu(out) if layer_id > 0: if self.whole_channels: @@ -368,7 +372,7 @@ def _enas_layer(self, layer_id, prev_layers, start_idx, out_filters, is_training lambda: tf.zeros_like(prev_layers[i]))) res_layers.append(out) out = tf.add_n(res_layers) - out = batch_norm(out, is_training, data_format=self.data_format) + 
out = norm(out, is_training, data_format=self.data_format) return out @@ -396,17 +400,17 @@ def _fixed_layer( filter_size = size[count] with tf.variable_scope("conv_1x1"): w = create_weight("w", [1, 1, inp_c, out_filters]) - out = tf.nn.relu(inputs) + out = tf.nn.elu(inputs) out = tf.nn.conv2d(out, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - out = batch_norm(out, is_training, data_format=self.data_format) + out = norm(out, is_training, data_format=self.data_format) with tf.variable_scope("conv_{0}x{0}".format(filter_size)): w = create_weight("w", [filter_size, filter_size, out_filters, out_filters]) - out = tf.nn.relu(out) + out = tf.nn.elu(out) out = tf.nn.conv2d(out, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - out = batch_norm(out, is_training, data_format=self.data_format) + out = norm(out, is_training, data_format=self.data_format) elif count == 4: pass elif count == 5: @@ -449,10 +453,10 @@ def _fixed_layer( branches = tf.concat(branches, axis=3) elif self.data_format == "NCHW": branches = tf.concat(branches, axis=1) - out = tf.nn.relu(branches) + out = tf.nn.elu(branches) out = tf.nn.conv2d(out, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - out = batch_norm(out, is_training, data_format=self.data_format) + out = norm(out, is_training, data_format=self.data_format) if layer_id > 0: if self.whole_channels: @@ -477,10 +481,10 @@ def _fixed_layer( with tf.variable_scope("skip"): w = create_weight( "w", [1, 1, total_skip_channels * out_filters, out_filters]) - out = tf.nn.relu(out) + out = tf.nn.elu(out) out = tf.nn.conv2d( out, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - out = batch_norm(out, is_training, data_format=self.data_format) + out = norm(out, is_training, data_format=self.data_format) return out @@ -504,8 +508,8 @@ def _conv_branch(self, inputs, filter_size, is_training, count, out_filters, with tf.variable_scope("inp_conv_1"): w = create_weight("w", [1, 1, inp_c, out_filters]) x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) - x = tf.nn.relu(x) + x = norm(x, is_training, data_format=self.data_format) + x = tf.nn.elu(x) with tf.variable_scope("out_conv_{}".format(filter_size)): if start_idx is None: @@ -515,12 +519,13 @@ def _conv_branch(self, inputs, filter_size, is_training, count, out_filters, w_point = create_weight("w_point", [1, 1, out_filters * ch_mul, count]) x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1], padding="SAME", data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) + x = norm(x, is_training, data_format=self.data_format) else: w = create_weight("w", [filter_size, filter_size, inp_c, count]) x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) + x = norm(x, is_training, data_format=self.data_format) else: + print('TODO(ahundt) batch_norm_with_mask is definitely called... 
make a group norm version!')
       if separable:
         w_depth = create_weight("w_depth", [filter_size, filter_size, out_filters, ch_mul])
         w_point = create_weight("w_point", [out_filters, out_filters * ch_mul])
@@ -544,7 +549,7 @@ def _conv_branch(self, inputs, filter_size, is_training, count, out_filters,
         mask = tf.logical_and(start_idx <= mask, mask < start_idx + count)
         x = batch_norm_with_mask(
           x, is_training, mask, out_filters, data_format=self.data_format)
-      x = tf.nn.relu(x)
+      x = tf.nn.elu(x)
     return x
 
   def _pool_branch(self, inputs, is_training, count, avg_or_max, start_idx=None):
@@ -566,8 +571,8 @@ def _pool_branch(self, inputs, is_training, count, avg_or_max, start_idx=None):
     with tf.variable_scope("conv_1"):
       w = create_weight("w", [1, 1, inp_c, self.out_filters])
       x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
-      x = batch_norm(x, is_training, data_format=self.data_format)
-      x = tf.nn.relu(x)
+      x = norm(x, is_training, data_format=self.data_format)
+      x = tf.nn.elu(x)
 
     with tf.variable_scope("pool"):
       if self.data_format == "NHWC":
diff --git a/enas/cifar10/grasp_metrics.py b/enas/cifar10/grasp_metrics.py
new file mode 100644
index 0000000..8d8cf5c
--- /dev/null
+++ b/enas/cifar10/grasp_metrics.py
@@ -0,0 +1,1366 @@
+
+import os
+import copy
+import math
+import numpy as np
+from tqdm import tqdm
+
+import keras
+# the keras backend is used by rectangle_homogeneous_lines() below
+from keras import backend as K
+import tensorflow as tf
+from tensorflow.python.platform import flags
+from shapely.geometry import Polygon
+from pyquaternion import Quaternion
+# import the submodule explicitly so sklearn.preprocessing.normalize is available
+import sklearn.preprocessing
+
+import grasp_utilities
+
+# class Vector:
+#     # http://www.mathopenref.com/coordpolygonarea.html
+#     # https://stackoverflow.com/a/45268241/99379
+#     def __init__(self, x, y):
+#         self.x = x
+#         self.y = y
+
+#     def __add__(self, v):
+#         if not isinstance(v, Vector):
+#             return NotImplemented
+#         return Vector(self.x + v.x, self.y + v.y)
+
+#     def __sub__(self, v):
+#         if not isinstance(v, Vector):
+#             return NotImplemented
+#         return Vector(self.x - v.x, self.y - v.y)
+
+#     def cross(self, v):
+#         if not isinstance(v, Vector):
+#             return NotImplemented
+#         return self.x*v.y - self.y*v.x
+
+
+# class Line:
+#     # ax + by + c = 0
+#     def __init__(self, v1, v2):
+#         self.a = v2.y - v1.y
+#         self.b = v1.x - v2.x
+#         self.c = v2.cross(v1)
+
+#     def __call__(self, p):
+#         return self.a*p.x + self.b*p.y + self.c
+
+#     def intersection(self, other):
+#         # http://www.mathopenref.com/coordpolygonarea.html
+#         # https://stackoverflow.com/a/45268241/99379
+#         # See e.g. 
https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection#Using_homogeneous_coordinates +# if not isinstance(other, Line): +# return NotImplemented +# w = self.a*other.b - self.b*other.a +# return Vector( +# (self.b*other.c - self.c*other.b)/w, +# (self.c*other.a - self.a*other.c)/w +# ) + + +# def rectangle_vertices(cx, cy, w, h, theta): +# # http://www.mathopenref.com/coordpolygonarea.html +# # https://stackoverflow.com/a/45268241/99379 +# dx = w/2 +# dy = h/2 +# dxcos = dx*cos(theta) +# dxsin = dx*sin(theta) +# dycos = dy*cos(theta) +# dysin = dy*sin(theta) +# return ( +# Vector(cx, cy) + Vector(-dxcos - -dysin, -dxsin + -dycos), +# Vector(cx, cy) + Vector( dxcos - -dysin, dxsin + -dycos), +# Vector(cx, cy) + Vector( dxcos - dysin, dxsin + dycos), +# Vector(cx, cy) + Vector(-dxcos - dysin, -dxsin + dycos) +# ) + +# def intersection_area(r1, r2): +# # http://www.mathopenref.com/coordpolygonarea.html +# # https://stackoverflow.com/a/45268241/99379 +# # r1 and r2 are in (center, width, height, rotation) representation +# # First convert these into a sequence of vertices + +# rect0 = rectangle_vertices(*r1) +# rect1 = rectangle_vertices(*r2) + +# # Use the vertices of the first rectangle as +# # starting vertices of the intersection polygon. +# rect0 = rect0 + +# # Loop over the edges of the second rectangle +# for p, q in zip(rect1, rect1[1:] + rect1[:1]): +# if len(rect0) <= 2: +# break # No intersection + +# line = Line(p, q) + +# # Any point p with line(p) <= 0 is on the "inside" (or on the boundary), +# # any point p with line(p) > 0 is on the "outside". + +# # Loop over the edges of the rect0 polygon, +# # and determine which part is inside and which is outside. +# new_intersection = [] +# line_values = [line(t) for t in rect0] +# for s, t, s_value, t_value in zip( +# rect0, rect0[1:] + rect0[:1], +# line_values, line_values[1:] + line_values[:1]): +# if s_value <= 0: +# new_intersection.append(s) +# if s_value * t_value < 0: +# # Points are on opposite sides. +# # Add the intersection of the lines to new_intersection. 
+# intersection_point = line.intersection(Line(s, t)) +# new_intersection.append(intersection_point) + +# intersection = new_intersection + +# # Calculate area +# if len(intersection) <= 2: +# return 0 + +# return 0.5 * sum(p.x*q.y - p.y*q.x for p, q in +# zip(intersection, intersection[1:] + intersection[:1])) + + +# intersection_area(r0y0, r0x0, r0y1, r0x1, r0y2, r0x2, r0y3, r0x3, r1y0, r1x0, r1y1, r1x1, r1y2, r1x2, r1y3, r1x3): +def rectangle_points(r0y0, r0x0, r0y1, r0x1, r0y2, r0x2, r0y3, r0x3): + p0yx = np.array([r0y0, r0x0]) + p1yx = np.array([r0y1, r0x1]) + p2yx = np.array([r0y2, r0x2]) + p3yx = np.array([r0y3, r0x3]) + return [p0yx, p1yx, p2yx, p3yx] + + +def rectangle_vectors(rp): + """ + # Arguments + + rp: rectangle points [p0yx, p1yx, p2yx, p3yx] + """ + v0 = rp[1] - rp[0] + v1 = rp[2] - rp[1] + v2 = rp[3] - rp[2] + v3 = rp[0] - rp[3] + + return [v0, v1, v2, v3] + + +def rectangle_homogeneous_lines(rv): + """ + + # Arguments + + rv: rectangle vectors [v0yx, v1yx, v2yx, v3yx] + + + # Returns + + [r0abc, r1abc, r2abc, r3abc] + + """ + # ax + by + c = 0 + dv = rv[0] - rv[1] + # TODO(ahundt) make sure cross product doesn't need to be in xy order + r0abc = K.concatenate([dv[0], dv[1], tf.cross(rv[0], rv[1])]) + dv = rv[1] - rv[2] + r1abc = K.concatenate([dv[1], dv[2], tf.cross(rv[1], rv[2])]) + dv = rv[2] - rv[3] + r2abc = K.concatenate([dv[2], dv[3], tf.cross(rv[2], rv[3])]) + dv = rv[3] - rv[0] + r3abc = K.concatenate([dv[3], dv[0], tf.cross(rv[3], rv[0])]) + return [r0abc, r1abc, r2abc, r3abc] + + +def homogeneous_line_intersection(hl0abc, hl1abc): + """ Given two homogenous lines return the intersection point in y,x coordinates + """ + a0 = hl0abc[0] + b0 = hl0abc[1] + c0 = hl0abc[2] + a1 = hl1abc[0] + b1 = hl1abc[1] + c1 = hl1abc[2] + w = a0 * b1 - b0 * a1 + py = (c0 * a1 - a0 * c1) / w + px = (b0 * c1 - c0 * b1) / w + return [py, px] + + +def line_at_point(l_abc, p_yx): + """ + + # Arguments + + l_abc: a line in homogenous coodinates + p_yx: a point with y, x coordinates + """ + return l_abc[0] * p_yx[1] + l_abc[1] * p_yx[0] + l_abc[2] + + +def intersection_points(rl0, rp1): + """ Evaluate rectangle lines at another rectangle's points + """ + lv = [ + line_at_point(rl0[0], rp1[0]), + line_at_point(rl0[1], rp1[1]), + line_at_point(rl0[2], rp1[2]), + line_at_point(rl0[3], rp1[3]), + ] + return lv + + +def rectangle_intersection_polygon(rp0, rl0, rp1, rl1): + """ Given two homogenous line rectangles, it returns the points for the polygon representing their intersection. + + # Arguments + + rp0: rectangle 0 defined with points + rl0: rectangle 0 defined with homogeneous lines + rp1: rectangle 1 defined with points + rp1: rectangle 1 defined with homogeneous lines + + # Returns + + Intersection polygon consisting of up to 8 points. + """ + # TODO(ahundt) this function is still set up for eager execution... figure it out as tf calls... + # http://www.mathopenref.com/coordpolygonarea.html + # https://stackoverflow.com/a/45268241/99379 + # Use the vertices of the first rectangle as + # starting vertices of the intersection polygon. + intersection = [] + for line1 in rl1: + line_values = [line_at_point(line1, t) for t in rp0] + + # Any point p with line(p) <= 0 is on the "inside" (or on the boundary), + # any point p with line(p) > 0 is on the "outside". + + # Loop over the edges of the rect0 polygon, + # and determine which part is inside and which is outside. 
+ new_intersection = [] + # points in rp0 rotated around by one + rp0_rot = grasp_utilities.rotate(rp0) + line_values_rot = grasp_utilities.rotate(line_values) + for s, t, s_value, t_value, line0 in zip( + rp0, rp0_rot, line_values, line_values_rot, rl0): + + if s_value <= 0: + new_intersection.append(s) + + st_value = s_value * t_value + intersection_point = homogeneous_line_intersection(line1, line0) + if st_value < 0: + # Points are on opposite sides. + # Add the intersection of the lines to new_intersection. + new_intersection.append(intersection_point) + + intersection = new_intersection + + return intersection + + +def polygon_area_four_points(rp): + """ + # Arguments + + rp: polygon defined by 4 points in y,x order + """ + # partial = p0x * p1y - p0y * p1x + partial0 = rp[0][1] * rp[1][0] - rp[0][0] * rp[1][1] + partial1 = rp[1][1] * rp[2][0] - rp[1][0] * rp[2][1] + partial2 = rp[2][1] * rp[3][0] - rp[2][0] * rp[3][1] + partial3 = rp[3][1] * rp[0][0] - rp[3][0] * rp[0][1] + full_sum = partial0 + partial1 + partial2 + partial3 + return 0.5 * full_sum + + +def polygon_area(poly): + # Calculate area + if len(poly) <= 2: + return 0 + + poly_rot = poly[1:] + poly[:1] + + return 0.5 * sum(p[1]*q[0] - p[0]*q[1] for p, q in zip(poly, poly_rot)) + + +def rectangle_vertices(h, w, cy, cx, sin_theta=None, cos_theta=None, theta=None): + """ Get the vertices from a parameterized bounding box. + + y, x ordering where 0,0 is the top left corner. + This matches matrix indexing. + + # http://www.mathopenref.com/coordpolygonarea.html + # https://stackoverflow.com/a/45268241/99379 + """ + if theta is not None: + sin_theta = np.sin(theta) + cos_theta = np.cos(theta) + # normalizing because this may be using the output of the neural network, + # so we turn it into an x y coordinate on the unit circle without changing + # the vector. + sin_theta, cos_theta = normalize_sin_theta_cos_theta(sin_theta, cos_theta) + + dx = w/2 + dy = h/2 + dxcos = dx * cos_theta + dxsin = dx * sin_theta + dycos = dy * cos_theta + dysin = dy * sin_theta + return [ + np.array([cy, cx]) + np.array([-dxsin + -dycos, -dxcos - -dysin]), + np.array([cy, cx]) + np.array([ dxsin + -dycos, dxcos - -dysin]), + np.array([cy, cx]) + np.array([ dxsin + dycos, dxcos - dysin]), + np.array([cy, cx]) + np.array([-dxsin + dycos, -dxcos - dysin]) + ] + + +def encode_sin2_cos2(sin2_cos2): + """ Converts values from the range (-1, 1) to the range (0, 1). + + The value passed is already expected to be in the format: + np.array([np.sin(2 * theta), np.cos(2 * theta)]) + + If you have 2 theta and want to encode that see `encode_2theta()`. + + """ + return (sin2_cos2 / 2.0) + 0.5 + + +def encode_sin_cos(sin_cos): + """ Converts values from the range (-1, 1) to the range (0, 1). + + The value passed is already expected to be in the format: + np.array([np.sin(theta), np.cos(theta)]) + + If you have theta and want to encode that see `encode_theta()`. 
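+
+    # Example
+
+    A rough round-trip sketch using the helpers in this file:
+
+        encoded = encode_theta(np.pi / 2)
+        # encoded == [1.0, 0.5] because [sin(theta), cos(theta)] == [1, 0]
+        theta = decode_sin_cos(encoded)
+        # theta is approximately pi / 2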
+ + """ + return (sin_cos / 2.0) + 0.5 + + +def encode_2theta(theta): + """ Encodes theta in radians to handle gripper symmetry in 0 to 1 domain + + # Returns + + [sin(2 * theta), cos(2 * theta)] / 2 + 0.5 + + """ + theta2 = theta * 2.0 + return encode_theta(theta2) + + +def encode_theta(theta): + """ Encodes theta in radians to asymmetric grippers in 0 to 1 domain + + # Returns + + [sin(theta), cos(theta)] / 2 + 0.5 + + """ + norm_sin_cos = encode_sin_cos(np.array([np.sin(theta), np.cos(theta)])) + return norm_sin_cos + + +def denorm_sin2_cos2(norm_sin2_cos2): + """ Undo normalization step of `encode_2theta_np()` + + + This converts values from the range (0, 1) to (-1, 1) + by subtracting 0.5 and multiplying by 2.0. + This function does not take any steps to ensure + the input obeys the law: + + sin ** 2 + cos ** 2 == 1 + + Since the values may have been generated by a neural network + it is important to fix this w.r.t. the provided values. + + # Arguments + + norm_sin2_cos2: normalized sin(2*theta) cos(2*theta) + + # Returns + + return actual sin(2*theta) cos(2*theta) + """ + return (norm_sin2_cos2 - 0.5) * 2.0 + + +def denorm_sin_cos(norm_sin_cos): + """ Undo normalization step of `encode_theta_np()` + + + This converts values from the range (0, 1) to (-1, 1) + by subtracting 0.5 and multiplying by 2.0. + This function does not take any steps to ensure + the input obeys the law: + + sin ** 2 + cos ** 2 == 1 + + Since the values may have been generated by a neural network + it is important to fix this w.r.t. the provided values. + + # Arguments + + norm_sin2_cos2: normalized sin(2*theta) cos(2*theta) + + # Returns + + return actual sin(theta) cos(theta) + """ + return (norm_sin_cos - 0.5) * 2.0 + + +def decode_sin2_cos2(norm_sin2_cos2): + """ Decodes the result of encode_2theta() back into an angle theta in radians. + """ + return decode_sin_cos(norm_sin2_cos2) / 2.0 + + +def decode_sin_cos(norm_sin2_cos2): + """ Decodes the result of encode_theta() back into an angle theta in radians. + """ + # rescale and shift from (0, 1) range + # back to (-1, 1) range + # + # note that denorm step is the same for both sin_cos and sin2_cos2 + sin2, cos2 = denorm_sin2_cos2(norm_sin2_cos2) + # normalize the values so they are on the unit circle + sin2, cos2 = normalize_sin_theta_cos_theta(sin2, cos2) + # extract 2x the angle + theta2 = np.arctan2(sin2, cos2) + # return the angle + return theta2 + + +def parse_rectangle_vertices(s2t_c2t_hw_cycx): + """ Convert a dimensions, angle, grasp center, based rectangle to vertices. + + s2t_c2t_hw_cycx: [sin(2*theta), cos(2*theta), height, width, center x, center y] + """ + # sin(2*theta), cos(2*theta) + theta = decode_sin2_cos2(s2t_c2t_hw_cycx[:2]) + rect_vertices = rectangle_vertices( + s2t_c2t_hw_cycx[2], # height + s2t_c2t_hw_cycx[3], # width + s2t_c2t_hw_cycx[4], # center y + s2t_c2t_hw_cycx[5], # center x + theta=theta) + return rect_vertices + + +def parse_rectangle_params(s2t_c2t_hw_cycx): + rect_vertices = parse_rectangle_vertices(s2t_c2t_hw_cycx) + rect_hlines = rectangle_homogeneous_lines(rect_vertices) + return rect_vertices, rect_hlines + + +def intersection_over_union(true_rp, pred_rp, true_rl, pred_rl): + """ Intersection over union of two oriented rectangles. + + Also known as the jaccard metric. 
+ + # Arguments + + true_rp: oriented rectanle 0 points + pred_rp: oriented rectangle 1 points + true_rl: oriented rectangle 0 homogeneous lines + pred_rl: oriented rectangle 1 homogeneous lines + """ + true_area = polygon_area_four_points(true_rp) + pred_area = polygon_area_four_points(pred_rp) + intersection_polygon = rectangle_intersection_polygon(true_rp, true_rl, pred_rp, pred_rl) + intersection_area = polygon_area(intersection_polygon) + + iou = intersection_area / (true_area + pred_area - intersection_area) + return iou + + +def shapely_intersection_over_union(rect0_points, rect1_points, verbose=0): + """ Find the intersection over union of two polygons using shapely + """ + # create and clean the polygons to eliminate any overlapping points + # https://toblerity.org/shapely/manual.html + p0 = Polygon(rect0_points).buffer(0) + p1 = Polygon(rect1_points).buffer(0) + if p0.is_valid and p1.is_valid: + intersection_area = p0.intersection(p1).area + + iou = intersection_area / (p0.area + p1.area - intersection_area) + if verbose > 0: + print('iou: ' + str(iou)) + return iou + else: + # TODO(ahundt) determine and fix the source of invalid polygons. + print('Warning: shapely_intersection_over_union() encountered an ' + 'invalid polygon. We will return an IOU of 0 so execution ' + 'might continue, but this bug should be addressed. ' + 'p0: ' + str(p0) + ' p1: ' + str(p1)) + return 0.0 + + +def normalize_sin_theta_cos_theta(sin_theta, cos_theta): + """ Put sin(theta) cos(theta) on the unit circle. + + Output values will be in (-1, 1). + normalize the prediction but keep the vector direction the same + """ + arr = sklearn.preprocessing.normalize(np.array([[sin_theta, cos_theta]], dtype=np.float)) + sin_theta = arr[0, 0] + cos_theta = arr[0, 1] + return sin_theta, cos_theta + + +def prediction_vector_has_grasp_success(y_pred): + has_grasp_success = (y_pred.size == 7) + return has_grasp_success + + +def get_prediction_vector_rectangle_start_index(y_pred): + """ Get the rectangle start index from an encoded prediction vector of length 6 or 7 + """ + has_grasp_success = prediction_vector_has_grasp_success(y_pred) + # the grasp rectangle start index + rect_index = 0 + if has_grasp_success: + rect_index = 1 + return rect_index + + +def decode_prediction_vector(y_true): + """ Decode a prediction vector into sin(2 * theta), cos(2 * theta), and 4 vertices + """ + rect_index = get_prediction_vector_rectangle_start_index(y_true) + end_angle_index = rect_index + 2 + y_true[rect_index: end_angle_index] = denorm_sin2_cos2(y_true[rect_index:end_angle_index]) + true_y_sin_theta, true_x_cos_theta = y_true[rect_index:end_angle_index] + true_rp = parse_rectangle_vertices(y_true[rect_index:]) + return true_y_sin_theta, true_x_cos_theta, true_rp + + +def decode_prediction_vector_theta_center_polygon(y_true): + """ Decode a prediction vector into theta and four rectangle vertices + + Only supports vector format that includes center information! 
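+
+    # Example
+
+    A rough sketch in the norm_sin2_cos2_hw_yx_6 format (all values made up):
+
+        y = np.array([0.75, 0.5, 20., 40., 112., 112.])
+        theta, center, rect = decode_prediction_vector_theta_center_polygon(y)
+        # theta is approximately pi / 4, center == [112., 112.] in y, x
+        # order, and rect holds the four rectangle vertices.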
+    """
+    rect_index = get_prediction_vector_rectangle_start_index(y_true)
+    end_angle_index = rect_index + 2
+    y_true[rect_index: end_angle_index] = denorm_sin2_cos2(y_true[rect_index:end_angle_index])
+    true_y_sin_theta, true_x_cos_theta = y_true[rect_index:end_angle_index]
+    true_rp = parse_rectangle_vertices(y_true[rect_index:])
+    true_y_sin_theta, true_x_cos_theta = normalize_sin_theta_cos_theta(true_y_sin_theta, true_x_cos_theta)
+    # right now it is 2 theta, so get theta
+    theta = np.arctan2(true_y_sin_theta, true_x_cos_theta) / 2.0
+    # the center should be the last two entries, in y, x order
+    center = y_true[-2:]
+    return theta, center, true_rp
+
+
+def angle_difference_less_than_threshold(
+        true_y_sin_theta, true_x_cos_theta,
+        pred_y_sin_theta, pred_x_cos_theta,
+        angle_threshold=np.radians(60.0),
+        verbose=0):
+    """ Returns true if the absolute angle difference is within the threshold, false otherwise.
+
+    Recall that angle differences wrap around a circle, so the shortest angular difference
+    may be in the +theta or the -theta direction, with wrapping around the boundaries.
+
+    Note that the angle threshold is set to 60 degrees because we are working with 2*theta.
+    TODO(ahundt) double check the implications of this.
+
+    # Arguments
+    angle_threshold: The maximum absolute angular difference permitted.
+    """
+    # normalize the ground truth but keep the vector direction the same
+    true_y_sin_theta, true_x_cos_theta = normalize_sin_theta_cos_theta(true_y_sin_theta, true_x_cos_theta)
+    true_angle = np.arctan2(true_y_sin_theta, true_x_cos_theta)
+    # normalize the prediction but keep the vector direction the same
+    pred_y_sin_theta, pred_x_cos_theta = normalize_sin_theta_cos_theta(pred_y_sin_theta, pred_x_cos_theta)
+    pred_angle = np.arctan2(pred_y_sin_theta, pred_x_cos_theta)
+    # print('pred angle: ' + str(pred_angle) + ' true angle: ' + str(true_angle))
+    true_pred_diff = true_angle - pred_angle
+    # wrap the difference back into (-pi, pi], since the shortest angular
+    # distance may cross the circular boundary
+    angle_difference = np.arctan2(np.sin(true_pred_diff), np.cos(true_pred_diff))
+    # print('angle_difference: ' + str(angle_difference) + ' deg: ' + str(np.degrees(angle_difference)))
+    is_within_angle_threshold = np.abs(angle_difference) <= angle_threshold
+    if verbose > 0:
+        print(' angle_difference_less_than_threshold(): ' +
+              ' angle_difference: ' + str(int(np.degrees(angle_difference))) +
+              ' threshold: ' + str(int(np.degrees(angle_threshold))) +
+              ' is_within_angle_threshold: ' + str(is_within_angle_threshold) +
+              ' true_angle: ' + str(np.degrees(true_angle)) +
+              ' pred_angle: ' + str(np.degrees(pred_angle)) +
+              ' units: degrees ')
+    return is_within_angle_threshold
+
+
+def jaccard_score(y_true, y_pred, angle_threshold=np.radians(60.0), iou_threshold=0.25, verbose=0):
+    """ Score a single grasp regression prediction against the ground truth.
+
+    Note that the angle threshold is set to 60 degrees because we are working with 2*theta.
+    TODO(ahundt) double check the implications of this.
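+    In other words, a 60 degree difference in the encoded 2*theta angle
+    corresponds to a 30 degree difference in the actual gripper orientation.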
+
+    # Arguments
+
+    Feature formats accepted:
+
+    grasp_success_norm_sin2_cos2_hw_yx_7:
+        [grasp_success, sin_2theta, cos_2theta, height, width, center_y, center_x]
+        [            0,          1,          2,      3,     4,        5,        6]
+
+    norm_sin2_cos2_hw_yx_6:
+        [sin_2theta, cos_2theta, height, width, center_y, center_x]
+        [         0,          1,      2,     3,        4,        5]
+
+    Not yet accepted:
+
+    norm_sin2_cos2_hw_4:
+        [sin_2theta, cos_2theta, height, width]
+        [         0,          1,      2,     3]
+
+    grasp_success_norm_sin2_cos2_hw_5:
+        [grasp_success, sin_2theta, cos_2theta, height, width]
+        [            0,          1,          2,      3,     4]
+
+    y_true: a numpy array of features
+    y_pred: a numpy array of features
+    angle_threshold: The maximum allowed difference in
+        angles for a grasp to be considered successful.
+        The default of 60 degrees is for 2 * theta, which is 30 degrees for theta.
+    iou_threshold: The minimum intersection over union score
+        for the grasp rectangles to be considered a match, 0.25 by default.
+
+    Note that because the encoding uses 2 * theta, angles that are off by
+    180 degrees are considered equal, which is the case for a gripper
+    with two parallel plates.
+    """
+
+    has_grasp_success = prediction_vector_has_grasp_success(y_pred)
+
+    # round grasp success to 0 or 1
+    # note this is not valid and not used if
+    # has_grasp_success is false.
+    predicted_success = np.rint(y_pred[0])
+    if has_grasp_success and predicted_success != int(y_true[0]):
+        # grasp success prediction doesn't match, return a 0 score
+        return 0.0
+    elif has_grasp_success and predicted_success == 0:
+        # The success prediction correctly matches the ground truth,
+        # plus both are False so this is a true negative.
+        # Any true negative where failure to grasp is predicted correctly
+        # gets credit regardless of box contents.
+        return 1.0
+    else:
+        # Either there is no grasp_success entry, or we correctly
+        # predicted a successful grasp.
+        # First check if the angles are close enough to match the angle_threshold.
+
+        # denormalize the values from the (0, 1) range back to (-1, 1) and get the array entries
+        true_y_sin_theta, true_x_cos_theta, true_rp = decode_prediction_vector(y_true)
+        pred_y_sin_theta, pred_x_cos_theta, pred_rp = decode_prediction_vector(y_pred)
+
+        # if the angle difference isn't close enough to the ground truth, return 0.0
+        if not angle_difference_less_than_threshold(
+                true_y_sin_theta, true_x_cos_theta,
+                pred_y_sin_theta, pred_x_cos_theta,
+                angle_threshold,
+                verbose=verbose):
+            return 0.0
+
+        # We passed all the other checks, so
+        # let's find out if the grasp boxes match
+        # via the jaccard distance.
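+        # shapely handles the polygon clipping here; its buffer(0) call
+        # cleans up the degenerate geometry that imperfect network
+        # predictions can produce.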
+        iou = shapely_intersection_over_union(true_rp, pred_rp)
+        if verbose:
+            print('iou: ' + str(iou))
+        if iou >= iou_threshold:
+            # passed the iou threshold
+            return 1.0
+        else:
+            # didn't meet the iou threshold
+            return 0.0
+
+
+def grasp_jaccard_batch(y_true, y_pred, verbose=0):
+    # print('y_true.shape: ' + str(y_true.shape))
+    # print('y_pred.shape: ' + str(y_pred.shape))
+    scores = []
+    for i in range(y_true.shape[0]):
+        # TODO(ahundt) comment the next few lines out when not debugging
+        # print a random sample roughly once every 10000 examples
+        verbose = 0
+        if np.random.randint(0, 10000) == 0:
+            verbose = 1
+            print('')
+            print('')
+            print('grasp_metrics.py sample of ground_truth and prediction:')
+        this_true = y_true[i, :]
+        this_pred = y_pred[i, :]
+        score = jaccard_score(this_true, this_pred, verbose=verbose)
+        if verbose:
+            print('s2t_c2t_hw_cycx_true: ' + str(this_true))
+            print('s2t_c2t_hw_cycx_pred: ' + str(this_pred))
+            print('score: ' + str(score))
+        scores += [score]
+    scores = np.array(scores, dtype=np.float32)
+    # print('scores.shape: ' + str(scores.shape))
+    return scores
+
+
+def grasp_jaccard(y_true, y_pred):
+    """ Calculates the jaccard metric score in a manner compatible with tf and keras metrics.
+
+    This is an IOU metric with angle difference and IOU score thresholds.
+
+    Feature formats are accepted as a 2d array containing a batch of data ordered as:
+
+        [grasp_success, sin_2theta, cos_2theta, height, width, center_y, center_x]
+        [            0,          1,          2,      3,     4,        5,        6]
+
+        [sin_2theta, cos_2theta, height, width, center_y, center_x]
+        [         0,          1,      2,     3,        4,        5]
+
+    It is very important to be aware that sin(2*theta) and cos(2*theta) are expected;
+    additionally, all coordinates and height/width are normalized by the network's input dimensions.
+    """
+    scores = tf.py_func(func=grasp_jaccard_batch, inp=[y_true, y_pred], Tout=tf.float32, stateful=False)
+    return scores
+
+
+def rotation_to_xyz_theta(rotation, verbose=0):
+    """Convert a rotation to an angle theta
+
+    From above, a rotation to the right should be a positive theta,
+    and a rotation to the left negative theta. The initial pose is with the
+    z axis pointing down, the y axis to the right and the x axis forward.
+
+    This format does not allow for arbitrary rotation commands to be defined,
+    and originates from the costar dataset.
+
+    In the google brain dataset the gripper is only commanded to
+    rotate around a single vertical axis, so the rotation is easy to
+    visualize; this axis also happens to approximately match the
+    direction defined by gravity.
+    Furthermore, the original paper had access to the geometry of the
+    arm joints, from which these parameters could easily be extracted,
+    which is not available here. To resolve this discrepancy,
+    here we assume that the gripper generally starts off at a
+    quaternion orientation of approximately [qx=-1, qy=0, qz=0, qw=0].
+    This is equivalent to the angle axis
+    representation of [a=np.pi, x=-1, y=0, z=0],
+    which I'll name default_rot.
+
+    It is also important to note the ambiguity of the
+    angular distance between any current pose
+    and the end pose. This angular distance will
+    always have a positive value, so the network
+    could not naturally discriminate between
+    turning left and turning right.
+    For this reason, we use the angular distance
+    from default_rot to define the input angle parameter,
+    and if the angle axis z component is < 0
+    we will use theta for the rotation,
+    but if the angle axis z component is >= 0
+    we will use -theta.
+    """
+    # note: pyquaternion's Quaternion stores its elements in (w, x, y, z) order
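+    # For example (a hedged sketch assuming pyquaternion semantics): a rotation
+    # of pi/2 about +z gives aa.angle == pi/2 and aa.axis of roughly [0, 0, 1],
+    # so the sign flip below returns theta == -pi/2 for that pose.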
+    aa = Quaternion(rotation)
+    # angle in radians
+    theta = aa.angle
+    if aa.axis[2] < 0:
+        multiply = 1.0
+    else:
+        multiply = -1.0
+    if verbose > 0:
+        print("ANGLE_AXIS_MULTIPLY: ", aa.angle, np.array(aa.axis), multiply)
+    theta *= multiply
+
+    return np.concatenate([aa.axis, [theta]], axis=-1)
+
+
+def normalize_axis(aaxyz, epsilon=1e-5, verbose=0):
+    """ Normalize an axis in angle axis format data.
+
+    If the axis is all zeros, epsilon is added to the final axis entry.
+    """
+    if not np.any(aaxyz):
+        # source: https://stackoverflow.com/a/23567941/99379
+        # we checked if all values are zero, fix the missing axis
+        aaxyz[-1] += epsilon
+    arr = sklearn.preprocessing.normalize(np.array([aaxyz], dtype=np.float64))
+    aaxyz = np.squeeze(arr[0, :])
+    if verbose:
+        print('normalize_axis: ' + str(aaxyz))
+    return aaxyz
+
+
+def encode_xyz_qxyzw_to_xyz_aaxyz_nsc(xyz_qxyzw, rescale_meters=4, rotation_weight=1, random_augmentation=None):
+    """ Encode a translation + quaternion pose to an encoded xyz, axis, and an angle as sin(theta) cos(theta)
+
+    rescale_meters: Divide the number of meters by this number so
+        positions will be encoded between 0 and 1.
+        For example if you want to be able to reach forward and back by 2 meters, divide by 4.
+    rotation_weight: scale down rotation values by this factor to a smaller range
+        so mse gives similar weight to both rotations and translations.
+        Use 1.0 for no adjustment (the default).
+        A value of 0.001 makes 1 radian carry about equal weight to 1 millimeter.
+    random_augmentation: default None means no data modification,
+        otherwise a value between 0.0 and 1.0 for the probability
+        of randomly modifying the data with a small translation and rotation.
+        Enabling random_augmentation is not recommended.
+    """
+    xyz = (xyz_qxyzw[:3] / rescale_meters) + 0.5
+    length = len(xyz_qxyzw)
+    if length == 7:
+        # print('xyz: ' + str(xyz))
+        rotation = Quaternion(xyz_qxyzw[3:])
+        # pose augmentation with no feedback or correspondingly adjusted transform poses
+        # apply the augmentation with the documented probability
+        if random_augmentation is not None and np.random.random() < random_augmentation:
+            # random rotation change
+            # random = Quaternion.random()
+            # # only take rotations less than 5 degrees
+            # while random.angle > np.pi / 36.:
+            #     # TODO(ahundt) make more efficient and re-enable
+            #     random = Quaternion.random()
+            # rotation = rotation * random
+            # small random translation jitter, up to +/-0.05 in the normalized coordinate space
+            random = (np.random.random(3) - 0.5) / 10.
+            xyz = xyz + random
+
+        aaxyz_theta = rotation_to_xyz_theta(rotation)
+        # encode the unit axis vector into the [0,1] range
+        # rotation_weight makes it so mse applied to rotation values
+        # is on a similar scale to the translation values.
+        aaxyz = ((aaxyz_theta[:-1] / 2) * rotation_weight) + 0.5
+        nsc = encode_theta(aaxyz_theta[-1])
+        # print('nsc: ' + str(nsc))
+        xyz_aaxyz_nsc = np.concatenate([xyz, aaxyz, nsc], axis=-1)
+        return xyz_aaxyz_nsc
+    elif length == 3:
+        if random_augmentation is not None and np.random.random() < random_augmentation:
+            # small random translation jitter, up to +/-0.05 in the normalized coordinate space
+            random = (np.random.random(3) - 0.5) / 10.
+            xyz = xyz + random
+
+        return xyz
+    else:
+        raise ValueError('encode_xyz_qxyzw_to_xyz_aaxyz_nsc: unsupported input data length of ' + str(length))
+
+
+def batch_encode_xyz_qxyzw_to_xyz_aaxyz_nsc(batch_xyz_qxyzw, rescale_meters=4, rotation_weight=1, random_augmentation=None):
+    """ Expects an n by 7 batch with xyz_qxyzw rows.
+
+    rescale_meters: Divide the number of meters by this number so
+        positions will be encoded between 0 and 1.
For example if you want to be able to reach forward and back by 2 meters, divide by 4.
+    rotation_weight: scale down rotation values by this factor to a smaller range
+        so mse gives similar weight to both rotations and translations.
+        Use 1.0 for no adjustment.
+    random_augmentation: default None means no data modification,
+        otherwise a value between 0.0 and 1.0 for the probability
+        of randomly modifying the data with a small translation and rotation.
+        Enabling random_augmentation is not recommended.
+    """
+    encoded_poses = []
+    for xyz_qxyzw in batch_xyz_qxyzw:
+        # print('xyz_qxyzw: ' + str(xyz_qxyzw))
+        xyz_aaxyz_nsc = encode_xyz_qxyzw_to_xyz_aaxyz_nsc(
+            xyz_qxyzw, rescale_meters=rescale_meters, rotation_weight=rotation_weight, random_augmentation=random_augmentation)
+        # print('xyz_aaxyz_nsc: ' + str(xyz_aaxyz_nsc))
+        encoded_poses.append(xyz_aaxyz_nsc)
+    return np.stack(encoded_poses, axis=0)
+
+
+def decode_xyz_aaxyz_nsc_to_xyz_qxyzw(xyz_aaxyz_nsc, rescale_meters=4, rotation_weight=1):
+    """ Decode an encoded xyz, axis, and sin(theta) cos(theta) angle back into a translation + quaternion pose.
+
+    This is the inverse of `encode_xyz_qxyzw_to_xyz_aaxyz_nsc()`.
+
+    rescale_meters: Multiply the encoded positions back up by this number,
+        it must match the value used when encoding.
+    rotation_weight: scale down rotation values by this factor to a smaller range
+        so mse gives similar weight to both rotations and translations.
+        Use 1.0 for no adjustment.
+    """
+    xyz = (xyz_aaxyz_nsc[:3] - 0.5) * rescale_meters
+    length = len(xyz_aaxyz_nsc)
+    if length == 8:
+        theta = decode_sin_cos(xyz_aaxyz_nsc[-2:])
+        # decode the ([0, 1] * rotation_weight) range back to the [-1, 1] range
+        aaxyz = ((xyz_aaxyz_nsc[3:-2] - 0.5) * 2) / rotation_weight
+        # aaxyz is the axis component of the angle axis format,
+        # note that rotation_weight is automatically removed by the normalization step.
+        aaxyz = normalize_axis(aaxyz)
+        q = Quaternion(axis=aaxyz, angle=theta)
+        xyz_qxyzw = np.concatenate([xyz, q.elements], axis=-1)
+        return xyz_qxyzw
+    elif length != 3:
+        raise ValueError('decode_xyz_aaxyz_nsc_to_xyz_qxyzw: unsupported input data length of ' + str(length))
+    return xyz
+
+
+def grasp_acc(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.01, max_rotation=0.261799):
+    """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding.
+
+    Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise.
+    Limits default to 15 degrees and 1cm.
+
+    Supported formats are translation xyz with length 3,
+    aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5,
+    or xyz_aaxyz_nsc which incorporates both of the above with length 8.
+
+    max_translation: defaults to 0.01 meters, or 1cm,
+        translations must be less than this distance away.
+    max_rotation: defaults to 15 degrees (0.261799 radians),
+        rotations must be less than this angular distance away.
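+
+    The fixed-threshold variants below, grasp_acc_5mm_7_5deg through
+    grasp_acc_512cm_360deg, wrap the same batch implementation with
+    progressively looser limits, presumably because keras metric functions
+    are called with only (y_true, y_pred) and cannot take extra arguments.
+
+    A minimal usage sketch (assumes a keras model whose labels use the
+    xyz_aaxyz_nsc encoding; the names are illustrative):
+
+        model.compile(optimizer='adam', loss='mse',
+                      metrics=[grasp_acc, grasp_acc_2cm_30deg])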
+ """ + # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc + [filter_result] = tf.py_func( + grasp_accuracy_xyz_aaxyz_nsc_batch, + [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation], + [tf.float32], stateful=False, + name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch') + filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0]) + return filter_result + + +def grasp_acc_5mm_7_5deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.005, max_rotation=0.1308995): + """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding. + + Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise. + Limits default to 7.5 degrees and 0.5cm. + + Supported formats are translation xyz with length 3, + aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5, + or xyz_aaxyz_nsc which incorporates both of the above with length 8. + + max_translation: defaults to 0.005 meters, which is 0.5cm, + translations must be less than this distance away. + max_rotation: defaults to 7.5 degrees, which is 0.1308995 radians, + rotations must be less than this angular distance away. + """ + # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc + [filter_result] = tf.py_func( + grasp_accuracy_xyz_aaxyz_nsc_batch, + [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation], + [tf.float32], stateful=False, + name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch') + filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0]) + return filter_result + + +def grasp_acc_1cm_15deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.01, max_rotation=0.261799): + """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding. + + Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise. + Limits default to 15 degrees and 1cm. + + Supported formats are translation xyz with length 3, + aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5, + or xyz_aaxyz_nsc which incorporates both of the above with length 8. + + max_translation: defaults to 0.01 meters, which is 1cm, + translations must be less than this distance away. + max_rotation: defaults to 15 degrees in radians, + rotations must be less than this angular distance away. + """ + # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc + [filter_result] = tf.py_func( + grasp_accuracy_xyz_aaxyz_nsc_batch, + [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation], + [tf.float32], stateful=False, + name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch') + filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0]) + return filter_result + + +def grasp_acc_2cm_30deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.02, max_rotation=0.523598): + """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding. + + Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise. + + Supported formats are translation xyz with length 3, + aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5, + or xyz_aaxyz_nsc which incorporates both of the above with length 8. + + max_translation: defaults to 0.02 meters, which is 2cm, + translations must be less than this distance away. 
+    max_rotation: defaults to 30 degrees, which is 0.523598 radians,
+        rotations must be less than this angular distance away.
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        grasp_accuracy_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation],
+        [tf.float32], stateful=False,
+        name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def grasp_acc_4cm_60deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.04, max_rotation=1.047196):
+    """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding.
+
+    Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise.
+
+    Supported formats are translation xyz with length 3,
+    aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5,
+    or xyz_aaxyz_nsc which incorporates both of the above with length 8.
+
+    max_translation: defaults to 0.04 meters, which is 4cm,
+        translations must be less than this distance away.
+    max_rotation: defaults to 60 degrees, which is 1.047196 radians,
+        rotations must be less than this angular distance away.
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        grasp_accuracy_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation],
+        [tf.float32], stateful=False,
+        name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def grasp_acc_8cm_120deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.08, max_rotation=2.094392):
+    """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding.
+
+    Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise.
+
+    Supported formats are translation xyz with length 3,
+    aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5,
+    or xyz_aaxyz_nsc which incorporates both of the above with length 8.
+
+    max_translation: defaults to 0.08 meters, which is 8cm,
+        translations must be less than this distance away.
+    max_rotation: defaults to 120 degrees, which is 2.094392 radians,
+        rotations must be less than this angular distance away.
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        grasp_accuracy_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation],
+        [tf.float32], stateful=False,
+        name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def grasp_acc_16cm_240deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.16, max_rotation=4.188784):
+    """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding.
+
+    Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise.
+
+    Supported formats are translation xyz with length 3,
+    aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5,
+    or xyz_aaxyz_nsc which incorporates both of the above with length 8.
+
+    max_translation: defaults to 0.16 meters, which is 16cm,
+        translations must be less than this distance away.
+    max_rotation: defaults to 240 degrees, which is 4.188784 radians,
+        rotations must be less than this angular distance away.
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        grasp_accuracy_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation],
+        [tf.float32], stateful=False,
+        name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def grasp_acc_32cm_360deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.32, max_rotation=6.2832):
+    """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding.
+
+    Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise.
+
+    Supported formats are translation xyz with length 3,
+    aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5,
+    or xyz_aaxyz_nsc which incorporates both of the above with length 8.
+
+    max_translation: defaults to 0.32 meters, which is 32cm,
+        translations must be less than this distance away.
+    max_rotation: defaults to 360 degrees, which is 6.2832 radians,
+        so any rotation is accepted.
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        grasp_accuracy_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation],
+        [tf.float32], stateful=False,
+        name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def grasp_acc_64cm_360deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.64, max_rotation=6.2832):
+    """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding.
+
+    Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise.
+
+    Supported formats are translation xyz with length 3,
+    aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5,
+    or xyz_aaxyz_nsc which incorporates both of the above with length 8.
+
+    max_translation: defaults to 0.64 meters, which is 64cm,
+        translations must be less than this distance away.
+    max_rotation: defaults to 360 degrees, which is 6.2832 radians,
+        so any rotation is accepted.
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        grasp_accuracy_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation],
+        [tf.float32], stateful=False,
+        name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def grasp_acc_128cm_360deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=1.28, max_rotation=6.2832):
+    """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding.
+
+    Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise.
+
+    Supported formats are translation xyz with length 3,
+    aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5,
+    or xyz_aaxyz_nsc which incorporates both of the above with length 8.
+
+    max_translation: defaults to 1.28 meters, which is 128cm,
+        translations must be less than this distance away.
+    max_rotation: defaults to 360 degrees, which is 6.2832 radians,
+        so any rotation is accepted.
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        grasp_accuracy_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation],
+        [tf.float32], stateful=False,
+        name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def grasp_acc_256cm_360deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=2.56, max_rotation=6.2832):
+    """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding.
+
+    Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise.
+
+    Supported formats are translation xyz with length 3,
+    aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5,
+    or xyz_aaxyz_nsc which incorporates both of the above with length 8.
+
+    max_translation: defaults to 2.56 meters, which is 256cm,
+        translations must be less than this distance away.
+    max_rotation: defaults to 360 degrees, which is 6.2832 radians,
+        so any rotation is accepted.
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        grasp_accuracy_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation],
+        [tf.float32], stateful=False,
+        name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def grasp_acc_512cm_360deg(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=5.12, max_rotation=6.2832):
+    """ Calculate 3D grasp accuracy for a single result with grasp_accuracy_xyz_aaxyz_nsc encoding.
+
+    Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise.
+
+    Supported formats are translation xyz with length 3,
+    aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5,
+    or xyz_aaxyz_nsc which incorporates both of the above with length 8.
+
+    max_translation: defaults to 5.12 meters, which is 512cm,
+        translations must be less than this distance away.
+    max_rotation: defaults to 360 degrees, which is 6.2832 radians,
+        so any rotation is accepted.
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        grasp_accuracy_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation, max_rotation],
+        [tf.float32], stateful=False,
+        name='py_func/grasp_accuracy_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def cart_error(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc):
+    """ Calculate the absolute Cartesian (translation) distance between encoded poses; tf/keras metric wrapper.
+
+    Input format is xyz_aaxyz_nsc; only the translation component is compared.
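+
+    For example (illustrative values): if the decoded translations are
+    [0.10, 0.20, 0.30] and [0.10, 0.24, 0.33] meters, the reported distance
+    is sqrt(0.04**2 + 0.03**2) = 0.05 meters.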
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        absolute_cart_distance_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc],
+        [tf.float32], stateful=False,
+        name='py_func/absolute_cart_distance_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def angle_error(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc):
+    """ Calculate the absolute angular distance in radians between encoded poses; tf/keras metric wrapper.
+
+    Input format is xyz_aaxyz_nsc.
+    """
+    # TODO(ahundt) make a single, simple call for grasp_accuracy_xyz_aaxyz_nsc, no py_func etc
+    [filter_result] = tf.py_func(
+        absolute_angle_distance_xyz_aaxyz_nsc_batch,
+        [y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc],
+        [tf.float32], stateful=False,
+        name='py_func/absolute_angle_distance_xyz_aaxyz_nsc_batch')
+    filter_result.set_shape(y_true_xyz_aaxyz_nsc.get_shape()[0])
+    return filter_result
+
+
+def absolute_angle_distance_xyz_aaxyz_nsc_single(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc):
+    """ Calculate the absolute angular distance in radians between two encoded poses.
+
+    Input format is xyz_aaxyz_nsc.
+    This version is for a single pair of numpy arrays of length 8,
+    or length 5 for the rotation-only aaxyz_nsc format.
+    """
+    length = len(y_true_xyz_aaxyz_nsc)
+    if length == 5:
+        # workaround for rotation distance only,
+        # just use [0.5, 0.5, 0.5] for the translation component
+        # so the existing code can be utilized
+        fake_translation = np.array([0.5, 0.5, 0.5])
+        y_true_xyz_aaxyz_nsc = np.concatenate([fake_translation, y_true_xyz_aaxyz_nsc])
+        y_pred_xyz_aaxyz_nsc = np.concatenate([fake_translation, y_pred_xyz_aaxyz_nsc])
+
+    y_true_xyz_qxyzw = decode_xyz_aaxyz_nsc_to_xyz_qxyzw(y_true_xyz_aaxyz_nsc)
+    y_pred_xyz_qxyzw = decode_xyz_aaxyz_nsc_to_xyz_qxyzw(y_pred_xyz_aaxyz_nsc)
+    y_true_q = Quaternion(y_true_xyz_qxyzw[3:])
+    y_pred_q = Quaternion(y_pred_xyz_qxyzw[3:])
+    return Quaternion.absolute_distance(y_true_q, y_pred_q)
+
+
+def absolute_angle_distance_xyz_aaxyz_nsc_batch(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc):
+    """ Calculate the absolute angular distance for a batch of encoded poses.
+
+    Expects a batch of data as an nx8 array. Eager execution / numpy version.
+    Input format is xyz_aaxyz_nsc.
+    """
+    # print('type of y_true_xyz_aaxyz_nsc: ' + str(type(y_true_xyz_aaxyz_nsc)))
+    distances = []
+    for y_true, y_pred in zip(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc):
+        one_distance = absolute_angle_distance_xyz_aaxyz_nsc_single(y_true, y_pred)
+        # print('one angle distance: ' + str(one_distance))
+        distances.append(one_distance)
+    distances = np.array(distances, np.float32)
+    return distances
+
+
+def absolute_cart_distance_xyz_aaxyz_nsc_single(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc):
+    """ Calculate the Cartesian (translation) distance between two encoded poses.
+
+    This version is for a single pair of numpy arrays of length 8.
+    Input format is xyz_aaxyz_nsc.
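+
+    Only the translation component is compared; the rotation portion of
+    the encoding is decoded but not used by this function.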
+    """
+    y_true_xyz_qxyzw = decode_xyz_aaxyz_nsc_to_xyz_qxyzw(y_true_xyz_aaxyz_nsc)
+    y_pred_xyz_qxyzw = decode_xyz_aaxyz_nsc_to_xyz_qxyzw(y_pred_xyz_aaxyz_nsc)
+    # translation distance
+    return np.linalg.norm(y_true_xyz_qxyzw[:3] - y_pred_xyz_qxyzw[:3])
+
+
+def absolute_cart_distance_xyz_aaxyz_nsc_batch(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc):
+    """ Calculate the Cartesian (translation) distance for a batch of encoded poses.
+
+    Expects a batch of data as an nx8 array. Eager execution / numpy version.
+    """
+    # print('type of y_true_xyz_aaxyz_nsc: ' + str(type(y_true_xyz_aaxyz_nsc)))
+    distances = []
+    for y_true, y_pred in zip(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc):
+        one_distance = absolute_cart_distance_xyz_aaxyz_nsc_single(y_true, y_pred)
+        # print('one cart distance: ' + str(one_distance))
+        distances.append(one_distance)
+    distances = np.array(distances, np.float32)
+    return distances
+
+
+def grasp_accuracy_xyz_aaxyz_nsc_single(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.01, max_rotation=0.261799):
+    """ Calculate 3D grasp accuracy for a single 1D numpy array for the ground truth and predicted value.
+
+    Return 1 if the prediction meets both the translation and rotation accuracy criteria, 0 otherwise.
+
+    Supported formats are translation xyz with length 3,
+    aaxyz_nsc which is an axis and normalized sin(theta) cos(theta) with length 5,
+    or xyz_aaxyz_nsc which incorporates both of the above with length 8.
+
+    max_translation: defaults to 0.01 meters, or 1cm,
+        translations must be less than this distance away.
+    max_rotation: defaults to 15 degrees in radians,
+        rotations must be less than this angular distance away.
+    """
+    length = len(y_true_xyz_aaxyz_nsc)
+    if length == 3 or length == 8:
+        # translation distance
+        translation = absolute_cart_distance_xyz_aaxyz_nsc_single(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc)
+        if length == 3:
+            # translation component only
+            if translation < max_translation:
+                return 1.
+        # translation and rotation
+        elif length == 8:
+            # rotation distance
+            angle_distance = absolute_angle_distance_xyz_aaxyz_nsc_single(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc)
+            if angle_distance < max_rotation and translation < max_translation:
+                return 1.
+    elif length == 5:
+        # rotation distance only, just use [0.5, 0.5, 0.5] for the translation component so the existing code can be utilized
+        fake_translation = np.array([0.5, 0.5, 0.5])
+        angle_distance = absolute_angle_distance_xyz_aaxyz_nsc_single(
+            np.concatenate([fake_translation, y_true_xyz_aaxyz_nsc]),
+            np.concatenate([fake_translation, y_pred_xyz_aaxyz_nsc]))
+        if angle_distance < max_rotation:
+            return 1.
+    else:
+        raise ValueError('grasp_accuracy_xyz_aaxyz_nsc_single: unsupported label value format of length ' + str(length))
+    return 0.
+
+
+def grasp_accuracy_xyz_aaxyz_nsc_batch(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc, max_translation=0.01, max_rotation=0.261799):
+    """ Calculate 3D grasp accuracy for a batch of results.
+
+    Expects a batch of data as an nx8 array. Eager execution / numpy version.
+
+    max_translation defaults to 0.01 meters, or 1cm.
+    max_rotation defaults to 15 degrees in radians.
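+
+    A minimal sketch (illustrative; identical encodings always pass):
+
+        y_true = np.full((4, 8), 0.5)
+        y_pred = y_true.copy()
+        grasp_accuracy_xyz_aaxyz_nsc_batch(y_true, y_pred)  # -> array of four 1.0 values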
+    """
+    # print('type of y_true_xyz_aaxyz_nsc: ' + str(type(y_true_xyz_aaxyz_nsc)))
+    accuracies = []
+    for y_true, y_pred in zip(y_true_xyz_aaxyz_nsc, y_pred_xyz_aaxyz_nsc):
+        one_accuracy = grasp_accuracy_xyz_aaxyz_nsc_single(
+            y_true, y_pred, max_translation=max_translation, max_rotation=max_rotation)
+        # print('one grasp acc: ' + str(one_accuracy))
+        accuracies.append(one_accuracy)
+    accuracies = np.array(accuracies, np.float32)
+    return accuracies
diff --git a/enas/cifar10/grasp_utilities.py b/enas/cifar10/grasp_utilities.py
new file mode 100644
index 0000000..2061625
--- /dev/null
+++ b/enas/cifar10/grasp_utilities.py
@@ -0,0 +1,245 @@
+import sys
+import re
+import numpy as np
+import os
+import json
+import datetime
+import errno
+import six
+
+
+class NumpyEncoder(json.JSONEncoder):
+    """ json encoder for numpy types
+
+    source: https://stackoverflow.com/a/49677241/99379
+    """
+    def default(self, obj):
+        if isinstance(obj,
+                      (np.int_, np.intc, np.intp, np.int8,
+                       np.int16, np.int32, np.int64, np.uint8,
+                       np.uint16, np.uint32, np.uint64)):
+            return int(obj)
+        elif isinstance(obj,
+                        (np.float_, np.float16, np.float32,
+                         np.float64)):
+            return float(obj)
+        elif isinstance(obj, (np.ndarray,)):
+            return obj.tolist()
+        return json.JSONEncoder.default(self, obj)
+
+
+def rotate(data, shift=1):
+    """ Rotate the entries of a list or numpy array forward by `shift` positions.
+
+    For example, [0, 1, 2] will become [1, 2, 0] and
+    [4, 3, 1, 0] will become [3, 1, 0, 4].
+    The contents of index 1 move to index 0,
+    and the final entry will contain the original contents of index 0.
+    Always operates on axis 0.
+    """
+    if isinstance(data, list):
+        return data[shift:] + data[:shift]
+    else:
+        # np.roll shifts entries toward higher indices, so the shift is
+        # negated here to match the list behavior documented above
+        return np.roll(data, -shift, axis=0)
+
+
+def mkdir_p(path):
+    """Create the specified path on the filesystem like the `mkdir -p` command
+
+    Creates one or more filesystem directory levels as needed,
+    and does not return an error if the directory already exists.
+    """
+    # http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
+    try:
+        os.makedirs(path)
+    except OSError as exc:  # Python >2.5
+        if exc.errno == errno.EEXIST and os.path.isdir(path):
+            pass
+        else:
+            raise
+
+
+def timeStamped(fname, fmt='%Y-%m-%d-%H-%M-%S_{fname}'):
+    """ Apply a timestamp to the front of a filename description.
+
+    see: http://stackoverflow.com/a/5215012/99379
+    """
+    return datetime.datetime.now().strftime(fmt).format(fname=fname)
+
+
+def load_hyperparams_json(hyperparams_file, fine_tuning=False, learning_rate=None, feature_combo_name=None):
+    """ Load hyperparameters from a json file
+
+    # Returns
+
+    A kwargs dictionary of hyperparameters.
+    """
+    kwargs = {}
+    hyperparams = None
+    if hyperparams_file:
+        with open(hyperparams_file, mode='r') as hyperparams_fp:
+            kwargs = json.load(hyperparams_fp)
+        hyperparams = kwargs
+    if fine_tuning:
+        kwargs['trainable'] = True
+        kwargs['learning_rate'] = learning_rate
+        # TODO(ahundt) should we actually write the fine tuning settings out to the hyperparams log?
+        # hyperparams = kwargs
+
+    if (kwargs is not None and feature_combo_name is not None and
+            'feature_combo_name' in kwargs and
+            kwargs['feature_combo_name'] != feature_combo_name):
+        print('Warning: overriding old hyperparam feature_combo_name: %s'
+              ' with new feature_combo_name: %s. 
This means the network '
+              'structure and inputs will be different from what is defined '
+              'in the hyperparams file: %s' %
+              (kwargs['feature_combo_name'], feature_combo_name, hyperparams_file))
+        kwargs.pop('feature_combo_name')
+        # note: hyperparams refers to the same dict as kwargs here,
+        # so popping from kwargs already removed the key from both
+    return kwargs
+
+
+def is_sequence(arg):
+    """Returns true if arg is a list or another Python Sequence, and false otherwise.
+
+    Strings are explicitly excluded by the hasattr(arg, "strip") check.
+
+    source: https://stackoverflow.com/a/17148334/99379
+    """
+    return (not hasattr(arg, "strip") and
+            (hasattr(arg, "__getitem__") or
+             hasattr(arg, "__iter__")))
+
+
+def find_best_weights(fold_log_dir, match_string='', verbose=0, out_file=sys.stdout):
+    """ Find the weights file with the best val_* score in a directory.
+
+    Note that the default match_string of '' matches every .h5 file in the directory.
+    """
+    # Now we have to load the best model.
+    # '200_epoch_real_run' is for backwards compatibility before
+    # the fold nums were put into each fold's log_dir and run_name.
+    directory_listing = os.listdir(fold_log_dir)
+    fold_checkpoint_files = []
+    for name in directory_listing:
+        name = os.path.join(fold_log_dir, name)
+        if not os.path.isdir(name) and '.h5' in name:
+            if '200_epoch_real_run' in name or match_string in name:
+                fold_checkpoint_files += [name]
+
+    # check the filenames for the highest val score
+    fold_checkpoint_file = None
+    best_val = 0.0
+    for filename in fold_checkpoint_files:
+        if 'val_' in filename:
+            # pull out all the floating point numbers
+            # source: https://stackoverflow.com/a/4703409/99379
+            nums = re.findall(r"[-+]?\d*\.\d+|\d+", filename)
+            if len(nums) > 0:
+                # the '5' in the '.h5' extension is also matched,
+                # so the score is the second-to-last number found
+                cur_num = np.abs(float(nums[-2]))
+                if verbose > 0:
+                    out_file.write('old best ' + str(best_val) + ' current ' + str(cur_num) + '\n')
+                if cur_num > best_val:
+                    if verbose > 0:
+                        out_file.write('new best: ' + str(cur_num) + ' file: ' + filename + '\n')
+                    best_val = cur_num
+                    fold_checkpoint_file = filename
+
+    if fold_checkpoint_file is None:
+        raise ValueError('\n\nSomething went wrong when looking for model checkpoints, '
+                         'you need to take a look at model_predict_k_fold() '
+                         'in cornell_grasp_train.py. 
Here are the '
+                         'model checkpoint files we were looking at: \n\n' +
+                         str(fold_checkpoint_files))
+    return fold_checkpoint_file
+
+
+def make_model_description(run_name, model_name, hyperparams, dataset_names_str, label_features=None):
+    """ Put several strings together for a model description used in file and folder names
+    """
+    model_description = ''
+    if run_name:
+        model_description += run_name + '-'
+    if model_name:
+        model_description += model_name + '-'
+
+    # if hyperparams is not None:
+    #     if 'image_model_name' in hyperparams:
+    #         model_description += '_img_' + hyperparams['image_model_name']
+    #     if 'vector_model_name' in hyperparams:
+    #         model_description += '_vec_' + hyperparams['vector_model_name']
+    #     if 'trunk_model_name' in hyperparams:
+    #         model_description += '_trunk_' + hyperparams['trunk_model_name']
+    ########################################################
+    # End tensor configuration, begin model configuration and training
+    model_description += '-dataset_' + dataset_names_str
+
+    if label_features is not None:
+        model_description += '-' + label_features
+
+    run_name = timeStamped(model_description)
+    return run_name
+
+
+def multi_run_histories_summary(
+        run_histories,
+        save_filename=None,
+        metrics='val_binary_accuracy',
+        description_prefix='k_fold_average_',
+        results_prefix='k_fold_results',
+        multi_history_metrics='mean',
+        verbose=1):
+    """ Find the k_fold average of the best model weights on each fold, and save the results.
+
+    This can be used to summarize multiple runs, be they on different models or the same model.
+
+    Please note that currently this should only be utilized with classification models,
+    or regression models with absolute thresholds. It will not calculate
+    grasp_jaccard regression models' scores correctly.
+
+    # Arguments
+
+    run_histories: A dictionary from training run description strings to keras history objects.
+    multi_history_metrics: 'mean', 'min', or 'max',
+        used to summarize the data from multiple training runs.
+
+    # Returns
+
+    A results dictionary including the best value of each metric for each run,
+    plus the summary value across all runs.
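+
+    A minimal usage sketch (history objects are the return values of
+    keras Model.fit(); the names are illustrative):
+
+        histories = {'fold_0': h0, 'fold_1': h1}
+        results = multi_run_histories_summary(
+            histories, save_filename='k_fold_results.json',
+            metrics='val_binary_accuracy')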
+    """
+    if isinstance(metrics, str):
+        metrics = [metrics]
+    if isinstance(multi_history_metrics, str):
+        multi_history_metrics = [multi_history_metrics]
+    results = {}
+    for metric, multi_history_metric in zip(metrics, multi_history_metrics):
+        best_metric_scores = []
+        for history_description, history_object in six.iteritems(run_histories):
+            if 'loss' in metric or 'error' in metric:
+                best_score = np.min(history_object.history[metric])
+                results[history_description + '_min_' + metric] = best_score
+            else:
+                best_score = np.max(history_object.history[metric])
+                results[history_description + '_max_' + metric] = best_score
+            best_metric_scores += [best_score]
+        if multi_history_metric == 'mean' or multi_history_metric == 'average':
+            k_fold_average = np.mean(best_metric_scores)
+        elif multi_history_metric == 'min':
+            k_fold_average = np.min(best_metric_scores)
+        elif multi_history_metric == 'max':
+            k_fold_average = np.max(best_metric_scores)
+        else:
+            raise ValueError(
+                'multi_run_histories_summary(): Unsupported multi_history_metric: ' +
+                str(multi_history_metric))
+        result_key = description_prefix + '_' + multi_history_metric + '_' + metric
+        results[result_key] = k_fold_average
+
+    if verbose:
+        print(str(results_prefix) + ':\n ' + str(results))
+
+    if save_filename is not None:
+        with open(save_filename, 'w') as fp:
+            # save out all kfold params so they can be reloaded in the future
+            json.dump(results, fp)
+    return results
diff --git a/enas/cifar10/image_ops.py b/enas/cifar10/image_ops.py
index ea10547..afe0613 100644
--- a/enas/cifar10/image_ops.py
+++ b/enas/cifar10/image_ops.py
@@ -1,6 +1,7 @@
 import numpy as np
 import tensorflow as tf
 from tensorflow.python.training import moving_averages
+import traceback
 
 from enas.common_ops import create_weight
 from enas.common_ops import create_bias
@@ -89,6 +90,16 @@ def global_avg_pool(x, data_format="NHWC"):
   return x
 
 
+def global_max_pool(x, data_format="NHWC"):
+  if data_format == "NHWC":
+    x = tf.reduce_max(x, [1, 2])
+  elif data_format == "NCHW":
+    x = tf.reduce_max(x, [2, 3])
+  else:
+    raise NotImplementedError("Unknown data_format {}".format(data_format))
+  return x
+
+
 def batch_norm(x, is_training, name="bn", decay=0.9, epsilon=1e-5,
                data_format="NHWC"):
   if data_format == "NHWC":
@@ -130,6 +141,85 @@ def batch_norm(x, is_training, name="bn", decay=0.9, epsilon=1e-5,
   return x
 
 
+def norm(x, is_training, name=None, decay=0.9, epsilon=1e-5, data_format="NHWC", norm_type='group', G=32, verbose=0):
+  """ Perform batch normalization or group normalization, depending on the norm_type argument.
+
+  norm_type: options include none, batch, and group.
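+  G: the number of groups when norm_type is 'group'; at run time G is
+      clamped with tf.minimum(G, C) so it never exceeds the channel count.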
+  reference: https://github.com/shaohua0116/Group-Normalization-Tensorflow
+  """
+  shape_list = x.get_shape().as_list()
+  if verbose > 0:
+    print('-' * 80)
+    print('group_norm input x shape outside scope: ' + str(shape_list) + ' data_format: ' + str(data_format))
+    for line in traceback.format_stack():
+      print(line.strip())
+  if data_format == "NHWC":
+    c_shape = [x.get_shape()[3]]
+  elif data_format == "NCHW":
+    c_shape = [x.get_shape()[1]]
+  else:
+    raise NotImplementedError("Unknown data_format {}".format(data_format))
+  if name is None:
+    name = norm_type + '_norm'
+  with tf.variable_scope(name, reuse=None if is_training else True):
+
+    if norm_type == 'none':
+      output = x
+    elif norm_type == 'batch':
+      output = batch_norm(
+          x=x, is_training=is_training, name=name,
+          decay=decay, epsilon=epsilon, data_format=data_format)
+    elif norm_type == 'group':
+      # normalize
+      # transpose: [bs, h, w, c] to [bs, c, h, w] following the paper
+      # print('group_norm input x shape inside scope: ' + str(x.get_shape().as_list()))
+      if data_format == "NHWC":
+        x = tf.transpose(x, [0, 3, 1, 2])
+        # c_shape = [x.get_shape()[3]]
+        # channels_axis=-1, reduction_axes=[-3, -2]
+      elif data_format == "NCHW":
+        pass
+        # already in the right format
+        # c_shape = [x.get_shape()[1]]
+        # channels_axis=-3, reduction_axes=[-2, -1]
+      else:
+        raise NotImplementedError("Unknown data_format {}".format(data_format))
+      shape = tf.shape(x)
+      N = shape[0]
+      C = shape[1]
+      H = shape[2]
+      W = shape[3]
+      G = tf.minimum(G, C)
+      x = tf.reshape(x, [N, G, C // G, H, W])
+      mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True)
+      x = (x - mean) / tf.sqrt(var + epsilon)
+      # per channel gamma and beta
+      gamma = tf.get_variable('gamma', c_shape,
+                              initializer=tf.constant_initializer(1.0, dtype=tf.float32))
+      beta = tf.get_variable('beta', c_shape,
+                             initializer=tf.constant_initializer(0.0, dtype=tf.float32))
+      gamma = tf.reshape(gamma, [1, C, 1, 1])
+      beta = tf.reshape(beta, [1, C, 1, 1])
+
+      output = tf.reshape(x, [N, C, H, W]) * gamma + beta
+
+      if data_format == "NHWC":
+        # transpose back: [bs, c, h, w] to [bs, h, w, c] following the paper
+        output = tf.transpose(output, [0, 2, 3, 1])
+      elif data_format == "NCHW":
+        # already in the right format
+        pass
+      else:
+        raise NotImplementedError("Unknown data_format {}".format(data_format))
+      # recover initial shape information
+      if shape_list[0] is None:
+        # first index is batch, that should be inferred
+        shape_list[0] = -1
+      output = tf.reshape(output, shape_list)
+    else:
+      raise NotImplementedError
+  return output
+
+
 def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn",
                          decay=0.9, epsilon=1e-3, data_format="NHWC"):
diff --git a/enas/cifar10/main.py b/enas/cifar10/main.py
index 644e5c7..26be2b3 100644
--- a/enas/cifar10/main.py
+++ b/enas/cifar10/main.py
@@ -5,9 +5,9 @@ import os
 
 try:
-  import cPickle as pickle
+    import cPickle as pickle
 except ImportError:
-  import _pickle as pickle
+    import _pickle as pickle
 
 import shutil
 import sys
@@ -33,22 +33,36 @@
 from enas.cifar10.micro_controller import MicroController
 from enas.cifar10.micro_child import MicroChild
 
+
 flags = tf.app.flags
 FLAGS = flags.FLAGS
 
 DEFINE_boolean("reset_output_dir", False, "Delete output_dir if exists.")
 DEFINE_string("data_path", "", "")
+DEFINE_string("data_base_path", "~/.keras/datasets/costar_block_stacking_dataset_v0.3/", "Base directory of the costar block stacking dataset.")
 DEFINE_string("output_dir", "", "")
 DEFINE_string("data_format", "NHWC", "'NHWC' or 'NCHW'")
-DEFINE_string("dataset", "cifar", "'cifar' or 'fmnist'")
+DEFINE_string("dataset", 
"cifar", "'cifar' or 'fmnist' or 'stacking'") DEFINE_string("search_for", None, "Must be [macro|micro]") DEFINE_integer("batch_size", 32, "") +DEFINE_integer("valid_set_size", 128, "") +DEFINE_integer("height_img", 32, "") +DEFINE_integer("width_img", 32, "") +DEFINE_boolean("regression", False, "Task is regression or classification") +DEFINE_boolean("translation_only", False, "Translation only case") +DEFINE_boolean("use_root", False, "Process image and vector and then tile") +DEFINE_boolean("one_hot_encoding", False, "Use one hot encoding for labels (only for stacking dataset)") +DEFINE_boolean("rotation_only", False, "Rotation only case") +DEFINE_boolean("stacking_reward", False, "Train a block stacking critic which estimates the reward of a proposed action based on the current state and command.") +DEFINE_integer("max_loss", 0, "To set positive reward; for stacking dataset only") +DEFINE_boolean("use_msle", False, "Use Mean Square Logarithmic Error as Loss") DEFINE_integer("num_epochs", 300, "") DEFINE_integer("child_lr_dec_every", 100, "") -DEFINE_integer("child_num_layers", 5, "") -DEFINE_integer("child_num_cells", 5, "") +DEFINE_integer("child_num_layers", 5, "number of layer blocks") +DEFINE_integer("child_num_cells", 5, "number of cells in a single layer") +DEFINE_integer("child_pool_distance", 2, "number of layers between each pooling step, which reduces the resolution") DEFINE_integer("child_filter_size", 5, "") DEFINE_integer("child_out_filters", 48, "") DEFINE_integer("child_out_filters_scale", 1, "") @@ -72,6 +86,7 @@ DEFINE_boolean("child_use_aux_heads", False, "Should we use an aux head") DEFINE_boolean("child_sync_replicas", False, "To sync or not to sync.") DEFINE_boolean("child_lr_cosine", False, "Use cosine lr schedule") +DEFINE_string("child_optimizer", "momentum", "Optimization algorithm, one of sgd, momentum or adam") DEFINE_float("controller_lr", 1e-3, "") DEFINE_float("controller_lr_dec_rate", 1.0, "") @@ -94,274 +109,414 @@ DEFINE_boolean("controller_sync_replicas", False, "To sync or not to sync.") DEFINE_boolean("controller_training", True, "") DEFINE_boolean("controller_use_critic", False, "") +DEFINE_string("controller_optimizer", "adam", "Optimization algorithm, one of sgd, momentum or adam") DEFINE_integer("log_every", 50, "How many steps to log") DEFINE_integer("eval_every_epochs", 1, "How many epochs to eval") +flags.DEFINE_float( + 'random_augmentation', + None, + 'Frequency from 0.0 to 1.0 with which random augmentation is performed. ' + 'Disabled by default and currently for block stacking dataset only.' +) + + def get_ops(images, labels): - """ - Args: - images: dict with keys {"train", "valid", "test"}. - labels: dict with keys {"train", "valid", "test"}. 
- """ - - assert FLAGS.search_for is not None, "Please specify --search_for" - - if FLAGS.search_for == "micro": - ControllerClass = MicroController - ChildClass = MicroChild - else: - ControllerClass = GeneralController - ChildClass = GeneralChild - - child_model = ChildClass( - images, - labels, - use_aux_heads=FLAGS.child_use_aux_heads, - cutout_size=FLAGS.child_cutout_size, - whole_channels=FLAGS.controller_search_whole_channels, - num_layers=FLAGS.child_num_layers, - num_cells=FLAGS.child_num_cells, - num_branches=FLAGS.child_num_branches, - fixed_arc=FLAGS.child_fixed_arc, - out_filters_scale=FLAGS.child_out_filters_scale, - out_filters=FLAGS.child_out_filters, - keep_prob=FLAGS.child_keep_prob, - drop_path_keep_prob=FLAGS.child_drop_path_keep_prob, - num_epochs=FLAGS.num_epochs, - l2_reg=FLAGS.child_l2_reg, - data_format=FLAGS.data_format, - batch_size=FLAGS.batch_size, - clip_mode="norm", - grad_bound=FLAGS.child_grad_bound, - lr_init=FLAGS.child_lr, - lr_dec_every=FLAGS.child_lr_dec_every, - lr_dec_rate=FLAGS.child_lr_dec_rate, - lr_cosine=FLAGS.child_lr_cosine, - lr_max=FLAGS.child_lr_max, - lr_min=FLAGS.child_lr_min, - lr_T_0=FLAGS.child_lr_T_0, - lr_T_mul=FLAGS.child_lr_T_mul, - optim_algo="momentum", - sync_replicas=FLAGS.child_sync_replicas, - num_aggregate=FLAGS.child_num_aggregate, - num_replicas=FLAGS.child_num_replicas, - ) - - if FLAGS.child_fixed_arc is None: - controller_model = ControllerClass( - search_for=FLAGS.search_for, - search_whole_channels=FLAGS.controller_search_whole_channels, - skip_target=FLAGS.controller_skip_target, - skip_weight=FLAGS.controller_skip_weight, - num_cells=FLAGS.child_num_cells, - num_layers=FLAGS.child_num_layers, - num_branches=FLAGS.child_num_branches, - out_filters=FLAGS.child_out_filters, - lstm_size=64, - lstm_num_layers=1, - lstm_keep_prob=1.0, - tanh_constant=FLAGS.controller_tanh_constant, - op_tanh_reduce=FLAGS.controller_op_tanh_reduce, - temperature=FLAGS.controller_temperature, - lr_init=FLAGS.controller_lr, - lr_dec_start=0, - lr_dec_every=1000000, # never decrease learning rate - l2_reg=FLAGS.controller_l2_reg, - entropy_weight=FLAGS.controller_entropy_weight, - bl_dec=FLAGS.controller_bl_dec, - use_critic=FLAGS.controller_use_critic, - optim_algo="adam", - sync_replicas=FLAGS.controller_sync_replicas, - num_aggregate=FLAGS.controller_num_aggregate, - num_replicas=FLAGS.controller_num_replicas) - - child_model.connect_controller(controller_model) - controller_model.build_trainer(child_model) - - controller_ops = { - "train_step": controller_model.train_step, - "loss": controller_model.loss, - "train_op": controller_model.train_op, - "lr": controller_model.lr, - "grad_norm": controller_model.grad_norm, - "valid_acc": controller_model.valid_acc, - "optimizer": controller_model.optimizer, - "baseline": controller_model.baseline, - "entropy": controller_model.sample_entropy, - "sample_arc": controller_model.sample_arc, - "skip_rate": controller_model.skip_rate, + """ + Args: + images: dict with keys {"train", "valid", "test"}. + labels: dict with keys {"train", "valid", "test"}. 
+ """ + + assert FLAGS.search_for is not None, "Please specify --search_for" + + if FLAGS.search_for == "micro": + ControllerClass = MicroController + ChildClass = MicroChild + else: + ControllerClass = GeneralController + ChildClass = GeneralChild + + child_model = ChildClass( + images, + labels, + use_aux_heads=FLAGS.child_use_aux_heads, + cutout_size=FLAGS.child_cutout_size, + whole_channels=FLAGS.controller_search_whole_channels, + num_layers=FLAGS.child_num_layers, + num_cells=FLAGS.child_num_cells, + num_branches=FLAGS.child_num_branches, + fixed_arc=FLAGS.child_fixed_arc, + out_filters_scale=FLAGS.child_out_filters_scale, + out_filters=FLAGS.child_out_filters, + keep_prob=FLAGS.child_keep_prob, + drop_path_keep_prob=FLAGS.child_drop_path_keep_prob, + num_epochs=FLAGS.num_epochs, + l2_reg=FLAGS.child_l2_reg, + data_format=FLAGS.data_format, + batch_size=FLAGS.batch_size, + clip_mode="norm", + grad_bound=FLAGS.child_grad_bound, + lr_init=FLAGS.child_lr, + lr_dec_every=FLAGS.child_lr_dec_every, + lr_dec_rate=FLAGS.child_lr_dec_rate, + lr_cosine=FLAGS.child_lr_cosine, + lr_max=FLAGS.child_lr_max, + lr_min=FLAGS.child_lr_min, + lr_T_0=FLAGS.child_lr_T_0, + lr_T_mul=FLAGS.child_lr_T_mul, + optim_algo=FLAGS.child_optimizer, + sync_replicas=FLAGS.child_sync_replicas, + num_aggregate=FLAGS.child_num_aggregate, + num_replicas=FLAGS.child_num_replicas, + valid_set_size=FLAGS.valid_set_size, + image_shape=(FLAGS.height_img, FLAGS.width_img, 3), + translation_only=FLAGS.translation_only, + rotation_only=FLAGS.rotation_only, + stacking_reward=FLAGS.stacking_reward, + use_root=FLAGS.use_root, + dataset=FLAGS.dataset, + data_base_path=FLAGS.data_base_path, + output_dir=FLAGS.output_dir, + pool_distance=FLAGS.child_pool_distance, + one_hot_encoding=FLAGS.one_hot_encoding, + use_msle=FLAGS.use_msle, + random_augmentation=FLAGS.random_augmentation + ) + if FLAGS.child_fixed_arc is None: + controller_model = ControllerClass( + search_for=FLAGS.search_for, + search_whole_channels=FLAGS.controller_search_whole_channels, + skip_target=FLAGS.controller_skip_target, + skip_weight=FLAGS.controller_skip_weight, + num_cells=FLAGS.child_num_cells, + num_layers=FLAGS.child_num_layers, + num_branches=FLAGS.child_num_branches, + out_filters=FLAGS.child_out_filters, + lstm_size=64, + lstm_num_layers=1, + lstm_keep_prob=1.0, + tanh_constant=FLAGS.controller_tanh_constant, + op_tanh_reduce=FLAGS.controller_op_tanh_reduce, + temperature=FLAGS.controller_temperature, + lr_init=FLAGS.controller_lr, + lr_dec_start=0, + lr_dec_every=1000000, # never decrease learning rate + l2_reg=FLAGS.controller_l2_reg, + entropy_weight=FLAGS.controller_entropy_weight, + bl_dec=FLAGS.controller_bl_dec, + use_critic=FLAGS.controller_use_critic, + optim_algo=FLAGS.controller_optimizer, + sync_replicas=FLAGS.controller_sync_replicas, + num_aggregate=FLAGS.controller_num_aggregate, + num_replicas=FLAGS.controller_num_replicas, + max_loss=FLAGS.max_loss, + dataset=FLAGS.dataset) + + child_model.connect_controller(controller_model) + controller_model.build_trainer(child_model) + + controller_ops = { + "train_step": controller_model.train_step, + "loss": controller_model.loss, + "train_op": controller_model.train_op, + "lr": controller_model.lr, + "grad_norm": controller_model.grad_norm, + "valid_acc": controller_model.valid_acc, + "optimizer": controller_model.optimizer, + "baseline": controller_model.baseline, + "entropy": controller_model.sample_entropy, + "sample_arc": controller_model.sample_arc, + "skip_rate": controller_model.skip_rate, 
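+        # The entries below expose extra regression and reward statistics
+        # defined on the controller model as part of the stacking dataset support.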
+ "reward": controller_model.reward, + "mse": controller_model.mse, + "cart_error": controller_model.cart_error, + "angle_error": controller_model.angle_error, + "mae": controller_model.mae, + # "g_emb": controller_model.g_emb, + } + else: + assert not FLAGS.controller_training, ( + "--child_fixed_arc is given, cannot train controller") + child_model.connect_controller(None) + controller_ops = None + + child_ops = { + "global_step": child_model.global_step, + "loss": child_model.loss, + "loss_sec": child_model.loss_secondary, + "train_op": child_model.train_op, + "lr": child_model.lr, + "grad_norm": child_model.grad_norm, + "train_acc": child_model.train_acc, + "train_acc_5mm_7_5deg": child_model.train_acc_5mm_7_5deg, + "train_acc_1cm_15deg": child_model.train_acc_1cm_15deg, + "train_acc_2cm_30deg": child_model.train_acc_2cm_30deg, + "train_acc_4cm_60deg": child_model.train_acc_4cm_60deg, + "train_acc_8cm_120deg": child_model.train_acc_8cm_120deg, + "train_acc_16cm_240deg": child_model.train_acc_16cm_240deg, + "train_acc_32cm_360deg": child_model.train_acc_32cm_360deg, + "optimizer": child_model.optimizer, + "num_train_batches": child_model.num_train_batches, + "train_angle_error": child_model.train_angle_error, + "train_cart_error": child_model.train_cart_error, + "train_mae": child_model.train_mae, + "train_preds": child_model.train_preds[0], + "train_label": child_model.y_train[0], } - else: - assert not FLAGS.controller_training, ( - "--child_fixed_arc is given, cannot train controller") - child_model.connect_controller(None) - controller_ops = None - - child_ops = { - "global_step": child_model.global_step, - "loss": child_model.loss, - "train_op": child_model.train_op, - "lr": child_model.lr, - "grad_norm": child_model.grad_norm, - "train_acc": child_model.train_acc, - "optimizer": child_model.optimizer, - "num_train_batches": child_model.num_train_batches, - } - - ops = { - "child": child_ops, - "controller": controller_ops, - "eval_every": child_model.num_train_batches * FLAGS.eval_every_epochs, - "eval_func": child_model.eval_once, - "num_train_batches": child_model.num_train_batches, - } - - return ops + ops = { + "child": child_ops, + "controller": controller_ops, + "eval_every": child_model.num_train_batches * FLAGS.eval_every_epochs, + "eval_func": child_model.eval_once, + "num_train_batches": child_model.num_train_batches, + } -def train(): - if FLAGS.child_fixed_arc is None: - images, labels = read_data(FLAGS.data_path, dataset = FLAGS.dataset) - else: - images, labels = read_data(FLAGS.data_path, num_valids = 0, dataset = FLAGS.dataset) - - g = tf.Graph() - with g.as_default(): - ops = get_ops(images, labels) - child_ops = ops["child"] - controller_ops = ops["controller"] - - saver = tf.train.Saver(max_to_keep=2) - checkpoint_saver_hook = tf.train.CheckpointSaverHook( - FLAGS.output_dir, save_steps=child_ops["num_train_batches"], saver=saver) - - hooks = [checkpoint_saver_hook] - if FLAGS.child_sync_replicas: - sync_replicas_hook = child_ops["optimizer"].make_session_run_hook(True) - hooks.append(sync_replicas_hook) - if FLAGS.controller_training and FLAGS.controller_sync_replicas: - sync_replicas_hook = controller_ops["optimizer"].make_session_run_hook(True) - hooks.append(sync_replicas_hook) + return ops - print("-" * 80) - print("Starting session") - config = tf.ConfigProto(allow_soft_placement=True) - with tf.train.SingularMonitoredSession( - config=config, hooks=hooks, checkpoint_dir=FLAGS.output_dir) as sess: - start_time = time.time() - while True: - run_ops = [ - 
child_ops["loss"], - child_ops["lr"], - child_ops["grad_norm"], - child_ops["train_acc"], - child_ops["train_op"], - ] - loss, lr, gn, tr_acc, _ = sess.run(run_ops) - global_step = sess.run(child_ops["global_step"]) - - if FLAGS.child_sync_replicas: - actual_step = global_step * FLAGS.num_aggregate - else: - actual_step = global_step - epoch = actual_step // ops["num_train_batches"] - curr_time = time.time() - if global_step % FLAGS.log_every == 0: - log_string = "" - log_string += "epoch={:<6d}".format(epoch) - log_string += "ch_step={:<6d}".format(global_step) - log_string += " loss={:<8.6f}".format(loss) - log_string += " lr={:<8.4f}".format(lr) - log_string += " |g|={:<8.4f}".format(gn) - log_string += " tr_acc={:<3d}/{:>3d}".format( - tr_acc, FLAGS.batch_size) - log_string += " mins={:<10.2f}".format( - float(curr_time - start_time) / 60) - print(log_string) - - if actual_step % ops["eval_every"] == 0: - if (FLAGS.controller_training and - epoch % FLAGS.controller_train_every == 0): - print("Epoch {}: Training controller".format(epoch)) - for ct_step in range(FLAGS.controller_train_steps * - FLAGS.controller_num_aggregate): + +def train(): + if FLAGS.child_fixed_arc is None: + images, labels = read_data(FLAGS.data_path, dataset=FLAGS.dataset) + else: + images, labels = read_data( + FLAGS.data_path, num_valids=0, dataset=FLAGS.dataset) + + g = tf.Graph() + with g.as_default(): + ops = get_ops(images, labels) + child_ops = ops["child"] + controller_ops = ops["controller"] + + saver = tf.train.Saver(max_to_keep=2) + checkpoint_saver_hook = tf.train.CheckpointSaverHook( + FLAGS.output_dir, save_steps=child_ops["num_train_batches"], saver=saver) + + hooks = [checkpoint_saver_hook] + if FLAGS.child_sync_replicas: + sync_replicas_hook = child_ops["optimizer"].make_session_run_hook( + True) + hooks.append(sync_replicas_hook) + if FLAGS.controller_training and FLAGS.controller_sync_replicas: + sync_replicas_hook = controller_ops["optimizer"].make_session_run_hook( + True) + hooks.append(sync_replicas_hook) + + print("-" * 80) + print("Starting session") + config = tf.ConfigProto(allow_soft_placement=True) + with tf.train.SingularMonitoredSession( + config=config, hooks=hooks, checkpoint_dir=FLAGS.output_dir) as sess: + start_time = time.time() + print("SingularMonitoredSession started..") + while True: run_ops = [ - controller_ops["loss"], - controller_ops["entropy"], - controller_ops["lr"], - controller_ops["grad_norm"], - controller_ops["valid_acc"], - controller_ops["baseline"], - controller_ops["skip_rate"], - controller_ops["train_op"], + child_ops["loss"], + child_ops["loss_sec"], + child_ops["lr"], + child_ops["grad_norm"], + child_ops["train_acc"], + child_ops["train_acc_5mm_7_5deg"], + child_ops["train_acc_1cm_15deg"], + child_ops["train_acc_2cm_30deg"], + child_ops["train_acc_4cm_60deg"], + child_ops["train_acc_8cm_120deg"], + child_ops["train_acc_16cm_240deg"], + child_ops["train_acc_32cm_360deg"], + child_ops["train_op"], + child_ops["train_angle_error"], + child_ops["train_cart_error"], + child_ops["train_mae"], + child_ops["train_preds"], + child_ops["train_label"], ] - loss, entropy, lr, gn, val_acc, bl, skip, _ = sess.run(run_ops) - controller_step = sess.run(controller_ops["train_step"]) - - if ct_step % FLAGS.log_every == 0: - curr_time = time.time() - log_string = "" - log_string += "ctrl_step={:<6d}".format(controller_step) - log_string += " loss={:<7.3f}".format(loss) - log_string += " ent={:<5.2f}".format(entropy) - log_string += " lr={:<6.4f}".format(lr) - log_string += 
" |g|={:<8.4f}".format(gn) - log_string += " acc={:<6.4f}".format(val_acc) - log_string += " bl={:<5.2f}".format(bl) - log_string += " mins={:<.2f}".format( - float(curr_time - start_time) / 60) - print(log_string) - - print("Here are 10 architectures") - for _ in range(10): - arc, acc = sess.run([ - controller_ops["sample_arc"], - controller_ops["valid_acc"], - ]) - if FLAGS.search_for == "micro": - normal_arc, reduce_arc = arc - print(np.reshape(normal_arc, [-1])) - print(np.reshape(reduce_arc, [-1])) + loss, loss_sec, lr, gn, tr_acc, tr_acc_5_7_5, tr_acc_1_15, tr_acc_2_30, tr_acc_4_60, tr_acc_8_120, tr_acc_16_240, tr_acc_32_360, tr_op, tr_angle_error, tr_cart_error, tr_mae, tr_preds, tr_label = sess.run( + run_ops) + global_step = sess.run(child_ops["global_step"]) + print("---------------global step", global_step, end="\r") + + if FLAGS.child_sync_replicas: + actual_step = global_step * FLAGS.num_aggregate else: - start = 0 - for layer_id in range(FLAGS.child_num_layers): - if FLAGS.controller_search_whole_channels: - end = start + 1 + layer_id + actual_step = global_step + epoch = actual_step // ops["num_train_batches"] + curr_time = time.time() + if global_step % FLAGS.log_every == 0: + log_string = "\n" + log_string += "epoch={:<6d}".format(epoch) + log_string += "ch_step={:<6d}".format(global_step) + log_string += " child_loss={}".format(loss) + log_string += " child_loss_sec={}".format(loss_sec) + # log_string += " child_loss={:<8.6f}".format(loss) + # log_string += " child_loss_sec={:<8.6f}".format(loss_sec) + log_string += " lr={:<8.4f}".format(lr) + log_string += " |g|={:<8.4f}".format(gn) + log_string += " child_tr_acc={:<3f}".format( + tr_acc) + log_string += "\nchild_tr_acc_5mm_7_5deg={:<3f}".format( + tr_acc_5_7_5) + log_string += "\nchild_tr_acc_1cm_15deg={:<3f}".format( + tr_acc_1_15) + log_string += "\nchild_tr_acc_2cm_30deg={:<3f}".format( + tr_acc_2_30) + log_string += "\nchild_tr_acc_4cm_60deg={:<3f}".format( + tr_acc_4_60) + log_string += "\nchild_tr_acc_8cm_120deg={:<3f}".format( + tr_acc_8_120) + log_string += "\nchild_tr_acc_16cm_240deg={:<3f}".format( + tr_acc_16_240) + log_string += "\nchild_tr_acc_32cm_360deg={:<3f}".format( + tr_acc_32_360) + log_string += " mins={:<10.2f}".format( + float(curr_time - start_time) / 60) + if FLAGS.dataset == "stacking": + if FLAGS.translation_only is False and FLAGS.stacking_reward is False: + log_string += "\ntr_ang_error={}".format(tr_angle_error) + if FLAGS.rotation_only is False and FLAGS.stacking_reward is False: + log_string += " tr_cart_error={}".format(tr_cart_error) + log_string += " tr_mae={}".format(tr_mae) + log_string += "\ntr_preds={}".format(tr_preds) + log_string += "\ntr_label={}".format(tr_label) + print(log_string) + if os.path.exists(os.path.join(FLAGS.output_dir,"train_metrics.csv")): + file_mode = 'a' else: - end = start + 2 * FLAGS.child_num_branches + layer_id - print(np.reshape(arc[start: end], [-1])) - start = end - print("val_acc={:<6.4f}".format(acc)) - print("-" * 80) - - print("Epoch {}: Eval".format(epoch)) - if FLAGS.child_fixed_arc is None: - ops["eval_func"](sess, "valid") - ops["eval_func"](sess, "test") - - if epoch >= FLAGS.num_epochs: - break + file_mode = 'w+' + with open(os.path.join(FLAGS.output_dir, "train_metrics.csv"), file_mode) as fp: + fp.write("{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}\n".format( + epoch, global_step, loss, loss_sec, tr_acc, tr_acc_5_7_5, tr_acc_1_15, tr_acc_2_30, tr_acc_4_60, tr_acc_8_120, tr_acc_16_240, tr_acc_32_360, tr_op, tr_angle_error, 
tr_cart_error, tr_mae))
+
+                if actual_step % ops["eval_every"] == 0:
+                    if (FLAGS.controller_training and
+                            epoch % FLAGS.controller_train_every == 0):
+                        print("Epoch {}: Training controller".format(epoch))
+                        for ct_step in range(FLAGS.controller_train_steps *
+                                             FLAGS.controller_num_aggregate):
+                            run_ops = [
+                                controller_ops["loss"],
+                                controller_ops["entropy"],
+                                controller_ops["lr"],
+                                controller_ops["grad_norm"],
+                                controller_ops["valid_acc"],
+                                controller_ops["baseline"],
+                                controller_ops["reward"],
+                                controller_ops["mse"],
+                                controller_ops["cart_error"],
+                                controller_ops["angle_error"],
+                                controller_ops["mae"],
+                                controller_ops["skip_rate"],
+                                controller_ops["train_op"],
+                            ]
+                            loss, entropy, lr, gn, val_acc, bl, reward, c_mse, cart_error, angle_error, mae, skip, _ = sess.run(
+                                run_ops)
+                            controller_step = sess.run(
+                                controller_ops["train_step"])
+
+                            if ct_step % FLAGS.log_every == 0:
+                                curr_time = time.time()
+                                log_string = "\n"
+                                log_string += "ctrl_step={:<6d}".format(
+                                    controller_step)
+                                log_string += " controller_loss={:<7.3f}".format(loss)
+                                log_string += " ent={:<5.2f}".format(entropy)
+                                log_string += " lr={:<6.4f}".format(lr)
+                                log_string += " |g|={:<8.4f}".format(gn)
+                                log_string += " acc={:<6.4f}".format(val_acc)
+                                log_string += " bl={:<5.2f}".format(bl)
+                                log_string += " mins={:<.2f}".format(
+                                    float(curr_time - start_time) / 60)
+                                log_string += " rw={}".format(reward)
+                                log_string += " mse={}".format(c_mse)
+                                if FLAGS.dataset == "stacking":
+                                    if FLAGS.rotation_only is False and FLAGS.stacking_reward is False:
+                                        log_string += "\ncart_error={}".format(cart_error)
+                                    if FLAGS.translation_only is False and FLAGS.stacking_reward is False:
+                                        log_string += "\nangle_error={}".format(angle_error)
+                                    log_string += "\nmae={}".format(mae)
+                                # log_string += "\n g_emb = {}".format(g_emb)
+                                print(log_string)
+                                if os.path.exists(os.path.join(FLAGS.output_dir, "controller_metrics.csv")):
+                                    file_mode = 'a'
+                                else:
+                                    file_mode = 'w+'
+                                with open(os.path.join(FLAGS.output_dir, "controller_metrics.csv"), file_mode) as fp:
+                                    fp.write("{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}\n".format(
+                                        epoch, controller_step, loss, entropy, lr, gn, val_acc, bl, reward, c_mse, cart_error, angle_error, mae))
+
+                    print("Here are 10 architectures")
+                    for _ in range(10):
+                        arc, acc, c_loss, mse, selected_cart_error, selected_angle_error, selected_mae = sess.run([
+                            controller_ops["sample_arc"],
+                            controller_ops["valid_acc"],
+                            controller_ops["loss"],
+                            controller_ops["mse"],
+                            controller_ops["cart_error"],
+                            controller_ops["angle_error"],
+                            controller_ops["mae"],
+                        ])
+                        if FLAGS.search_for == "micro":
+                            normal_arc, reduce_arc = arc
+                            print(np.reshape(normal_arc, [-1]))
+                            print(np.reshape(reduce_arc, [-1]))
+                        else:
+                            start = 0
+                            for layer_id in range(FLAGS.child_num_layers):
+                                if FLAGS.controller_search_whole_channels:
+                                    end = start + 1 + layer_id
+                                else:
+                                    end = start + 2 * FLAGS.child_num_branches + layer_id
+                                print(np.reshape(arc[start: end], [-1]))
+                                start = end
+                        print("val_acc={:<6.4f}".format(acc))
+                        print("controller_loss={}".format(c_loss))
+                        if FLAGS.dataset == "stacking":
+                            print("mse={}".format(mse))
+                            if FLAGS.rotation_only is False and FLAGS.stacking_reward is False:
+                                print("cart_error={}".format(selected_cart_error))
+                            if FLAGS.translation_only is False and FLAGS.stacking_reward is False:
+                                print("angle_error={}".format(selected_angle_error))
+                            print("mae={}".format(selected_mae))
+                        print("-" * 80)
+
+                    print("Epoch {}: Eval".format(epoch))
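+                    # ops["eval_func"] is child_model.eval_once (see get_ops);
+                    # it reports the validation and test metrics whose CSV
+                    # headers are written in main(). A minimal offline
+                    # inspection sketch (assumes pandas is installed):
+                    #     import pandas as pd
+                    #     df = pd.read_csv(
+                    #         os.path.join(FLAGS.output_dir, "train_metrics.csv"),
+                    #         skipinitialspace=True)
+                    #     print(df[["epoch", "loss", "tr_mae"]].tail())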
+                    # print(np.reshape(normal_arc, [-1]))
+                    # print(np.reshape(reduce_arc, [-1]))
+                    ops["eval_func"](sess, "valid")
+                    # print(np.reshape(normal_arc, [-1]))
+                    # print(np.reshape(reduce_arc, [-1]))
+                    ops["eval_func"](sess, "test")
+
+                if epoch >= FLAGS.num_epochs:
+                    break


 def main(_):
-  print("-" * 80)
-  if not os.path.isdir(FLAGS.output_dir):
-    print("Path {} does not exist. Creating.".format(FLAGS.output_dir))
-    os.makedirs(FLAGS.output_dir)
-  elif FLAGS.reset_output_dir:
-    print("Path {} exists. Remove and remake.".format(FLAGS.output_dir))
-    shutil.rmtree(FLAGS.output_dir)
-    os.makedirs(FLAGS.output_dir)
-
-  print("-" * 80)
-  log_file = os.path.join(FLAGS.output_dir, "stdout")
-  print("Logging to {}".format(log_file))
-  sys.stdout = Logger(log_file)
+    print("-" * 80)
+    if not os.path.isdir(FLAGS.output_dir):
+        print("Path {} does not exist. Creating.".format(FLAGS.output_dir))
+        os.makedirs(FLAGS.output_dir)
+    elif FLAGS.reset_output_dir:
+        print("Path {} exists. Remove and remake.".format(FLAGS.output_dir))
+        shutil.rmtree(FLAGS.output_dir)
+        os.makedirs(FLAGS.output_dir)

-  utils.print_user_flags()
-  train()
+    print("-" * 80)
+    log_file = os.path.join(FLAGS.output_dir, "stdout")
+    print("Logging to {}".format(log_file))
+    sys.stdout = Logger(log_file)
+
+    utils.print_user_flags()
+    with open(os.path.join(FLAGS.output_dir, "controller_metrics.csv"), 'w') as fp:
+        fp.write("epoch, controller_step, loss, entropy, lr, gn, val_acc, bl, reward, c_mse, cart_error, angle_error, mae\n")
+    with open(os.path.join(FLAGS.output_dir, "train_metrics.csv"), 'w') as fp:
+        fp.write("epoch, global_step, loss, loss_sec, tr_acc, tr_acc_5_7_5, tr_acc_1_15, tr_acc_2_30, tr_acc_4_60, tr_acc_8_120, tr_acc_16_240, tr_acc_32_360, tr_op, tr_angle_error, tr_cart_error, tr_mae\n")
+    with open(os.path.join(FLAGS.output_dir, "valid_metrics.csv"), 'w') as fp:
+        fp.write("total_acc, total_acc_5mm_7_5deg, total_acc_1cm_15deg, total_acc_2cm_30deg, total_acc_4cm_60deg, total_acc_8cm_120deg, total_acc_16cm_240deg, total_acc_32cm_360deg, total_loss, total_mae, total_angle_error, total_cart_error, total_loss_sec\n")
+    with open(os.path.join(FLAGS.output_dir, "test_metrics.csv"), 'w') as fp:
+        fp.write("total_acc, total_acc_5mm_7_5deg, total_acc_1cm_15deg, total_acc_2cm_30deg, total_acc_4cm_60deg, total_acc_8cm_120deg, total_acc_16cm_240deg, total_acc_32cm_360deg, total_loss, total_mae, total_angle_error, total_cart_error, total_loss_sec\n")
+    train()


 if __name__ == "__main__":
-  tf.app.run()
+    tf.app.run()
diff --git a/enas/cifar10/micro_child.py b/enas/cifar10/micro_child.py
index 5102b98..a79992a 100644
--- a/enas/cifar10/micro_child.py
+++ b/enas/cifar10/micro_child.py
@@ -4,6 +4,7 @@
 import os
 import sys
+import traceback

 import numpy as np
 import tensorflow as tf
@@ -11,812 +12,1446 @@
 from enas.cifar10.models import Model
 from enas.cifar10.image_ops import conv
 from enas.cifar10.image_ops import fully_connected
-from enas.cifar10.image_ops import batch_norm
+from enas.cifar10.image_ops import norm
 from enas.cifar10.image_ops import batch_norm_with_mask
 from enas.cifar10.image_ops import relu
 from enas.cifar10.image_ops import max_pool
 from enas.cifar10.image_ops import drop_path
-from enas.cifar10.image_ops import global_avg_pool
+from enas.cifar10.image_ops import global_max_pool

 from enas.utils import count_model_params
 from enas.utils import get_train_ops
 from enas.common_ops import create_weight

+import keras
+
+import grasp_metrics
+

 class MicroChild(Model):
-  def __init__(self,
-               images,
-               labels,
-               use_aux_heads=False,
-               cutout_size=None,
-               fixed_arc=None,
-               num_layers=2,
-               num_cells=5,
-
out_filters=24, - keep_prob=1.0, - drop_path_keep_prob=None, - batch_size=32, - clip_mode=None, - grad_bound=None, - l2_reg=1e-4, - lr_init=0.1, - lr_dec_start=0, - lr_dec_every=10000, - lr_dec_rate=0.1, - lr_cosine=False, - lr_max=None, - lr_min=None, - lr_T_0=None, - lr_T_mul=None, - num_epochs=None, - optim_algo=None, - sync_replicas=False, - num_aggregate=None, - num_replicas=None, - data_format="NHWC", - name="child", - **kwargs - ): - """ - """ - - super(self.__class__, self).__init__( - images, - labels, - cutout_size=cutout_size, - batch_size=batch_size, - clip_mode=clip_mode, - grad_bound=grad_bound, - l2_reg=l2_reg, - lr_init=lr_init, - lr_dec_start=lr_dec_start, - lr_dec_every=lr_dec_every, - lr_dec_rate=lr_dec_rate, - keep_prob=keep_prob, - optim_algo=optim_algo, - sync_replicas=sync_replicas, - num_aggregate=num_aggregate, - num_replicas=num_replicas, - data_format=data_format, - name=name) - - if self.data_format == "NHWC": - self.actual_data_format = "channels_last" - elif self.data_format == "NCHW": - self.actual_data_format = "channels_first" - else: - raise ValueError("Unknown data_format '{0}'".format(self.data_format)) - - self.use_aux_heads = use_aux_heads - self.num_epochs = num_epochs - self.num_train_steps = self.num_epochs * self.num_train_batches - self.drop_path_keep_prob = drop_path_keep_prob - self.lr_cosine = lr_cosine - self.lr_max = lr_max - self.lr_min = lr_min - self.lr_T_0 = lr_T_0 - self.lr_T_mul = lr_T_mul - self.out_filters = out_filters - self.num_layers = num_layers - self.num_cells = num_cells - self.fixed_arc = fixed_arc - - self.global_step = tf.Variable( - 0, dtype=tf.int32, trainable=False, name="global_step") - - if self.drop_path_keep_prob is not None: - assert num_epochs is not None, "Need num_epochs to drop_path" - - pool_distance = self.num_layers // 3 - self.pool_layers = [pool_distance, 2 * pool_distance + 1] - - if self.use_aux_heads: - self.aux_head_indices = [self.pool_layers[-1] + 1] - - def _factorized_reduction(self, x, out_filters, stride, is_training): - """Reduces the shape of x without information loss due to striding.""" - assert out_filters % 2 == 0, ( - "Need even number of filters when using this factorized reduction.") - if stride == 1: - with tf.variable_scope("path_conv"): - inp_c = self._get_C(x) - w = create_weight("w", [1, 1, inp_c, out_filters]) - x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) + def __init__(self, + images, + labels, + use_aux_heads=False, + cutout_size=None, + fixed_arc=None, + num_layers=2, + num_cells=5, + out_filters=24, + keep_prob=1.0, + drop_path_keep_prob=None, + batch_size=32, + clip_mode=None, + grad_bound=None, + l2_reg=1e-4, + lr_init=0.1, + lr_dec_start=0, + lr_dec_every=10000, + lr_dec_rate=0.1, + lr_cosine=False, + lr_max=None, + lr_min=None, + lr_T_0=None, + lr_T_mul=None, + num_epochs=None, + optim_algo=None, + sync_replicas=False, + num_aggregate=None, + num_replicas=None, + data_format="NHWC", + name="child", + valid_set_size=32, + image_shape=(32, 32, 3), + translation_only=False, + rotation_only=False, + stacking_reward=False, + use_root=False, + one_hot_encoding=False, + dataset="cifar", + data_base_path="", + output_dir="", + pool_distance=2, + use_msle=False, + **kwargs + ): + + super(self.__class__, self).__init__( + images, + labels, + cutout_size=cutout_size, + batch_size=batch_size, + clip_mode=clip_mode, + grad_bound=grad_bound, + l2_reg=l2_reg, + lr_init=lr_init, + 
lr_dec_start=lr_dec_start, + lr_dec_every=lr_dec_every, + lr_dec_rate=lr_dec_rate, + keep_prob=keep_prob, + optim_algo=optim_algo, + sync_replicas=sync_replicas, + num_aggregate=num_aggregate, + num_replicas=num_replicas, + data_format=data_format, + name=name, + valid_set_size=valid_set_size, + image_shape=image_shape, + translation_only=translation_only, + rotation_only=rotation_only, + stacking_reward=stacking_reward, + data_base_path=data_base_path, + use_root=use_root, + one_hot_encoding=one_hot_encoding, + dataset=dataset) + + if self.data_format == "NHWC": + self.actual_data_format = "channels_last" + elif self.data_format == "NCHW": + self.actual_data_format = "channels_first" + else: + raise ValueError( + "Unknown data_format '{0}'".format(self.data_format)) + + self.use_aux_heads = use_aux_heads + self.use_root = use_root + self.num_epochs = num_epochs + self.num_train_steps = self.num_epochs * self.num_train_batches + self.drop_path_keep_prob = drop_path_keep_prob + self.lr_cosine = lr_cosine + self.lr_max = lr_max + self.lr_min = lr_min + self.lr_T_0 = lr_T_0 + self.lr_T_mul = lr_T_mul + self.out_filters = out_filters + self.num_layers = num_layers + self.num_cells = num_cells + self.fixed_arc = fixed_arc + self.translation_only = translation_only + self.rotation_only = rotation_only + self.stacking_reward = stacking_reward + self.data_base_path = data_base_path + self.verbose = 0 + self.output_dir = output_dir + self.one_hot_encoding = one_hot_encoding + self.use_msle = use_msle + + self.global_step = tf.Variable( + 0, dtype=tf.int32, trainable=False, name="global_step") + + if self.drop_path_keep_prob is not None: + assert num_epochs is not None, "Need num_epochs to drop_path" + + self.pool_distance = pool_distance + # pool_distance was originally based on the number of layers + # pool_distance = self.num_layers // 3 + # self.pool_layers = [pool_distance, 2 * pool_distance + 1] + + self.pool_layers = [] + for layer_num in range(self.num_layers): + if layer_num != 0 and layer_num % pool_distance == 0: + self.pool_layers += [layer_num] + + if self.use_aux_heads: + if len(self.pool_layers) > 2: + pool_index = int(len(self.pool_layers) / 2) + self.aux_head_indices = [self.pool_layers[pool_index] + 1] + else: + self.aux_head_indices = [self.pool_layers[-1] + 1] + + def _factorized_reduction(self, x, out_filters, stride, is_training): + """Reduces the shape of x without information loss due to striding.""" + assert out_filters % 2 == 0, ( + "Need even number of filters when using this factorized\ + reduction.") + if stride == 1: + with tf.variable_scope("path_conv"): + inp_c = self._get_C(x) + w = create_weight("w", [1, 1, inp_c, out_filters]) + x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", + data_format=self.data_format) + x = norm(x, is_training=is_training, data_format=self.data_format, norm_type="batch") + return x + + stride_spec = self._get_strides(stride) + # Skip path 1 + path1 = tf.nn.max_pool( + x, [1, 1, 1, 1], stride_spec, "VALID", + data_format=self.data_format) + with tf.variable_scope("path1_conv"): + inp_c = self._get_C(path1) + w = create_weight("w", [1, 1, inp_c, out_filters // 2]) + path1 = tf.nn.conv2d(path1, w, [1, 1, 1, 1], "VALID", + data_format=self.data_format) + + # Skip path 2 + # First pad with 0"s on the right and bottom, then shift the filter to + # include those 0"s that were added. 
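+        # Worked example (illustrative): with stride 2 on an [N, 8, 8, C]
+        # NHWC input, path1 above keeps pixels at even coordinates
+        # (0, 2, 4, 6), while the pad-and-shift below makes path2 keep
+        # pixels at odd coordinates (1, 3, 5, 7). Each path then applies a
+        # 1x1 conv with out_filters // 2 channels and the two results are
+        # concatenated, so the spatial size is halved without discarding
+        # the pixel grid a plain strided pooling would skip.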
+ if self.data_format == "NHWC": + pad_arr = [[0, 0], [0, 1], [0, 1], [0, 0]] + path2 = tf.pad(x, pad_arr)[:, 1:, 1:, :] + concat_axis = 3 + else: + pad_arr = [[0, 0], [0, 0], [0, 1], [0, 1]] + path2 = tf.pad(x, pad_arr)[:, :, 1:, 1:] + concat_axis = 1 + + path2 = tf.nn.max_pool( + path2, [1, 1, 1, 1], stride_spec, "VALID", + data_format=self.data_format) + with tf.variable_scope("path2_conv"): + inp_c = self._get_C(path2) + w = create_weight("w", [1, 1, inp_c, out_filters // 2]) + path2 = tf.nn.conv2d(path2, w, [1, 1, 1, 1], "VALID", + data_format=self.data_format) + + # Concat and apply BN + final_path = tf.concat(values=[path1, path2], axis=concat_axis) + final_path = norm(final_path, is_training=is_training, + data_format=self.data_format, norm_type="batch") + + return final_path + + def _get_C(self, x): + """ + Args: + x: tensor of shape [N, H, W, C] or [N, C, H, W] + """ + if self.data_format == "NHWC": + assert x.get_shape().as_list()[3] is not None + return x.get_shape()[3].value + elif self.data_format == "NCHW": + assert x.get_shape().as_list()[1] is not None + return x.get_shape()[1].value + else: + raise ValueError( + "Unknown data_format '{0}'".format(self.data_format)) + + def _get_HW(self, x): + """ + Args: + x: tensor of shape [N, H, W, C] or [N, C, H, W] + """ + assert x.get_shape().as_list()[2] is not None + return x.get_shape()[2].value + + def _get_strides(self, stride): + """ + Args: + x: tensor of shape [N, H, W, C] or [N, C, H, W] + """ + if self.data_format == "NHWC": + return [1, stride, stride, 1] + elif self.data_format == "NCHW": + return [1, 1, stride, stride] + else: + raise ValueError( + "Unknown data_format '{0}'".format(self.data_format)) + + def _apply_drop_path(self, x, layer_id): + drop_path_keep_prob = self.drop_path_keep_prob + + layer_ratio = float(layer_id + 1) / (self.num_layers + 2) + drop_path_keep_prob = 1.0 - layer_ratio * (1.0 - drop_path_keep_prob) + + step_ratio = tf.to_float(self.global_step + 1) / \ + tf.to_float(self.num_train_steps) + step_ratio = tf.minimum(1.0, step_ratio) + drop_path_keep_prob = 1.0 - step_ratio * (1.0 - drop_path_keep_prob) + + x = drop_path(x, drop_path_keep_prob) return x - stride_spec = self._get_strides(stride) - # Skip path 1 - path1 = tf.nn.avg_pool( - x, [1, 1, 1, 1], stride_spec, "VALID", data_format=self.data_format) - with tf.variable_scope("path1_conv"): - inp_c = self._get_C(path1) - w = create_weight("w", [1, 1, inp_c, out_filters // 2]) - path1 = tf.nn.conv2d(path1, w, [1, 1, 1, 1], "VALID", - data_format=self.data_format) - - # Skip path 2 - # First pad with 0"s on the right and bottom, then shift the filter to - # include those 0"s that were added. 
- if self.data_format == "NHWC": - pad_arr = [[0, 0], [0, 1], [0, 1], [0, 0]] - path2 = tf.pad(x, pad_arr)[:, 1:, 1:, :] - concat_axis = 3 - else: - pad_arr = [[0, 0], [0, 0], [0, 1], [0, 1]] - path2 = tf.pad(x, pad_arr)[:, :, 1:, 1:] - concat_axis = 1 - - path2 = tf.nn.avg_pool( - path2, [1, 1, 1, 1], stride_spec, "VALID", data_format=self.data_format) - with tf.variable_scope("path2_conv"): - inp_c = self._get_C(path2) - w = create_weight("w", [1, 1, inp_c, out_filters // 2]) - path2 = tf.nn.conv2d(path2, w, [1, 1, 1, 1], "VALID", - data_format=self.data_format) - - # Concat and apply BN - final_path = tf.concat(values=[path1, path2], axis=concat_axis) - final_path = batch_norm(final_path, is_training, - data_format=self.data_format) - - return final_path - - def _get_C(self, x): - """ - Args: - x: tensor of shape [N, H, W, C] or [N, C, H, W] - """ - if self.data_format == "NHWC": - return x.get_shape()[3].value - elif self.data_format == "NCHW": - return x.get_shape()[1].value - else: - raise ValueError("Unknown data_format '{0}'".format(self.data_format)) - - def _get_HW(self, x): - """ - Args: - x: tensor of shape [N, H, W, C] or [N, C, H, W] - """ - return x.get_shape()[2].value - - def _get_strides(self, stride): - """ - Args: - x: tensor of shape [N, H, W, C] or [N, C, H, W] - """ - if self.data_format == "NHWC": - return [1, stride, stride, 1] - elif self.data_format == "NCHW": - return [1, 1, stride, stride] - else: - raise ValueError("Unknown data_format '{0}'".format(self.data_format)) - - def _apply_drop_path(self, x, layer_id): - drop_path_keep_prob = self.drop_path_keep_prob - - layer_ratio = float(layer_id + 1) / (self.num_layers + 2) - drop_path_keep_prob = 1.0 - layer_ratio * (1.0 - drop_path_keep_prob) - - step_ratio = tf.to_float(self.global_step + 1) / tf.to_float(self.num_train_steps) - step_ratio = tf.minimum(1.0, step_ratio) - drop_path_keep_prob = 1.0 - step_ratio * (1.0 - drop_path_keep_prob) - - x = drop_path(x, drop_path_keep_prob) - return x - - def _maybe_calibrate_size(self, layers, out_filters, is_training): - """Makes sure layers[0] and layers[1] have the same shapes.""" - - hw = [self._get_HW(layer) for layer in layers] - c = [self._get_C(layer) for layer in layers] - - with tf.variable_scope("calibrate"): - x = layers[0] - if hw[0] != hw[1]: - assert hw[0] == 2 * hw[1] - with tf.variable_scope("pool_x"): - x = tf.nn.relu(x) - x = self._factorized_reduction(x, out_filters, 2, is_training) - elif c[0] != out_filters: - with tf.variable_scope("pool_x"): - w = create_weight("w", [1, 1, c[0], out_filters]) - x = tf.nn.relu(x) - x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) - - y = layers[1] - if c[1] != out_filters: - with tf.variable_scope("pool_y"): - w = create_weight("w", [1, 1, c[1], out_filters]) - y = tf.nn.relu(y) - y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - y = batch_norm(y, is_training, data_format=self.data_format) - return [x, y] - - def _model(self, images, is_training, reuse=False): - """Compute the logits given the images.""" - - if self.fixed_arc is None: - is_training = True - - with tf.variable_scope(self.name, reuse=reuse): - # the first two inputs - input_channels = self._get_C(images) - with tf.variable_scope("stem_conv"): - w = create_weight("w", [input_channels, input_channels, input_channels, self.out_filters * 3]) - x = tf.nn.conv2d( - images, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - x = 
batch_norm(x, is_training, data_format=self.data_format)
-      if self.data_format == "NHWC":
-        split_axis = 3
-      elif self.data_format == "NCHW":
-        split_axis = 1
-      else:
-        raise ValueError("Unknown data_format '{0}'".format(self.data_format))
-      layers = [x, x]
-
-      # building layers in the micro space
-      out_filters = self.out_filters
-      for layer_id in range(self.num_layers + 2):
-        with tf.variable_scope("layer_{0}".format(layer_id)):
-          if layer_id not in self.pool_layers:
-            if self.fixed_arc is None:
-              x = self._enas_layer(
-                layer_id, layers, self.normal_arc, out_filters)
+    def _maybe_calibrate_size(self, layers, out_filters, is_training):
+        """Makes sure layers[0] and layers[1] have the same shapes."""
+
+        hw = [self._get_HW(layer) for layer in layers]
+        c = [self._get_C(layer) for layer in layers]
+
+        with tf.variable_scope("calibrate"):
+            x = layers[0]
+            if hw[0] != hw[1]:
+                assert hw[0] == 2 * hw[1]
+                with tf.variable_scope("pool_x"):
+                    x = tf.nn.elu(x)
+                    x = self._factorized_reduction(
+                        x, out_filters, 2, is_training)
+            elif c[0] != out_filters:
+                with tf.variable_scope("pool_x"):
+                    w = create_weight("w", [1, 1, c[0], out_filters])
+                    x = tf.nn.elu(x)
+                    x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
+                                     data_format=self.data_format)
+                    x = norm(
+                        x, is_training=is_training, data_format=self.data_format, norm_type="batch")
+
+            y = layers[1]
+            if c[1] != out_filters:
+                with tf.variable_scope("pool_y"):
+                    w = create_weight("w", [1, 1, c[1], out_filters])
+                    y = tf.nn.elu(y)
+                    y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME",
+                                     data_format=self.data_format)
+                    y = norm(
+                        y, is_training=is_training, data_format=self.data_format, norm_type="batch")
+        return [x, y]
+
+    def concat_images_with_tiled_vector(self, images, vector):
+        """Combine a set of images with a vector, tiling the vector at each pixel in the images and concatenating on the channel axis.
+
+        # Params
+
+            images: list of images with the same dimensions
+            vector: vector to tile on each image. If you have
+                more than one vector, simply concatenate them
+                all before calling this function.
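+
+        # Example
+
+            For images of shape [batch, 224, 224, 3] and a vector of
+            shape [batch, 5], the result has shape [batch, 224, 224, 8]:
+            the 5 vector values are repeated at every pixel and appended
+            as extra channels (224x224 is only an illustrative size).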
+
+        # Returns
+
+            A tensor with the vector tiled as extra channels at each pixel
+            of the input images.
+        """
+        with tf.variable_scope('concat_images_with_tiled_vector'):
+            if not isinstance(images, list):
+                images = [images]
+            # keras.backend supplies int_shape/concatenate for symbolic tensors
+            image_shape = keras.backend.int_shape(images[0])
+            # NOTE: tile_vector_as_image_channels is not defined in this
+            # module; a TF/Keras equivalent of tile_vector_as_image_channels_np
+            # (block_stacking_reader.py) must be in scope for this to run.
+            tiled_vector = tile_vector_as_image_channels(vector, image_shape)
+            images.append(tiled_vector)
+            combined = keras.backend.concatenate(images)
+
+            return combined
+
+    def _model(self, images, is_training, reuse=False):
+        """Compute the logits given the images."""
+
+        # TODO(ahundt) this line doesn't seem correct, because if doing eval
+        # with fixed arcs, training should definitely be false
+        # if self.fixed_arc is None:
+        #     is_training = True
+
+        with tf.variable_scope(self.name, reuse=reuse):
+            # Conv for 2 separate stacking images
+            if self.dataset == "stacking" and self.use_root is True:
+                # input_channels_1 = self._get_C(images[0])
+                # input_channels_2 = self._get_C(images[1])
+                with tf.variable_scope("init_root"):
+                    w_1 = create_weight(
+                        "w_1", [3, 3, 3, 64])
+                    x_1 = tf.nn.conv2d(
+                        images[:, :, :, :3], w_1, [1, 1, 1, 1], "SAME")
+                    x_1 = norm(x_1, is_training=is_training, data_format=self.data_format, norm_type="batch", name="x_1_norm")
+                    x_1 = tf.nn.elu(x_1, name='elu_x_1')
+                    w_2 = create_weight(
+                        "w_2", [3, 3, 3, 64])
+                    x_2 = tf.nn.conv2d(
+                        images[:, :, :, 3:6], w_2, [1, 1, 1, 1], "SAME")
+                    x_2 = norm(x_2, is_training=is_training, data_format=self.data_format, norm_type="batch", name="x_2_norm")
+                    x_2 = tf.nn.elu(x_2, name='elu_x_2')
+                    x_3 = tf.layers.dense(images[:, :, :, 6:], units=2048, activation=tf.nn.relu)
+                    # NOTE: tf.nn.dropout takes keep_prob, so 0.25 here keeps
+                    # only 25% of the activations (drops 75%)
+                    x_3 = tf.nn.dropout(x_3, 0.25)
+                    # x_3 = tf.layers.dense(x_3, units=64, activation=tf.nn.relu)
+
+                    # dense_layer
+                    # tiling of images
+                    print("shape of x_1:", x_1.shape)
+                    image = [x_1, x_2]
+                    print("number of root image branches:", len(image))
+                    x = tf.concat([x_1, x_2, x_3], axis=-1)
+                    print("shape after concat:", x.shape)
+
+            # the first two inputs
+            if self.dataset == "stacking" and self.use_root is True:
+                input_channels = self._get_C(x)
+            else:
-            x = self._fixed_layer(
-              layer_id, layers, self.normal_arc, out_filters, 1, is_training,
-              normal_or_reduction_cell="normal")
-          else:
-            out_filters *= 2
-            if self.fixed_arc is None:
-              x = self._factorized_reduction(x, out_filters, 2, is_training)
-              layers = [layers[-1], x]
-              x = self._enas_layer(
-                layer_id, layers, self.reduce_arc, out_filters)
+                input_channels = self._get_C(images)
+            print("stem conv input channels:", input_channels)
+            with tf.variable_scope("stem_conv"):
+                w = create_weight(
+                    "w", [3, 3, input_channels,
+                          self.out_filters * 3])
+                if self.use_root is True:
+                    x = tf.nn.conv2d(
+                        x, w, [1, 1, 1, 1], "SAME",
+                        data_format=self.data_format)
+                else:
+                    x = tf.nn.conv2d(
+                        images, w, [1, 1, 1, 1], "SAME",
+                        data_format=self.data_format)
+                x = norm(x, is_training=is_training, data_format=self.data_format, norm_type="batch")
+            if self.data_format == "NHWC":
+                split_axis = 3
+            elif self.data_format == "NCHW":
+                split_axis = 1
+            else:
-          else:
-            x = self._fixed_layer(
-              layer_id, layers, self.reduce_arc, out_filters, 2, is_training,
-              normal_or_reduction_cell="reduction")
-        print("Layer {0:>2d}: {1}".format(layer_id, x))
-        layers = [layers[-1], x]
-
-        # auxiliary heads
-        self.num_aux_vars = 0
-        if (self.use_aux_heads and
-            layer_id in self.aux_head_indices
-            and is_training):
-          print("Using aux_head at layer {0}".format(layer_id))
-          with tf.variable_scope("aux_head"):
-            aux_logits = tf.nn.relu(x)
-            aux_logits = tf.layers.average_pooling2d(
-              aux_logits, [5, 5], [3, 3], "VALID",
-              data_format=self.actual_data_format)
-            with tf.variable_scope("proj"):
-              inp_c =
self._get_C(aux_logits) - w = create_weight("w", [1, 1, inp_c, 128]) - aux_logits = tf.nn.conv2d(aux_logits, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - aux_logits = batch_norm(aux_logits, is_training=True, - data_format=self.data_format) - aux_logits = tf.nn.relu(aux_logits) - - with tf.variable_scope("avg_pool"): - inp_c = self._get_C(aux_logits) - hw = self._get_HW(aux_logits) - w = create_weight("w", [hw, hw, inp_c, 768]) - aux_logits = tf.nn.conv2d(aux_logits, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - aux_logits = batch_norm(aux_logits, is_training=True, - data_format=self.data_format) - aux_logits = tf.nn.relu(aux_logits) - + raise ValueError( + "Unknown data_format '{0}'".format(self.data_format)) + layers = [x, x] + + # building layers in the micro space + out_filters = self.out_filters + for layer_id in range(self.num_layers + 2): + with tf.variable_scope("layer_{0}".format(layer_id)): + if layer_id not in self.pool_layers: + if self.fixed_arc is None: + x = self._enas_layer( + layer_id, layers, self.normal_arc, out_filters, + is_training=is_training) + else: + x = self._fixed_layer( + layer_id, layers, self.normal_arc, out_filters, + 1, is_training=is_training, + normal_or_reduction_cell="normal") + else: + out_filters *= 2 + if self.fixed_arc is None: + x = self._factorized_reduction( + x, out_filters, 2, is_training) + layers = [layers[-1], x] + x = self._enas_layer( + layer_id, layers, self.reduce_arc, out_filters, + is_training=is_training) + else: + x = self._fixed_layer( + layer_id, layers, self.reduce_arc, out_filters, + 2, is_training=is_training, + normal_or_reduction_cell="reduction") + print("Layer {0:>2d}: {1}".format(layer_id, x)) + layers = [layers[-1], x] + + # auxiliary heads + self.num_aux_vars = 0 + if (self.use_aux_heads and + layer_id in self.aux_head_indices + and is_training): + print("Using aux_head at layer {0}".format(layer_id)) + with tf.variable_scope("aux_head"): + aux_logits = tf.nn.elu(x) + aux_logits = tf.layers.average_pooling2d( + aux_logits, [5, 5], [3, 3], "VALID", + data_format=self.actual_data_format) + with tf.variable_scope("proj"): + inp_c = self._get_C(aux_logits) + w = create_weight("w", [1, 1, inp_c, 128]) + aux_logits = tf.nn.conv2d(aux_logits, w, + [1, 1, 1, 1], "SAME", + data_format=self.data_format) + aux_logits = norm(aux_logits, + is_training=is_training, + data_format=self.data_format, norm_type="batch") + aux_logits = tf.nn.elu(aux_logits) + + with tf.variable_scope("avg_pool"): + inp_c = self._get_C(aux_logits) + hw = self._get_HW(aux_logits) + w = create_weight("w", [hw, hw, inp_c, 768]) + aux_logits = tf.nn.conv2d(aux_logits, w, [1, 1, 1, 1], "SAME", + data_format=self.data_format) + aux_logits = norm(aux_logits, is_training=is_training, + data_format=self.data_format, norm_type="batch") + aux_logits = tf.nn.elu(aux_logits) + + with tf.variable_scope("fc"): + aux_logits = global_max_pool(aux_logits, + data_format=self.data_format) + inp_c = aux_logits.get_shape()[1].value + w = create_weight("w", [inp_c, self.num_classes]) + aux_logits = tf.matmul(aux_logits, w) + self.aux_logits = aux_logits + + aux_head_variables = [ + var for var in tf.trainable_variables() if ( + var.name.startswith(self.name) and "aux_head" in var.name)] + self.num_aux_vars = count_model_params(aux_head_variables) + print("Aux head uses {0} params".format(self.num_aux_vars)) + + x = tf.nn.elu(x) + x = global_max_pool(x, data_format=self.data_format) + if is_training and self.keep_prob is not None and self.keep_prob < 1.0: + 
x = tf.nn.dropout(x, self.keep_prob) with tf.variable_scope("fc"): - aux_logits = global_avg_pool(aux_logits, - data_format=self.data_format) - inp_c = aux_logits.get_shape()[1].value - w = create_weight("w", [inp_c, 10]) - aux_logits = tf.matmul(aux_logits, w) - self.aux_logits = aux_logits - - aux_head_variables = [ - var for var in tf.trainable_variables() if ( - var.name.startswith(self.name) and "aux_head" in var.name)] - self.num_aux_vars = count_model_params(aux_head_variables) - print("Aux head uses {0} params".format(self.num_aux_vars)) - - x = tf.nn.relu(x) - x = global_avg_pool(x, data_format=self.data_format) - if is_training and self.keep_prob is not None and self.keep_prob < 1.0: - x = tf.nn.dropout(x, self.keep_prob) - with tf.variable_scope("fc"): - inp_c = x.get_shape()[1] - w = create_weight("w", [inp_c, 10]) - x = tf.matmul(x, w) - return x - - def _fixed_conv(self, x, f_size, out_filters, stride, is_training, - stack_convs=2): - """Apply fixed convolution. - - Args: - stacked_convs: number of separable convs to apply. - """ - - for conv_id in range(stack_convs): - inp_c = self._get_C(x) - if conv_id == 0: - strides = self._get_strides(stride) - else: - strides = [1, 1, 1, 1] - - with tf.variable_scope("sep_conv_{}".format(conv_id)): - w_depthwise = create_weight("w_depth", [f_size, f_size, inp_c, 1]) - w_pointwise = create_weight("w_point", [1, 1, inp_c, out_filters]) - x = tf.nn.relu(x) - x = tf.nn.separable_conv2d( - x, - depthwise_filter=w_depthwise, - pointwise_filter=w_pointwise, - strides=strides, padding="SAME", data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) - - return x - - def _fixed_combine(self, layers, used, out_filters, is_training, - normal_or_reduction_cell="normal"): - """Adjust if necessary. - - Args: - layers: a list of tf tensors of size [NHWC] of [NCHW]. - used: a numpy tensor, [0] means not used. - """ - - out_hw = min([self._get_HW(layer) - for i, layer in enumerate(layers) if used[i] == 0]) - out = [] - - with tf.variable_scope("final_combine"): - for i, layer in enumerate(layers): - if used[i] == 0: - hw = self._get_HW(layer) - if hw > out_hw: - assert hw == out_hw * 2, ("i_hw={0} != {1}=o_hw".format(hw, out_hw)) - with tf.variable_scope("calibrate_{0}".format(i)): - x = self._factorized_reduction(layer, out_filters, 2, is_training) - else: - x = layer - out.append(x) - - if self.data_format == "NHWC": - out = tf.concat(out, axis=3) - elif self.data_format == "NCHW": - out = tf.concat(out, axis=1) - else: - raise ValueError("Unknown data_format '{0}'".format(self.data_format)) - - return out - - def _fixed_layer(self, layer_id, prev_layers, arc, out_filters, stride, - is_training, normal_or_reduction_cell="normal"): - """ - Args: - prev_layers: cache of previous layers. 
for skip connections - is_training: for batch_norm - """ - - assert len(prev_layers) == 2 - layers = [prev_layers[0], prev_layers[1]] - layers = self._maybe_calibrate_size(layers, out_filters, - is_training=is_training) - - with tf.variable_scope("layer_base"): - x = layers[1] - inp_c = self._get_C(x) - w = create_weight("w", [1, 1, inp_c, out_filters]) - x = tf.nn.relu(x) - x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) - layers[1] = x - - used = np.zeros([self.num_cells + 2], dtype=np.int32) - f_sizes = [3, 5] - for cell_id in range(self.num_cells): - with tf.variable_scope("cell_{}".format(cell_id)): - x_id = arc[4 * cell_id] - used[x_id] += 1 - x_op = arc[4 * cell_id + 1] - x = layers[x_id] - x_stride = stride if x_id in [0, 1] else 1 - with tf.variable_scope("x_conv"): - if x_op in [0, 1]: - f_size = f_sizes[x_op] - x = self._fixed_conv(x, f_size, out_filters, x_stride, is_training) - elif x_op in [2, 3]: + inp_c = x.get_shape()[1] + # print("inp_c--------------",inp_c) + # print("shape x model --------------", x.shape) + w = create_weight("w", [inp_c, self.num_classes]) + x = tf.matmul(x, w) + return x + + def _fixed_conv(self, x, f_size, out_filters, stride, is_training, + stack_convs=2): + """Apply fixed convolution. + + Args: + stacked_convs: number of separable convs to apply. + """ + + for conv_id in range(stack_convs): inp_c = self._get_C(x) - if x_op == 2: - x = tf.layers.average_pooling2d( - x, [3, 3], [x_stride, x_stride], "SAME", - data_format=self.actual_data_format) + if conv_id == 0: + strides = self._get_strides(stride) else: - x = tf.layers.max_pooling2d( - x, [3, 3], [x_stride, x_stride], "SAME", - data_format=self.actual_data_format) - if inp_c != out_filters: - w = create_weight("w", [1, 1, inp_c, out_filters]) - x = tf.nn.relu(x) - x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) - else: - inp_c = self._get_C(x) - if x_stride > 1: - assert x_stride == 2 - x = self._factorized_reduction(x, out_filters, 2, is_training) - if inp_c != out_filters: - w = create_weight("w", [1, 1, inp_c, out_filters]) - x = tf.nn.relu(x) - x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) - if (x_op in [0, 1, 2, 3] and - self.drop_path_keep_prob is not None and - is_training): - x = self._apply_drop_path(x, layer_id) - - y_id = arc[4 * cell_id + 2] - used[y_id] += 1 - y_op = arc[4 * cell_id + 3] - y = layers[y_id] - y_stride = stride if y_id in [0, 1] else 1 - with tf.variable_scope("y_conv"): - if y_op in [0, 1]: - f_size = f_sizes[y_op] - y = self._fixed_conv(y, f_size, out_filters, y_stride, is_training) - elif y_op in [2, 3]: - inp_c = self._get_C(y) - if y_op == 2: - y = tf.layers.average_pooling2d( - y, [3, 3], [y_stride, y_stride], "SAME", - data_format=self.actual_data_format) + strides = [1, 1, 1, 1] + + with tf.variable_scope("sep_conv_{}".format(conv_id)): + w_depthwise = create_weight( + "w_depth", [f_size, f_size, inp_c, 1]) + w_pointwise = create_weight( + "w_point", [1, 1, inp_c, out_filters]) + x = tf.nn.elu(x) + x = tf.nn.separable_conv2d( + x, + depthwise_filter=w_depthwise, + pointwise_filter=w_pointwise, + strides=strides, padding="SAME", data_format=self.data_format) + x = norm(x, is_training=is_training, data_format=self.data_format, norm_type="batch") + + return x + + def _fixed_combine(self, 
layers, used, out_filters, is_training, + normal_or_reduction_cell="normal"): + """Adjust if necessary. + + Args: + layers: a list of tf tensors of size [NHWC] of [NCHW]. + used: a numpy tensor, [0] means not used. + """ + + out_hw = min([self._get_HW(layer) + for i, layer in enumerate(layers) if used[i] == 0]) + out = [] + + with tf.variable_scope("final_combine"): + for i, layer in enumerate(layers): + if used[i] == 0: + hw = self._get_HW(layer) + if hw > out_hw: + assert hw == out_hw * \ + 2, ("i_hw={0} != {1}=o_hw".format(hw, out_hw)) + with tf.variable_scope("calibrate_{0}".format(i)): + x = self._factorized_reduction( + layer, out_filters, 2, is_training) + else: + x = layer + out.append(x) + + if self.data_format == "NHWC": + out = tf.concat(out, axis=3) + elif self.data_format == "NCHW": + out = tf.concat(out, axis=1) else: - y = tf.layers.max_pooling2d( - y, [3, 3], [y_stride, y_stride], "SAME", - data_format=self.actual_data_format) - if inp_c != out_filters: - w = create_weight("w", [1, 1, inp_c, out_filters]) - y = tf.nn.relu(y) - y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - y = batch_norm(y, is_training, data_format=self.data_format) - else: - inp_c = self._get_C(y) - if y_stride > 1: - assert y_stride == 2 - y = self._factorized_reduction(y, out_filters, 2, is_training) - if inp_c != out_filters: - w = create_weight("w", [1, 1, inp_c, out_filters]) - y = tf.nn.relu(y) - y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - y = batch_norm(y, is_training, data_format=self.data_format) - - if (y_op in [0, 1, 2, 3] and - self.drop_path_keep_prob is not None and - is_training): - y = self._apply_drop_path(y, layer_id) - - out = x + y - layers.append(out) - out = self._fixed_combine(layers, used, out_filters, is_training, - normal_or_reduction_cell) - - return out - - def _enas_cell(self, x, curr_cell, prev_cell, op_id, out_filters): - """Performs an enas operation specified by op_id.""" - - num_possible_inputs = curr_cell + 1 - - with tf.variable_scope("avg_pool"): - avg_pool = tf.layers.average_pooling2d( - x, [3, 3], [1, 1], "SAME", data_format=self.actual_data_format) - avg_pool_c = self._get_C(avg_pool) - if avg_pool_c != out_filters: - with tf.variable_scope("conv"): - w = create_weight( - "w", [num_possible_inputs, avg_pool_c * out_filters]) - w = w[prev_cell] - w = tf.reshape(w, [1, 1, avg_pool_c, out_filters]) - avg_pool = tf.nn.relu(avg_pool) - avg_pool = tf.nn.conv2d(avg_pool, w, strides=[1, 1, 1, 1], - padding="SAME", data_format=self.data_format) - avg_pool = batch_norm(avg_pool, is_training=True, - data_format=self.data_format) - - with tf.variable_scope("max_pool"): - max_pool = tf.layers.max_pooling2d( - x, [3, 3], [1, 1], "SAME", data_format=self.actual_data_format) - max_pool_c = self._get_C(max_pool) - if max_pool_c != out_filters: - with tf.variable_scope("conv"): - w = create_weight( - "w", [num_possible_inputs, max_pool_c * out_filters]) - w = w[prev_cell] - w = tf.reshape(w, [1, 1, max_pool_c, out_filters]) - max_pool = tf.nn.relu(max_pool) - max_pool = tf.nn.conv2d(max_pool, w, strides=[1, 1, 1, 1], - padding="SAME", data_format=self.data_format) - max_pool = batch_norm(max_pool, is_training=True, - data_format=self.data_format) - - x_c = self._get_C(x) - if x_c != out_filters: - with tf.variable_scope("x_conv"): - w = create_weight("w", [num_possible_inputs, x_c * out_filters]) - w = w[prev_cell] - w = tf.reshape(w, [1, 1, x_c, out_filters]) - x = tf.nn.relu(x) - x = tf.nn.conv2d(x, w, strides=[1, 
1, 1, 1], padding="SAME", - data_format=self.data_format) - x = batch_norm(x, is_training=True, data_format=self.data_format) - - out = [ - self._enas_conv(x, curr_cell, prev_cell, 3, out_filters), - self._enas_conv(x, curr_cell, prev_cell, 5, out_filters), - avg_pool, - max_pool, - x, - ] - - out = tf.stack(out, axis=0) - out = out[op_id, :, :, :, :] - return out - - def _enas_conv(self, x, curr_cell, prev_cell, filter_size, out_filters, - stack_conv=2): - """Performs an enas convolution specified by the relevant parameters.""" - - with tf.variable_scope("conv_{0}x{0}".format(filter_size)): - num_possible_inputs = curr_cell + 2 - for conv_id in range(stack_conv): - with tf.variable_scope("stack_{0}".format(conv_id)): - # create params and pick the correct path - inp_c = self._get_C(x) - w_depthwise = create_weight( - "w_depth", [num_possible_inputs, filter_size * filter_size * inp_c]) - w_depthwise = w_depthwise[prev_cell, :] - w_depthwise = tf.reshape( - w_depthwise, [filter_size, filter_size, inp_c, 1]) - - w_pointwise = create_weight( - "w_point", [num_possible_inputs, inp_c * out_filters]) - w_pointwise = w_pointwise[prev_cell, :] - w_pointwise = tf.reshape(w_pointwise, [1, 1, inp_c, out_filters]) - - with tf.variable_scope("bn"): - zero_init = tf.initializers.zeros(dtype=tf.float32) - one_init = tf.initializers.ones(dtype=tf.float32) - offset = create_weight( - "offset", [num_possible_inputs, out_filters], - initializer=zero_init) - scale = create_weight( - "scale", [num_possible_inputs, out_filters], - initializer=one_init) - offset = offset[prev_cell] - scale = scale[prev_cell] - - # the computations - x = tf.nn.relu(x) - x = tf.nn.separable_conv2d( + raise ValueError( + "Unknown data_format '{0}'".format(self.data_format)) + + return out + + def _fixed_layer(self, layer_id, prev_layers, arc, out_filters, stride, + is_training, normal_or_reduction_cell="normal"): + """ + Args: + prev_layers: cache of previous layers. 
for skip connections + is_training: for batch_norm + """ + + assert len(prev_layers) == 2 + layers = [prev_layers[0], prev_layers[1]] + layers = self._maybe_calibrate_size(layers, out_filters, + is_training=is_training) + + with tf.variable_scope("layer_base"): + x = layers[1] + inp_c = self._get_C(x) + w = create_weight("w", [1, 1, inp_c, out_filters]) + x = tf.nn.elu(x) + x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", + data_format=self.data_format) + x = norm(x, is_training=is_training, data_format=self.data_format, norm_type="batch") + layers[1] = x + + used = np.zeros([self.num_cells + 2], dtype=np.int32) + f_sizes = [3, 5] + for cell_id in range(self.num_cells): + with tf.variable_scope("cell_{}".format(cell_id)): + x_id = arc[4 * cell_id] + used[x_id] += 1 + x_op = arc[4 * cell_id + 1] + x = layers[x_id] + x_stride = stride if x_id in [0, 1] else 1 + with tf.variable_scope("x_conv"): + if x_op in [0, 1]: + f_size = f_sizes[x_op] + x = self._fixed_conv( + x, f_size, out_filters, x_stride, is_training) + elif x_op in [2, 3]: + inp_c = self._get_C(x) + if x_op == 2: + x = tf.layers.average_pooling2d( + x, [3, 3], [x_stride, x_stride], "SAME", + data_format=self.actual_data_format) + else: + x = tf.layers.max_pooling2d( + x, [3, 3], [x_stride, x_stride], "SAME", + data_format=self.actual_data_format) + if inp_c != out_filters: + w = create_weight("w", [1, 1, inp_c, out_filters]) + x = tf.nn.elu(x) + x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", + data_format=self.data_format) + x = norm( + x, is_training=is_training, data_format=self.data_format, norm_type="batch") + else: + inp_c = self._get_C(x) + if x_stride > 1: + assert x_stride == 2 + x = self._factorized_reduction( + x, out_filters, 2, is_training) + if inp_c != out_filters: + w = create_weight("w", [1, 1, inp_c, out_filters]) + x = tf.nn.elu(x) + x = tf.nn.conv2d( + x, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) + x = norm( + x, is_training=is_training, data_format=self.data_format, norm_type="batch") + if (x_op in [0, 1, 2, 3] and + self.drop_path_keep_prob is not None and + is_training): + x = self._apply_drop_path(x, layer_id) + + y_id = arc[4 * cell_id + 2] + used[y_id] += 1 + y_op = arc[4 * cell_id + 3] + y = layers[y_id] + y_stride = stride if y_id in [0, 1] else 1 + with tf.variable_scope("y_conv"): + if y_op in [0, 1]: + f_size = f_sizes[y_op] + y = self._fixed_conv( + y, f_size, out_filters, y_stride, is_training) + elif y_op in [2, 3]: + inp_c = self._get_C(y) + if y_op == 2: + y = tf.layers.average_pooling2d( + y, [3, 3], [y_stride, y_stride], "SAME", + data_format=self.actual_data_format) + else: + y = tf.layers.max_pooling2d( + y, [3, 3], [y_stride, y_stride], "SAME", + data_format=self.actual_data_format) + if inp_c != out_filters: + w = create_weight("w", [1, 1, inp_c, out_filters]) + y = tf.nn.elu(y) + y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME", + data_format=self.data_format) + y = norm( + y, is_training=is_training, data_format=self.data_format, norm_type="batch") + else: + inp_c = self._get_C(y) + if y_stride > 1: + assert y_stride == 2 + y = self._factorized_reduction( + y, out_filters, 2, is_training) + if inp_c != out_filters: + w = create_weight("w", [1, 1, inp_c, out_filters]) + y = tf.nn.elu(y) + y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME", + data_format=self.data_format) + y = norm( + y, is_training=is_training, data_format=self.data_format, norm_type="batch") + + if (y_op in [0, 1, 2, 3] and + self.drop_path_keep_prob is not None and + is_training): + y = self._apply_drop_path(y, layer_id) + + 
out = x + y + layers.append(out) + out = self._fixed_combine(layers, used, out_filters, is_training=is_training, + normal_or_reduction_cell=normal_or_reduction_cell) + + return out + + def _enas_cell(self, x, curr_cell, prev_cell, op_id, out_filters, is_training): + """Performs an enas operation specified by op_id.""" + + num_possible_inputs = curr_cell + 1 + + with tf.variable_scope("avg_pool"): + avg_pool = tf.layers.average_pooling2d( + x, [3, 3], [1, 1], "SAME", data_format=self.actual_data_format) + avg_pool_c = self._get_C(avg_pool) + if avg_pool_c != out_filters: + with tf.variable_scope("conv"): + w = create_weight( + "w", [num_possible_inputs, avg_pool_c * out_filters]) + w = w[prev_cell] + w = tf.reshape(w, [1, 1, avg_pool_c, out_filters]) + avg_pool = tf.nn.elu(avg_pool) + avg_pool = tf.nn.conv2d(avg_pool, w, strides=[1, 1, 1, 1], + padding="SAME", data_format=self.data_format) + avg_pool = norm(avg_pool, is_training=is_training, + data_format=self.data_format, norm_type="batch") + + with tf.variable_scope("max_pool"): + max_pool = tf.layers.max_pooling2d( + x, [3, 3], [1, 1], "SAME", data_format=self.actual_data_format) + max_pool_c = self._get_C(max_pool) + if max_pool_c != out_filters: + with tf.variable_scope("conv"): + w = create_weight( + "w", [num_possible_inputs, max_pool_c * out_filters]) + w = w[prev_cell] + w = tf.reshape(w, [1, 1, max_pool_c, out_filters]) + max_pool = tf.nn.elu(max_pool) + max_pool = tf.nn.conv2d(max_pool, w, strides=[1, 1, 1, 1], + padding="SAME", data_format=self.data_format) + max_pool = norm(max_pool, is_training=is_training, + data_format=self.data_format, norm_type="batch") + + x_c = self._get_C(x) + if x_c != out_filters: + with tf.variable_scope("x_conv"): + w = create_weight( + "w", [num_possible_inputs, x_c * out_filters]) + w = w[prev_cell] + w = tf.reshape(w, [1, 1, x_c, out_filters]) + x = tf.nn.elu(x) + x = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME", + data_format=self.data_format) + x = norm(x, is_training=is_training, + data_format=self.data_format, norm_type="batch") + + out = [ + self._enas_conv(x, curr_cell, prev_cell, 3, out_filters, is_training=is_training), + self._enas_conv(x, curr_cell, prev_cell, 5, out_filters, is_training=is_training), + avg_pool, + max_pool, x, - depthwise_filter=w_depthwise, - pointwise_filter=w_pointwise, - strides=[1, 1, 1, 1], padding="SAME", - data_format=self.data_format) - x, _, _ = tf.nn.fused_batch_norm( - x, scale, offset, epsilon=1e-5, data_format=self.data_format, - is_training=True) - return x - - def _enas_layer(self, layer_id, prev_layers, arc, out_filters): - """ - Args: - layer_id: current layer - prev_layers: cache of previous layers. for skip connections - start_idx: where to start looking at. technically, we can infer this - from layer_id, but why bother... 
- """ - - assert len(prev_layers) == 2, "need exactly 2 inputs" - layers = [prev_layers[0], prev_layers[1]] - layers = self._maybe_calibrate_size(layers, out_filters, is_training=True) - used = [] - for cell_id in range(self.num_cells): - prev_layers = tf.stack(layers, axis=0) - with tf.variable_scope("cell_{0}".format(cell_id)): - with tf.variable_scope("x"): - x_id = arc[4 * cell_id] - x_op = arc[4 * cell_id + 1] - x = prev_layers[x_id, :, :, :, :] - x = self._enas_cell(x, cell_id, x_id, x_op, out_filters) - x_used = tf.one_hot(x_id, depth=self.num_cells + 2, dtype=tf.int32) - - with tf.variable_scope("y"): - y_id = arc[4 * cell_id + 2] - y_op = arc[4 * cell_id + 3] - y = prev_layers[y_id, :, :, :, :] - y = self._enas_cell(y, cell_id, y_id, y_op, out_filters) - y_used = tf.one_hot(y_id, depth=self.num_cells + 2, dtype=tf.int32) - - out = x + y - used.extend([x_used, y_used]) - layers.append(out) - - used = tf.add_n(used) - indices = tf.where(tf.equal(used, 0)) - indices = tf.to_int32(indices) - indices = tf.reshape(indices, [-1]) - num_outs = tf.size(indices) - out = tf.stack(layers, axis=0) - out = tf.gather(out, indices, axis=0) - - inp = prev_layers[0] - if self.data_format == "NHWC": - N = tf.shape(inp)[0] - H = tf.shape(inp)[1] - W = tf.shape(inp)[2] - C = tf.shape(inp)[3] - out = tf.transpose(out, [1, 2, 3, 0, 4]) - out = tf.reshape(out, [N, H, W, num_outs * out_filters]) - elif self.data_format == "NCHW": - N = tf.shape(inp)[0] - C = tf.shape(inp)[1] - H = tf.shape(inp)[2] - W = tf.shape(inp)[3] - out = tf.transpose(out, [1, 0, 2, 3, 4]) - out = tf.reshape(out, [N, num_outs * out_filters, H, W]) - else: - raise ValueError("Unknown data_format '{0}'".format(self.data_format)) - - with tf.variable_scope("final_conv"): - w = create_weight("w", [self.num_cells + 2, out_filters * out_filters]) - w = tf.gather(w, indices, axis=0) - w = tf.reshape(w, [1, 1, num_outs * out_filters, out_filters]) - out = tf.nn.relu(out) - out = tf.nn.conv2d(out, w, strides=[1, 1, 1, 1], padding="SAME", - data_format=self.data_format) - out = batch_norm(out, is_training=True, data_format=self.data_format) - - out = tf.reshape(out, tf.shape(prev_layers[0])) - - return out - - # override - def _build_train(self): - print("-" * 80) - print("Build train graph") - logits = self._model(self.x_train, is_training=True) - log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=logits, labels=self.y_train) - self.loss = tf.reduce_mean(log_probs) - - if self.use_aux_heads: - log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=self.aux_logits, labels=self.y_train) - self.aux_loss = tf.reduce_mean(log_probs) - train_loss = self.loss + 0.4 * self.aux_loss - else: - train_loss = self.loss - - self.train_preds = tf.argmax(logits, axis=1) - self.train_preds = tf.to_int32(self.train_preds) - self.train_acc = tf.equal(self.train_preds, self.y_train) - self.train_acc = tf.to_int32(self.train_acc) - self.train_acc = tf.reduce_sum(self.train_acc) - - tf_variables = [ - var for var in tf.trainable_variables() if ( - var.name.startswith(self.name) and "aux_head" not in var.name)] - self.num_vars = count_model_params(tf_variables) - print("Model has {0} params".format(self.num_vars)) - - self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops( - train_loss, - tf_variables, - self.global_step, - clip_mode=self.clip_mode, - grad_bound=self.grad_bound, - l2_reg=self.l2_reg, - lr_init=self.lr_init, - lr_dec_start=self.lr_dec_start, - lr_dec_every=self.lr_dec_every, - 
lr_dec_rate=self.lr_dec_rate, - lr_cosine=self.lr_cosine, - lr_max=self.lr_max, - lr_min=self.lr_min, - lr_T_0=self.lr_T_0, - lr_T_mul=self.lr_T_mul, - num_train_batches=self.num_train_batches, - optim_algo=self.optim_algo, - sync_replicas=self.sync_replicas, - num_aggregate=self.num_aggregate, - num_replicas=self.num_replicas) - - # override - def _build_valid(self): - if self.x_valid is not None: - print("-" * 80) - print("Build valid graph") - logits = self._model(self.x_valid, False, reuse=True) - self.valid_preds = tf.argmax(logits, axis=1) - self.valid_preds = tf.to_int32(self.valid_preds) - self.valid_acc = tf.equal(self.valid_preds, self.y_valid) - self.valid_acc = tf.to_int32(self.valid_acc) - self.valid_acc = tf.reduce_sum(self.valid_acc) - - # override - def _build_test(self): - print("-" * 80) - print("Build test graph") - logits = self._model(self.x_test, False, reuse=True) - self.test_preds = tf.argmax(logits, axis=1) - self.test_preds = tf.to_int32(self.test_preds) - self.test_acc = tf.equal(self.test_preds, self.y_test) - self.test_acc = tf.to_int32(self.test_acc) - self.test_acc = tf.reduce_sum(self.test_acc) - - # override - def build_valid_rl(self, shuffle=False): - print("-" * 80) - print("Build valid graph on shuffled data") - with tf.device("/cpu:0"): - # shuffled valid data: for choosing validation model - if not shuffle and self.data_format == "NCHW": - self.images["valid_original"] = np.transpose( - self.images["valid_original"], [0, 3, 1, 2]) - x_valid_shuffle, y_valid_shuffle = tf.train.shuffle_batch( - [self.images["valid_original"], self.labels["valid_original"]], - batch_size=self.batch_size, - capacity=25000, - enqueue_many=True, - min_after_dequeue=0, - num_threads=16, - seed=self.seed, - allow_smaller_final_batch=True, - ) - - def _pre_process(x): - x = tf.pad(x, [[4, 4], [4, 4], [0, 0]]) - x = tf.random_crop(x, [32, 32, 3], seed=self.seed) - x = tf.image.random_flip_left_right(x, seed=self.seed) - if self.data_format == "NCHW": - x = tf.transpose(x, [2, 0, 1]) + ] + + out = tf.stack(out, axis=0) + if self.verbose > 0: + print('-' * 80) + shape_list = out.get_shape().as_list() + print('_enas_cell::cell op_id: ' + str(op_id) + ' out shape: ' + str(shape_list) + ' data_format: ' + str(self.data_format)) + for line in traceback.format_stack(): + print(line.strip()) + out = out[op_id, :, :, :, :] + return out + + def _enas_conv(self, x, curr_cell, prev_cell, filter_size, out_filters, is_training, + stack_conv=2, norm_type='group'): + """Performs an enas convolution specified by the relevant parameters.""" + + with tf.variable_scope("conv_{0}x{0}".format(filter_size)): + num_possible_inputs = curr_cell + 2 + for conv_id in range(stack_conv): + with tf.variable_scope("stack_{0}".format(conv_id)): + # create params and pick the correct path + inp_c = self._get_C(x) + w_depthwise = create_weight( + "w_depth", [num_possible_inputs, filter_size * filter_size * inp_c]) + w_depthwise = w_depthwise[prev_cell, :] + w_depthwise = tf.reshape( + w_depthwise, [filter_size, filter_size, inp_c, 1]) + + w_pointwise = create_weight( + "w_point", [num_possible_inputs, inp_c * out_filters]) + w_pointwise = w_pointwise[prev_cell, :] + w_pointwise = tf.reshape( + w_pointwise, [1, 1, inp_c, out_filters]) + + # the computations + x = tf.nn.elu(x) + x = tf.nn.separable_conv2d( + x, + depthwise_filter=w_depthwise, + pointwise_filter=w_pointwise, + strides=[1, 1, 1, 1], padding="SAME", + data_format=self.data_format) + x = norm(x, is_training=is_training, norm_type="batch") return x - if 
shuffle: - x_valid_shuffle = tf.map_fn( - _pre_process, x_valid_shuffle, back_prop=False) - - logits = self._model(x_valid_shuffle, is_training=True, reuse=True) - valid_shuffle_preds = tf.argmax(logits, axis=1) - valid_shuffle_preds = tf.to_int32(valid_shuffle_preds) - self.valid_shuffle_acc = tf.equal(valid_shuffle_preds, y_valid_shuffle) - self.valid_shuffle_acc = tf.to_int32(self.valid_shuffle_acc) - self.valid_shuffle_acc = tf.reduce_sum(self.valid_shuffle_acc) - - def connect_controller(self, controller_model): - if self.fixed_arc is None: - self.normal_arc, self.reduce_arc = controller_model.sample_arc - else: - fixed_arc = np.array([int(x) for x in self.fixed_arc.split(" ") if x]) - self.normal_arc = fixed_arc[:4 * self.num_cells] - self.reduce_arc = fixed_arc[4 * self.num_cells:] - - self._build_train() - self._build_valid() - self._build_test() + def _enas_layer(self, layer_id, prev_layers, arc, out_filters, is_training): + """ + Args: + layer_id: current layer + prev_layers: cache of previous layers. for skip connections + start_idx: where to start looking at. technically, we can infer this + from layer_id, but why bother... + """ + + assert len(prev_layers) == 2, "need exactly 2 inputs" + layers = [prev_layers[0], prev_layers[1]] + layers = self._maybe_calibrate_size( + layers, out_filters, is_training=is_training) + used = [] + for cell_id in range(self.num_cells): + prev_layers = tf.stack(layers, axis=0) + with tf.variable_scope("cell_{0}".format(cell_id)): + with tf.variable_scope("x"): + x_id = arc[4 * cell_id] + x_op = arc[4 * cell_id + 1] + x = prev_layers[x_id, :, :, :, :] + x = self._enas_cell(x, cell_id, x_id, x_op, out_filters, is_training=is_training) + x_used = tf.one_hot( + x_id, depth=self.num_cells + 2, dtype=tf.int32) + + with tf.variable_scope("y"): + y_id = arc[4 * cell_id + 2] + y_op = arc[4 * cell_id + 3] + y = prev_layers[y_id, :, :, :, :] + y = self._enas_cell(y, cell_id, y_id, y_op, out_filters, is_training=is_training) + y_used = tf.one_hot( + y_id, depth=self.num_cells + 2, dtype=tf.int32) + + out = x + y + used.extend([x_used, y_used]) + layers.append(out) + if self.verbose > 0: + print('-' * 80) + shape_list = out.get_shape().as_list() + print('_enas_layer::cell cell_id: ' + str(cell_id) + ' out shape: ' + str(shape_list) + ' data_format: ' + str(self.data_format)) + for line in traceback.format_stack(): + print(line.strip()) + + used = tf.add_n(used) + indices = tf.where(tf.equal(used, 0)) + indices = tf.to_int32(indices) + indices = tf.reshape(indices, [-1]) + num_outs = tf.size(indices) + out = tf.stack(layers, axis=0) + out = tf.gather(out, indices, axis=0) + + inp = prev_layers[0] + # get shape as an integer list, + # this is necessary to prevent some shape information being lost + # in the transpose/reshape below + inp_shape_list = inp.get_shape().as_list() + if self.verbose > 0: + print('-' * 80) + print('_enas_layer::inp tensor: ' + str(inp) + ' shape: ' + str(inp_shape_list) + ' data_format: ' + str(self.data_format)) + out_shape_list = out.get_shape().as_list() + print('_enas_layer::out tensor: ' + str(out) + ' shape: ' + str(out_shape_list) + ' data_format: ' + str(self.data_format)) + print('_enas_layer::num_outs: ' + str(num_outs) + ' _enas_layer::out_filters: ' + str(out_filters)) + for line in traceback.format_stack(): + print(line.strip()) + if self.data_format == "NHWC": + N = tf.shape(inp)[0] + H = inp_shape_list[1] + W = inp_shape_list[2] + C = inp_shape_list[3] + out = tf.transpose(out, [1, 2, 3, 0, 4]) + out = tf.reshape(out, [N, 
H, W, num_outs * out_filters]) + elif self.data_format == "NCHW": + N = tf.shape(inp)[0] + C = inp_shape_list[1] + H = inp_shape_list[2] + W = inp_shape_list[3] + out = tf.transpose(out, [1, 0, 2, 3, 4]) + out = tf.reshape(out, [N, num_outs * out_filters, H, W]) + else: + raise ValueError( + "Unknown data_format '{0}'".format(self.data_format)) + + with tf.variable_scope("final_conv"): + if self.verbose > 0: + print('-' * 80) + shape_list = out.get_shape().as_list() + print('_enas_layer::final_conv out shape: ' + str(shape_list) + ' data_format: ' + str(self.data_format)) + for line in traceback.format_stack(): + print(line.strip()) + w = create_weight( + "w", [self.num_cells + 2, out_filters * out_filters]) + w = tf.gather(w, indices, axis=0) + w = tf.reshape(w, [1, 1, num_outs * out_filters, out_filters]) + out = tf.nn.elu(out) + out = tf.nn.conv2d(out, w, strides=[1, 1, 1, 1], padding="SAME", + data_format=self.data_format) + out = norm(out, is_training=is_training, + data_format=self.data_format, norm_type="batch") + + out = tf.reshape(out, tf.shape(prev_layers[0])) + + return out + + # override + def eval_once(self, sess, eval_set, feed_dict=None, verbose=False): + """Expects self.acc and self.global_step to be defined. + + Args: + sess: tf.Session() or one of its wrap arounds. + feed_dict: can be used to give more information to sess.run(). + eval_set: "valid" or "test" + """ + + assert self.global_step is not None + global_step = sess.run(self.global_step) + print("Eval {} set at {}".format(eval_set, global_step)) + + if eval_set == "valid": + assert self.x_valid is not None + assert self.valid_acc is not None + num_examples = self.num_valid_examples + num_batches = self.num_valid_batches + acc_op = self.valid_acc + acc_op_5mm_7_5deg = self.valid_acc_5mm_7_5deg + acc_op_1cm_15deg = self.valid_acc_1cm_15deg + acc_op_2_30 = self.valid_acc_2cm_30deg + acc_op_4_60 = self.valid_acc_4cm_60deg + acc_op_8_120 = self.valid_acc_8cm_120deg + acc_op_16cm_240deg = self.valid_acc_16cm_240deg + acc_op_32cm_360deg = self.valid_acc_32cm_360deg + loss_secondary_op = self.valid_loss_secondary + cart_op = self.valid_cart_error + ang_er_op = self.valid_angle_error + loss_op = self.valid_loss + mae_op = self.valid_mae + csvfile = self.output_dir + "/valid_metrics.csv" + elif eval_set == "test": + assert self.test_acc is not None + num_examples = self.num_test_examples + num_batches = self.num_test_batches + acc_op = self.test_acc + acc_op_5mm_7_5deg = self.test_acc_5mm_7_5deg + acc_op_1cm_15deg = self.test_acc_1cm_15deg + acc_op_2_30 = self.test_acc_2cm_30deg + acc_op_4_60 = self.test_acc_4cm_60deg + acc_op_8_120 = self.test_acc_8cm_120deg + acc_op_16cm_240deg = self.test_acc_16cm_240deg + acc_op_32cm_360deg = self.test_acc_32cm_360deg + loss_secondary_op = self.test_loss_secondary + ang_er_op = self.test_angle_error + cart_op = self.test_cart_error + loss_op = self.test_loss + mae_op = self.test_mae + csvfile = self.output_dir + "/test_metrics.csv" + else: + raise NotImplementedError("Unknown eval_set '{}'".format(eval_set)) + + total_acc = 0 + total_acc_5mm_7_5deg = 0 + total_acc_1cm_15deg = 0 + total_acc_2_30 = 0 + total_acc_4_60 = 0 + total_acc_8_120 = 0 + total_acc_16cm_240deg = 0 + total_acc_32cm_360deg = 0 + total_cart_error = 0 + total_mae = 0 + total_loss = 0 + total_exp = 0 + total_angle_error = 0 + total_loss_sec = 0 + normal_arc = [] + reduce_arc = [] + for batch_id in range(num_batches): + # if batch_id == 0: + # if feed_dict is None: + # feed_dict = {} + # # print the arc if we're on batch 
0
+            #     feed_dict['print_arc'] = self.print_arc
+            # elif batch_id == 1 and feed_dict is not None and 'print_arc' in feed_dict:
+            #     # remove the print arc tensor if we're on batch 1
+            #     feed_dict.pop('print_arc', None)
+            # the same metric ops are fetched whether the architecture is sampled
+            # (self.fixed_arc is None) or fixed, so a single sess.run covers both cases
+            acc, acc_5_7_5, acc_1_15, acc_2_30, acc_4_60, acc_8_120, acc_16_240, acc_32_360, cart_error, angle_error, mse, mae, loss_sec = sess.run(
+                [acc_op, acc_op_5mm_7_5deg, acc_op_1cm_15deg, acc_op_2_30, acc_op_4_60, acc_op_8_120, acc_op_16cm_240deg, acc_op_32cm_360deg, cart_op, ang_er_op, loss_op, mae_op, loss_secondary_op], feed_dict=feed_dict)
+            total_acc += acc
+            total_acc_5mm_7_5deg += acc_5_7_5
+            total_acc_1cm_15deg += acc_1_15
+            total_acc_2_30 += acc_2_30
+            total_acc_4_60 += acc_4_60
+            total_acc_8_120 += acc_8_120
+            total_acc_16cm_240deg += acc_16_240
+            total_acc_32cm_360deg += acc_32_360
+            total_cart_error += cart_error
+            total_angle_error += angle_error
+            total_loss += mse
+            total_mae += mae
+            total_loss_sec += loss_sec
+            total_exp += self.eval_batch_size
+            if verbose:
+                sys.stdout.write(
+                    "\r{:<5d}/{:>5d}".format(total_acc, total_exp))
+        if verbose:
+            print("")
+        print("{}_accuracy: {:<6.4f}".format(
+            eval_set, float(total_acc) / total_exp))
+        print("{}_accuracy_5mm_7_5deg: {:<6.4f}".format(
+            eval_set, float(total_acc_5mm_7_5deg) / total_exp))
+        print("{}_accuracy_1cm_15deg: {:<6.4f}".format(
+            eval_set, float(total_acc_1cm_15deg) / total_exp))
+        print("{}_accuracy_2cm_30deg: {:<6.4f}".format(
+            eval_set, float(total_acc_2_30) / total_exp))
+        print("{}_accuracy_4cm_60deg: {:<6.4f}".format(
+            eval_set, float(total_acc_4_60) / total_exp))
+        print("{}_accuracy_8cm_120deg: {:<6.4f}".format(
+            eval_set, float(total_acc_8_120) / total_exp))
+        print("{}_accuracy_16cm_240deg: {:<6.4f}".format(
+            eval_set, float(total_acc_16cm_240deg) / total_exp))
+        print("{}_accuracy_32cm_360deg: {:<6.4f}".format(
+            eval_set, float(total_acc_32cm_360deg) / total_exp))
+        if self.rotation_only is False and self.stacking_reward is False:
+            print("{}_cart_error: {:<6.4f}".format(
+                eval_set, float(total_cart_error) / num_batches))
+        if self.translation_only is False and self.stacking_reward is False:
+            print("{}_angle_error: {:<6.4f}".format(
+                eval_set, float(total_angle_error) / num_batches))
+        print("{}_loss_1: {:<6.4f}".format(
+            eval_set, float(total_loss) / num_batches))
+        print("{}_loss_2: {:<6.4f}".format(
+            eval_set, float(total_loss_sec) / num_batches))
+        print("{}_mae: {:<6.4f}".format(
+            eval_set, float(total_mae) / num_batches))
+        if self.fixed_arc is None:
+            print(eval_set, end=" ")
+            print('Eval Architecture:')
+            # print(np.reshape(normal_arc, [-1]))
+            # print(np.reshape(reduce_arc, [-1]))
+            # self.global_step = tf.Print(self.global_step, [self.normal_arc, self.reduce_arc], 'connect_controller(): [normal_arc, reduce_arc]: ', summarize=20)
+        if os.path.exists(csvfile):
+            file_mode = 'a'
+        else:
+            file_mode = 'w+'
+        with open(csvfile, file_mode) as fp:
+            fp.write("{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}\n".format(
+                total_acc, total_acc_5mm_7_5deg, total_acc_1cm_15deg, total_acc_2_30, total_acc_4_60, total_acc_8_120, total_acc_16cm_240deg, total_acc_32cm_360deg, total_loss, total_mae, total_angle_error, total_cart_error, total_loss_sec))
+
+    # override
+    def _build_train(self):
+        print("-" * 80)
+        print("Build train graph")
+        # print("xtrshape-----------------------", self.x_train.shape)
+        logits = self._model(self.x_train, is_training=True)
+        # tf.Print(logits, [tf.shape(logits), "-----------log"])
+        # print("ytrshape-----------", self.y_train)
+        if self.dataset == "stacking":
+            log_probs = tf.nn.sigmoid(logits)
+            if self.use_msle is False:
+                self.loss = tf.losses.mean_squared_error(
+                    labels=self.y_train, predictions=log_probs)
+                self.loss_secondary = tf.reduce_mean(keras.losses.msle(
+                    self.y_train, log_probs))
+            else:
+                self.loss = tf.reduce_mean(keras.losses.msle(
+                    self.y_train, log_probs))
+                self.loss_secondary = tf.losses.mean_squared_error(
+                    labels=self.y_train, predictions=log_probs)
+        else:
+            activation_fn = tf.nn.sparse_softmax_cross_entropy_with_logits
+            log_probs = activation_fn(
+                logits=logits, labels=self.y_train)
+            self.loss = tf.reduce_mean(log_probs)
+
+        if self.use_aux_heads:
+            if self.dataset == "stacking":
+                # score the auxiliary head with the same sigmoid + mean squared
+                # error used for the primary head, applied to the aux logits
+                log_probs = tf.losses.mean_squared_error(
+                    labels=self.y_train, predictions=tf.nn.sigmoid(self.aux_logits))
+            else:
+                log_probs = activation_fn(
+                    logits=self.aux_logits, labels=self.y_train)
+            self.aux_loss = tf.reduce_mean(log_probs)
+            train_loss = self.loss + 0.4 * self.aux_loss
+        else:
+            train_loss = self.loss
+
+        if self.dataset == "stacking":
+            cast_type = tf.to_float
+        else:
+            cast_type = tf.to_int32
+
+        if self.dataset == "stacking":
+            self.train_preds = tf.nn.sigmoid(logits)
+            self.train_acc = grasp_metrics.grasp_acc(
+                self.y_train, self.train_preds)
+            self.train_acc = tf.reduce_mean(self.train_acc)
+
+            self.train_acc_5mm_7_5deg = grasp_metrics.grasp_acc_5mm_7_5deg(
+                self.y_train, self.train_preds)
+            self.train_acc_5mm_7_5deg = tf.reduce_mean(self.train_acc_5mm_7_5deg)
+
+            self.train_acc_1cm_15deg = grasp_metrics.grasp_acc_1cm_15deg(
+                self.y_train, self.train_preds)
+            self.train_acc_1cm_15deg = tf.reduce_mean(self.train_acc_1cm_15deg)
+
+            self.train_acc_2cm_30deg = grasp_metrics.grasp_acc_2cm_30deg(
+                self.y_train, self.train_preds)
+            self.train_acc_2cm_30deg = tf.reduce_mean(self.train_acc_2cm_30deg)
+
+            self.train_acc_4cm_60deg = grasp_metrics.grasp_acc_4cm_60deg(
+                self.y_train, self.train_preds)
+            self.train_acc_4cm_60deg = tf.reduce_mean(self.train_acc_4cm_60deg)
+
+            self.train_acc_8cm_120deg = grasp_metrics.grasp_acc_8cm_120deg(
+                self.y_train, self.train_preds)
+            self.train_acc_8cm_120deg = tf.reduce_mean(self.train_acc_8cm_120deg)
+
+            self.train_acc_16cm_240deg = grasp_metrics.grasp_acc_16cm_240deg(
+                self.y_train, self.train_preds)
+            self.train_acc_16cm_240deg = tf.reduce_mean(self.train_acc_16cm_240deg)
+
+            self.train_acc_32cm_360deg = grasp_metrics.grasp_acc_32cm_360deg(
+                self.y_train, self.train_preds)
+            self.train_acc_32cm_360deg = tf.reduce_mean(self.train_acc_32cm_360deg)
+
+            self.train_cart_error = grasp_metrics.cart_error(
+                self.y_train, self.train_preds)
+            if self.rotation_only is True or self.stacking_reward is True:
+                self.train_cart_error = tf.zeros([1])
+            else:
+                self.train_cart_error = tf.reduce_mean(self.train_cart_error)
+            if self.translation_only is True or self.stacking_reward is True:
+                self.train_angle_error = tf.zeros([1])
+            else:
+                self.train_angle_error = grasp_metrics.angle_error(
+                    self.y_train, self.train_preds)
+                self.train_angle_error = tf.reduce_mean(self.train_angle_error)
+            self.train_mae = 
tf.metrics.mean_absolute_error( + self.y_train, self.train_preds) + self.train_mae = tf.reduce_mean(self.train_mae) + + else: + self.train_preds = tf.argmax(logits, axis=1) + self.train_preds = cast_type(self.train_preds) + # tf.Print(self.train_preds,[tf.shape(self.train_preds),"trainpreds----"]) + # tf.Print(self.y_train,[tf.shape(self.y_train),"ytra==-------------"]) + self.train_acc = tf.equal(self.train_preds, self.y_train) + self.train_acc = cast_type(self.train_acc) + self.train_acc = tf.reduce_mean(self.train_acc) + self.train_cart_error = tf.zeros([1]) + self.train_angle_error = tf.zeros([1]) + self.train_mae = tf.zeros([1]) + + tf_variables = [ + var for var in tf.trainable_variables() if ( + var.name.startswith(self.name) and "aux_head" not in var.name)] + self.num_vars = count_model_params(tf_variables) + print("Model has {0} params".format(self.num_vars)) + + self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops( + train_loss, + tf_variables, + self.global_step, + clip_mode=self.clip_mode, + grad_bound=self.grad_bound, + l2_reg=self.l2_reg, + lr_init=self.lr_init, + lr_dec_start=self.lr_dec_start, + lr_dec_every=self.lr_dec_every, + lr_dec_rate=self.lr_dec_rate, + lr_cosine=self.lr_cosine, + lr_max=self.lr_max, + lr_min=self.lr_min, + lr_T_0=self.lr_T_0, + lr_T_mul=self.lr_T_mul, + num_train_batches=self.num_train_batches, + optim_algo=self.optim_algo, + sync_replicas=self.sync_replicas, + num_aggregate=self.num_aggregate, + num_replicas=self.num_replicas) + + # override + def _build_valid(self): + if self.x_valid is not None: + print("-" * 80) + print("Build valid graph") + logits = self._model( + self.x_valid, is_training=True, reuse=True) + if self.dataset == "stacking": + logits = tf.nn.sigmoid(logits) + cast_type = tf.to_float + self.valid_preds = logits + self.valid_acc = grasp_metrics.grasp_acc( + self.y_valid, self.valid_preds) + self.valid_acc = tf.reduce_sum(self.valid_acc) + + self.valid_acc_5mm_7_5deg = grasp_metrics.grasp_acc_5mm_7_5deg( + self.y_valid, self.valid_preds) + self.valid_acc_5mm_7_5deg = tf.reduce_sum(self.valid_acc_5mm_7_5deg) + + self.valid_acc_1cm_15deg = grasp_metrics.grasp_acc_1cm_15deg( + self.y_valid, self.valid_preds) + self.valid_acc_1cm_15deg = tf.reduce_sum(self.valid_acc_1cm_15deg) + + self.valid_acc_2cm_30deg = grasp_metrics.grasp_acc_2cm_30deg( + self.y_valid, self.valid_preds) + self.valid_acc_2cm_30deg = tf.reduce_sum(self.valid_acc_2cm_30deg) + + self.valid_acc_4cm_60deg = grasp_metrics.grasp_acc_4cm_60deg( + self.y_valid, self.valid_preds) + self.valid_acc_4cm_60deg = tf.reduce_sum(self.valid_acc_4cm_60deg) + + self.valid_acc_8cm_120deg = grasp_metrics.grasp_acc_8cm_120deg( + self.y_valid, self.valid_preds) + self.valid_acc_8cm_120deg = tf.reduce_sum(self.valid_acc_8cm_120deg) + + self.valid_acc_16cm_240deg = grasp_metrics.grasp_acc_16cm_240deg( + self.y_valid, self.valid_preds) + self.valid_acc_16cm_240deg = tf.reduce_sum(self.valid_acc_16cm_240deg) + + self.valid_acc_32cm_360deg = grasp_metrics.grasp_acc_32cm_360deg( + self.y_valid, self.valid_preds) + self.valid_acc_32cm_360deg = tf.reduce_sum(self.valid_acc_32cm_360deg) + + if self.use_msle is False: + self.valid_loss = tf.losses.mean_squared_error( + labels=self.y_valid, predictions=self.valid_preds) + self.valid_loss_secondary = tf.reduce_mean(keras.losses.msle( + self.y_valid, self.valid_preds)) + else: + self.valid_loss = tf.reduce_mean(keras.losses.msle( + self.y_valid, self.valid_preds)) + self.valid_loss_secondary = tf.losses.mean_squared_error( + 
labels=self.y_valid, predictions=self.valid_preds) + + self.valid_cart_error = grasp_metrics.cart_error( + self.y_valid, self.valid_preds) + if self.rotation_only is True or self.stacking_reward is True: + self.valid_cart_error = tf.zeros([1]) + else: + self.valid_cart_error = tf.reduce_mean(self.valid_cart_error) + if self.translation_only is True or self.stacking_reward is True: + self.valid_angle_error = tf.zeros([1]) + else: + self.valid_angle_error = grasp_metrics.angle_error( + self.y_valid, self.valid_preds) + self.valid_angle_error = tf.reduce_mean(self.valid_angle_error) + self.valid_mae = tf.metrics.mean_absolute_error( + self.y_valid, self.valid_preds) + self.valid_mae = tf.reduce_mean(self.valid_mae) + + else: + cast_type = tf.to_int32 + self.valid_preds = tf.argmax(logits, axis=1) + self.valid_preds = cast_type(self.valid_preds) + self.valid_acc = tf.equal(self.valid_preds, self.y_valid) + self.valid_acc = cast_type(self.valid_acc) + self.valid_acc = tf.reduce_sum(self.valid_acc) + + # override + def _build_test(self): + print("-" * 80) + print("Build test graph") + logits = self._model(self.x_test, is_training=False, reuse=True) + if self.dataset == "stacking": + logits = tf.nn.sigmoid(logits) + cast_type = tf.to_float + self.test_preds = logits + self.test_acc = grasp_metrics.grasp_acc( + self.y_test, self.test_preds) + self.test_acc = tf.reduce_sum(self.test_acc) + + self.test_acc_5mm_7_5deg = grasp_metrics.grasp_acc_5mm_7_5deg( + self.y_test, self.test_preds) + self.test_acc_5mm_7_5deg = tf.reduce_sum(self.test_acc_5mm_7_5deg) + + self.test_acc_1cm_15deg = grasp_metrics.grasp_acc_1cm_15deg( + self.y_test, self.test_preds) + self.test_acc_1cm_15deg = tf.reduce_sum(self.test_acc_1cm_15deg) + + self.test_acc_2cm_30deg = grasp_metrics.grasp_acc_2cm_30deg( + self.y_test, self.test_preds) + self.test_acc_2cm_30deg = tf.reduce_sum(self.test_acc_2cm_30deg) + + self.test_acc_4cm_60deg = grasp_metrics.grasp_acc_4cm_60deg( + self.y_test, self.test_preds) + self.test_acc_4cm_60deg = tf.reduce_sum(self.test_acc_4cm_60deg) + + self.test_acc_8cm_120deg = grasp_metrics.grasp_acc_8cm_120deg( + self.y_test, self.test_preds) + self.test_acc_8cm_120deg = tf.reduce_sum(self.test_acc_8cm_120deg) + + self.test_acc_16cm_240deg = grasp_metrics.grasp_acc_16cm_240deg( + self.y_test, self.test_preds) + self.test_acc_16cm_240deg = tf.reduce_sum(self.test_acc_16cm_240deg) + + self.test_acc_32cm_360deg = grasp_metrics.grasp_acc_32cm_360deg( + self.y_test, self.test_preds) + self.test_acc_32cm_360deg = tf.reduce_sum(self.test_acc_32cm_360deg) + + self.test_cart_error = grasp_metrics.cart_error( + self.y_test, self.test_preds) + if self.rotation_only is True or self.stacking_reward is True: + self.test_cart_error = tf.zeros([1]) + else: + self.test_cart_error = tf.reduce_mean(self.test_cart_error) + if self.translation_only is True or self.stacking_reward is True: + self.test_angle_error = tf.zeros([1]) + else: + self.test_angle_error = grasp_metrics.angle_error( + self.y_test, self.test_preds) + self.test_angle_error = tf.reduce_mean(self.test_angle_error) + self.test_mae = tf.metrics.mean_absolute_error( + self.y_test, self.test_preds) + self.test_mae = tf.reduce_mean(self.test_mae) + if self.use_msle is False: + self.test_loss = tf.losses.mean_squared_error( + labels=self.y_test, predictions=self.test_preds) + self.test_loss_secondary = tf.reduce_mean(keras.losses.msle( + self.y_test, self.test_preds)) + else: + self.test_loss = tf.reduce_mean(keras.losses.msle( + self.y_test, self.test_preds)) + 
self.test_loss_secondary = tf.losses.mean_squared_error(
+                    labels=self.y_test, predictions=self.test_preds)
+
+        else:
+            cast_type = tf.to_int32
+            self.test_preds = tf.argmax(logits, axis=1)
+            self.test_preds = cast_type(self.test_preds)
+            self.test_acc = tf.equal(self.test_preds, self.y_test)
+            self.test_acc = cast_type(self.test_acc)
+            self.test_acc = tf.reduce_sum(self.test_acc)
+
+    # override
+    def build_valid_rl(self, shuffle=False):
+        print("-" * 80)
+        print("Build valid graph on shuffled data")
+        if self.dataset == "stacking":
+            with tf.device("/cpu:0"):
+                if not shuffle:
+                    self.x_valid_shuffle, self.y_valid_shuffle = self.x_valid, self.y_valid
+                else:
+                    raise NotImplementedError(
+                        'This portion of the code is not correctly implemented, '
+                        'so it must be fixed before running it. '
+                        'see models.py::__init__() for reference code using the '
+                        'CostarBlockStackingSequence().')
+                    # unreachable reference code for the branch above:
+                    data_features = ['image_0_image_n_vec_xyz_aaxyz_nsc_15']
+                    label_features = ['grasp_goal_xyz_aaxyz_nsc_8']
+                    validation_shuffle_generator = CostarBlockStackingSequence(
+                        self.validation_data, batch_size=self.batch_size, verbose=0,
+                        label_features_to_extract=label_features,
+                        data_features_to_extract=data_features, output_shape=self.image_shape, shuffle=True)
+                    # enqueue the sequence defined above, then draw batches from the enqueuer
+                    validation_enqueuer = OrderedEnqueuer(
+                        validation_shuffle_generator,
+                        use_multiprocessing=False,
+                        shuffle=True)
+                    validation_enqueuer.start(workers=10, max_queue_size=100)
+
+                    def validation_generator(): return iter(validation_enqueuer.get())
+                    validation_dataset = tf.data.Dataset.from_generator(validation_generator, (tf.float32, tf.float32), (tf.TensorShape([None, self.image_shape[0], self.image_shape[1], self.data_features_len]), tf.TensorShape([None, None])))
+                    self.x_valid_shuffle, self.y_valid_shuffle = validation_dataset.make_one_shot_iterator().get_next()
+
+        else:
+            with tf.device("/cpu:0"):
+                # shuffled valid data: for choosing validation model
+                if not shuffle and self.data_format == "NCHW":
+                    self.images["valid_original"] = np.transpose(
+                        self.images["valid_original"], [0, 3, 1, 2])
+                self.x_valid_shuffle, self.y_valid_shuffle = tf.train.shuffle_batch(
+                    [self.images["valid_original"], self.labels["valid_original"]],
+                    batch_size=self.batch_size,
+                    capacity=25000,
+                    enqueue_many=True,
+                    min_after_dequeue=0,
+                    num_threads=16,
+                    seed=self.seed,
+                    allow_smaller_final_batch=True,
+                )
+
+                def _pre_process(x):
+                    x = tf.pad(x, [[4, 4], [4, 4], [0, 0]])
+                    x = tf.random_crop(x, [32, 32, 3], seed=self.seed)
+                    x = tf.image.random_flip_left_right(x, seed=self.seed)
+                    if self.data_format == "NCHW":
+                        x = tf.transpose(x, [2, 0, 1])
+                    return x
+
+                if shuffle:
+                    self.x_valid_shuffle = tf.map_fn(
+                        _pre_process, self.x_valid_shuffle, back_prop=False)
+
+        # TODO(ahundt) should is_training really be true here? this looks like a validation step... but it is in the controller so maybe some training does happen...
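+        # Note: the removed implementation of this method also called self._model
+        # with is_training=True, so the call below preserves existing behavior.
+        # A plausible (but unconfirmed) rationale: the controller's reward is
+        # computed with the child's shared weights in training mode, so batch
+        # norm and drop-path behave exactly as they do during child training.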
+ logits = self._model( + self.x_valid_shuffle, is_training=True, reuse=True) + if self.dataset == "stacking": + logits = tf.nn.sigmoid(logits) + cast_type = tf.to_float + self.valid_shuffle_preds = logits + self.valid_shuffle_acc = grasp_metrics.grasp_acc( + self.y_valid_shuffle, self.valid_shuffle_preds) + self.valid_shuffle_acc = tf.reduce_sum(self.valid_shuffle_acc) + + self.valid_shuffle_acc_5mm_7_5deg = grasp_metrics.grasp_acc_5mm_7_5deg( + self.y_valid_shuffle, self.valid_shuffle_preds) + self.valid_shuffle_acc_5mm_7_5deg = tf.reduce_sum(self.valid_shuffle_acc_5mm_7_5deg) + + self.valid_shuffle_acc_1cm_15deg = grasp_metrics.grasp_acc_1cm_15deg( + self.y_valid_shuffle, self.valid_shuffle_preds) + self.valid_shuffle_acc_1cm_15deg = tf.reduce_sum(self.valid_shuffle_acc_1cm_15deg) + + self.valid_shuffle_acc_2cm_30deg = grasp_metrics.grasp_acc_2cm_30deg( + self.y_valid_shuffle, self.valid_shuffle_preds) + self.valid_shuffle_acc_2cm_30deg = tf.reduce_sum(self.valid_shuffle_acc_2cm_30deg) + + self.valid_shuffle_acc_4cm_60deg = grasp_metrics.grasp_acc_4cm_60deg( + self.y_valid_shuffle, self.valid_shuffle_preds) + self.valid_shuffle_acc_4cm_60deg = tf.reduce_sum(self.valid_shuffle_acc_4cm_60deg) + + self.valid_shuffle_acc_8cm_120deg = grasp_metrics.grasp_acc_8cm_120deg( + self.y_valid_shuffle, self.valid_shuffle_preds) + self.valid_shuffle_acc_8cm_120deg = tf.reduce_sum(self.valid_shuffle_acc_8cm_120deg) + + self.valid_shuffle_acc_16cm_240deg = grasp_metrics.grasp_acc_16cm_240deg( + self.y_valid_shuffle, self.valid_shuffle_preds) + self.valid_shuffle_acc_16cm_240deg = tf.reduce_sum(self.valid_shuffle_acc_16cm_240deg) + + self.valid_shuffle_acc_32cm_360deg = grasp_metrics.grasp_acc_32cm_360deg( + self.y_valid_shuffle, self.valid_shuffle_preds) + self.valid_shuffle_acc_32cm_360deg = tf.reduce_sum(self.valid_shuffle_acc_32cm_360deg) + + if self.use_msle is False: + self.valid_shuffle_loss = tf.reduce_mean(tf.losses.mean_squared_error( + labels=self.y_valid_shuffle, predictions=self.valid_shuffle_preds)) + self.valid_shuffle_loss_secondary = tf.reduce_mean(keras.losses.msle( + self.y_valid_shuffle, self.valid_shuffle_preds)) + else: + self.valid_shuffle_loss = tf.reduce_mean(keras.losses.msle( + self.y_valid_shuffle, self.valid_shuffle_preds)) + self.valid_shuffle_loss_secondary = tf.losses.mean_squared_error( + labels=self.y_valid_shuffle, predictions=self.valid_shuffle_preds) + + self.valid_shuffle_cart_error = grasp_metrics.cart_error( + self.y_valid_shuffle, self.valid_shuffle_preds) + if self.rotation_only is True or self.stacking_reward is True: + self.valid_shuffle_cart_error = tf.zeros([1]) + else: + self.valid_shuffle_cart_error = tf.reduce_mean(self.valid_shuffle_cart_error) + if self.translation_only is True or self.stacking_reward is True: + self.valid_shuffle_angle_error = tf.zeros([1]) + else: + self.valid_shuffle_angle_error = grasp_metrics.angle_error( + self.y_valid_shuffle, self.valid_shuffle_preds) + self.valid_shuffle_angle_error = tf.reduce_mean(self.valid_shuffle_angle_error) + self.valid_shuffle_mae = tf.metrics.mean_absolute_error( + self.y_valid_shuffle, self.valid_shuffle_preds) + self.valid_shuffle_mae = tf.reduce_mean(self.valid_shuffle_mae) + + else: + cast_type = tf.to_int32 + self.valid_shuffle_preds = tf.argmax(logits, axis=1) + self.valid_shuffle_preds = cast_type(self.valid_shuffle_preds) + self.valid_shuffle_acc = tf.equal(self.valid_shuffle_preds, self.y_valid_shuffle) + self.valid_shuffle_acc = cast_type(self.valid_shuffle_acc) + self.valid_shuffle_acc = 
tf.reduce_sum(self.valid_shuffle_acc) + + def connect_controller(self, controller_model, verbose=0): + if self.fixed_arc is None: + sample_arc = controller_model.sample_arc + normal_arc, reduce_arc = sample_arc + # self.print_arc = tf.Print([0], [normal_arc, reduce_arc], 'connect_controller(): [normal_arc, reduce_arc]: ', summarize=20) + + if verbose: + normal_arc = tf.Print(normal_arc, [normal_arc, reduce_arc], 'connect_controller(): [normal_arc, reduce_arc]: ', summarize=20) + self.normal_arc = normal_arc + self.reduce_arc = reduce_arc + else: + fixed_arc = np.array([int(x) + for x in self.fixed_arc.split(" ") if x]) + self.normal_arc = fixed_arc[:4 * self.num_cells] + self.reduce_arc = fixed_arc[4 * self.num_cells:] + + self._build_train() + self._build_valid() + self._build_test() diff --git a/enas/cifar10/micro_controller.py b/enas/cifar10/micro_controller.py index 1ed39b0..c50181e 100644 --- a/enas/cifar10/micro_controller.py +++ b/enas/cifar10/micro_controller.py @@ -15,258 +15,301 @@ from tensorflow.python.training import moving_averages + class MicroController(Controller): - def __init__(self, - search_for="both", - search_whole_channels=False, - num_branches=6, - num_cells=6, - lstm_size=32, - lstm_num_layers=2, - lstm_keep_prob=1.0, - tanh_constant=None, - op_tanh_reduce=1.0, - temperature=None, - lr_init=1e-3, - lr_dec_start=0, - lr_dec_every=100, - lr_dec_rate=0.9, - l2_reg=0, - entropy_weight=None, - clip_mode=None, - grad_bound=None, - use_critic=False, - bl_dec=0.999, - optim_algo="adam", - sync_replicas=False, - num_aggregate=None, - num_replicas=None, - name="controller", - **kwargs): - - print("-" * 80) - print("Building ConvController") - - self.search_for = search_for - self.search_whole_channels = search_whole_channels - self.num_cells = num_cells - self.num_branches = num_branches - - self.lstm_size = lstm_size - self.lstm_num_layers = lstm_num_layers - self.lstm_keep_prob = lstm_keep_prob - self.tanh_constant = tanh_constant - self.op_tanh_reduce = op_tanh_reduce - self.temperature = temperature - self.lr_init = lr_init - self.lr_dec_start = lr_dec_start - self.lr_dec_every = lr_dec_every - self.lr_dec_rate = lr_dec_rate - self.l2_reg = l2_reg - self.entropy_weight = entropy_weight - self.clip_mode = clip_mode - self.grad_bound = grad_bound - self.use_critic = use_critic - self.bl_dec = bl_dec - - self.optim_algo = optim_algo - self.sync_replicas = sync_replicas - self.num_aggregate = num_aggregate - self.num_replicas = num_replicas - self.name = name - - self._create_params() - arc_seq_1, entropy_1, log_prob_1, c, h = self._build_sampler(use_bias=True) - arc_seq_2, entropy_2, log_prob_2, _, _ = self._build_sampler(prev_c=c, prev_h=h) - self.sample_arc = (arc_seq_1, arc_seq_2) - self.sample_entropy = entropy_1 + entropy_2 - self.sample_log_prob = log_prob_1 + log_prob_2 - - def _create_params(self): - initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1) - with tf.variable_scope(self.name, initializer=initializer): - with tf.variable_scope("lstm"): - self.w_lstm = [] - for layer_id in range(self.lstm_num_layers): - with tf.variable_scope("layer_{}".format(layer_id)): - w = tf.get_variable("w", [2 * self.lstm_size, 4 * self.lstm_size]) - self.w_lstm.append(w) - - self.g_emb = tf.get_variable("g_emb", [1, self.lstm_size]) - with tf.variable_scope("emb"): - self.w_emb = tf.get_variable("w", [self.num_branches, self.lstm_size]) - with tf.variable_scope("softmax"): - self.w_soft = tf.get_variable("w", [self.lstm_size, self.num_branches]) - b_init = 
np.array([10.0, 10.0] + [0] * (self.num_branches - 2), - dtype=np.float32) - self.b_soft = tf.get_variable( - "b", [1, self.num_branches], - initializer=tf.constant_initializer(b_init)) - - b_soft_no_learn = np.array( - [0.25, 0.25] + [-0.25] * (self.num_branches - 2), dtype=np.float32) - b_soft_no_learn = np.reshape(b_soft_no_learn, [1, self.num_branches]) - self.b_soft_no_learn = tf.constant(b_soft_no_learn, dtype=tf.float32) - - with tf.variable_scope("attention"): - self.w_attn_1 = tf.get_variable("w_1", [self.lstm_size, self.lstm_size]) - self.w_attn_2 = tf.get_variable("w_2", [self.lstm_size, self.lstm_size]) - self.v_attn = tf.get_variable("v", [self.lstm_size, 1]) - - def _build_sampler(self, prev_c=None, prev_h=None, use_bias=False): - """Build the sampler ops and the log_prob ops.""" - - print("-" * 80) - print("Build controller sampler") - - anchors = tf.TensorArray( - tf.float32, size=self.num_cells + 2, clear_after_read=False) - anchors_w_1 = tf.TensorArray( - tf.float32, size=self.num_cells + 2, clear_after_read=False) - arc_seq = tf.TensorArray(tf.int32, size=self.num_cells * 4) - if prev_c is None: - assert prev_h is None, "prev_c and prev_h must both be None" - prev_c = [tf.zeros([1, self.lstm_size], tf.float32) - for _ in range(self.lstm_num_layers)] - prev_h = [tf.zeros([1, self.lstm_size], tf.float32) - for _ in range(self.lstm_num_layers)] - inputs = self.g_emb - - for layer_id in range(2): - next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm) - prev_c, prev_h = next_c, next_h - anchors = anchors.write(layer_id, tf.zeros_like(next_h[-1])) - anchors_w_1 = anchors_w_1.write( - layer_id, tf.matmul(next_h[-1], self.w_attn_1)) - - def _condition(layer_id, *args): - return tf.less(layer_id, self.num_cells + 2) - - def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1, arc_seq, - entropy, log_prob): - indices = tf.range(0, layer_id, dtype=tf.int32) - start_id = 4 * (layer_id - 2) - prev_layers = [] - for i in range(2): # index_1, index_2 - next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm) - prev_c, prev_h = next_c, next_h - query = anchors_w_1.gather(indices) - query = tf.reshape(query, [layer_id, self.lstm_size]) - query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2)) - query = tf.matmul(query, self.v_attn) - logits = tf.reshape(query, [1, layer_id]) - if self.temperature is not None: - logits /= self.temperature - if self.tanh_constant is not None: - logits = self.tanh_constant * tf.tanh(logits) - index = tf.multinomial(logits, 1) - index = tf.to_int32(index) - index = tf.reshape(index, [1]) - arc_seq = arc_seq.write(start_id + 2 * i, index) - curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=logits, labels=index) - log_prob += curr_log_prob - curr_ent = tf.stop_gradient(tf.nn.softmax_cross_entropy_with_logits( - logits=logits, labels=tf.nn.softmax(logits))) - entropy += curr_ent - prev_layers.append(anchors.read(tf.reduce_sum(index))) - inputs = prev_layers[-1] - - for i in range(2): # op_1, op_2 - next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm) - prev_c, prev_h = next_c, next_h - logits = tf.matmul(next_h[-1], self.w_soft) + self.b_soft - if self.temperature is not None: - logits /= self.temperature - if self.tanh_constant is not None: - op_tanh = self.tanh_constant / self.op_tanh_reduce - logits = op_tanh * tf.tanh(logits) - if use_bias: - logits += self.b_soft_no_learn - op_id = tf.multinomial(logits, 1) - op_id = tf.to_int32(op_id) - op_id = tf.reshape(op_id, [1]) - arc_seq = 
arc_seq.write(start_id + 2 * i + 1, op_id) - curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=logits, labels=op_id) - log_prob += curr_log_prob - curr_ent = tf.stop_gradient(tf.nn.softmax_cross_entropy_with_logits( - logits=logits, labels=tf.nn.softmax(logits))) - entropy += curr_ent - inputs = tf.nn.embedding_lookup(self.w_emb, op_id) - - next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm) - anchors = anchors.write(layer_id, next_h[-1]) - anchors_w_1 = anchors_w_1.write(layer_id, tf.matmul(next_h[-1], self.w_attn_1)) - inputs = self.g_emb - - return (layer_id + 1, inputs, next_c, next_h, anchors, anchors_w_1, - arc_seq, entropy, log_prob) - - loop_vars = [ - tf.constant(2, dtype=tf.int32, name="layer_id"), - inputs, - prev_c, - prev_h, - anchors, - anchors_w_1, - arc_seq, - tf.constant([0.0], dtype=tf.float32, name="entropy"), - tf.constant([0.0], dtype=tf.float32, name="log_prob"), - ] - - loop_outputs = tf.while_loop(_condition, _body, loop_vars, - parallel_iterations=1) - - arc_seq = loop_outputs[-3].stack() - arc_seq = tf.reshape(arc_seq, [-1]) - entropy = tf.reduce_sum(loop_outputs[-2]) - log_prob = tf.reduce_sum(loop_outputs[-1]) - - last_c = loop_outputs[-7] - last_h = loop_outputs[-6] - - return arc_seq, entropy, log_prob, last_c, last_h - - def build_trainer(self, child_model): - child_model.build_valid_rl() - self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) / - tf.to_float(child_model.batch_size)) - self.reward = self.valid_acc - - if self.entropy_weight is not None: - self.reward += self.entropy_weight * self.sample_entropy - - self.sample_log_prob = tf.reduce_sum(self.sample_log_prob) - self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False) - baseline_update = tf.assign_sub( - self.baseline, (1 - self.bl_dec) * (self.baseline - self.reward)) - - with tf.control_dependencies([baseline_update]): - self.reward = tf.identity(self.reward) - - self.loss = self.sample_log_prob * (self.reward - self.baseline) - self.train_step = tf.Variable(0, dtype=tf.int32, trainable=False, name="train_step") - - tf_variables = [var for var in tf.trainable_variables() if var.name.startswith(self.name)] - print("-" * 80) - for var in tf_variables: - print(var) - - self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops( - self.loss, - tf_variables, - self.train_step, - clip_mode=self.clip_mode, - grad_bound=self.grad_bound, - l2_reg=self.l2_reg, - lr_init=self.lr_init, - lr_dec_start=self.lr_dec_start, - lr_dec_every=self.lr_dec_every, - lr_dec_rate=self.lr_dec_rate, - optim_algo=self.optim_algo, - sync_replicas=self.sync_replicas, - num_aggregate=self.num_aggregate, - num_replicas=self.num_replicas) - - self.skip_rate = tf.constant(0.0, dtype=tf.float32) + def __init__(self, + search_for="both", + search_whole_channels=False, + num_branches=6, + num_cells=6, + lstm_size=32, + lstm_num_layers=2, + lstm_keep_prob=1.0, + tanh_constant=None, + op_tanh_reduce=1.0, + temperature=None, + lr_init=1e-3, + lr_dec_start=0, + lr_dec_every=100, + lr_dec_rate=0.9, + l2_reg=0, + entropy_weight=None, + clip_mode=None, + grad_bound=None, + use_critic=False, + bl_dec=0.999, + optim_algo="adam", + sync_replicas=False, + num_aggregate=None, + num_replicas=None, + name="controller", + max_loss=0, + translation_only=False, + rotation_only=False, + dataset="cifar", + **kwargs): + + print("-" * 80) + print("Building ConvController") + + self.search_for = search_for + self.search_whole_channels = search_whole_channels + self.num_cells = num_cells + 
self.num_branches = num_branches + + self.lstm_size = lstm_size + self.lstm_num_layers = lstm_num_layers + self.lstm_keep_prob = lstm_keep_prob + self.tanh_constant = tanh_constant + self.op_tanh_reduce = op_tanh_reduce + self.temperature = temperature + self.lr_init = lr_init + self.lr_dec_start = lr_dec_start + self.lr_dec_every = lr_dec_every + self.lr_dec_rate = lr_dec_rate + self.l2_reg = l2_reg + self.entropy_weight = entropy_weight + self.clip_mode = clip_mode + self.grad_bound = grad_bound + self.use_critic = use_critic + self.bl_dec = bl_dec + + self.optim_algo = optim_algo + self.sync_replicas = sync_replicas + self.num_aggregate = num_aggregate + self.num_replicas = num_replicas + self.name = name + self.dataset = dataset + self.max_loss = max_loss + self.rotation_only = rotation_only + self.translation_only = translation_only + + self._create_params() + arc_seq_1, entropy_1, log_prob_1, c, h = self._build_sampler( + use_bias=True) + arc_seq_2, entropy_2, log_prob_2, _, _ = self._build_sampler( + prev_c=c, prev_h=h) + self.sample_arc = (arc_seq_1, arc_seq_2) + self.sample_entropy = entropy_1 + entropy_2 + self.sample_log_prob = log_prob_1 + log_prob_2 + + def _create_params(self): + initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1) + with tf.variable_scope(self.name, initializer=initializer): + with tf.variable_scope("lstm"): + self.w_lstm = [] + for layer_id in range(self.lstm_num_layers): + with tf.variable_scope("layer_{}".format(layer_id)): + w = tf.get_variable( + "w", [2 * self.lstm_size, 4 * self.lstm_size]) + self.w_lstm.append(w) + + self.g_emb = tf.get_variable("g_emb", [1, self.lstm_size]) + with tf.variable_scope("emb"): + self.w_emb = tf.get_variable( + "w", [self.num_branches, self.lstm_size]) + with tf.variable_scope("softmax"): + self.w_soft = tf.get_variable( + "w", [self.lstm_size, self.num_branches]) + b_init = np.array([10.0, 10.0] + [0] * (self.num_branches - 2), + dtype=np.float32) + self.b_soft = tf.get_variable( + "b", [1, self.num_branches], + initializer=tf.constant_initializer(b_init)) + + b_soft_no_learn = np.array( + [0.25, 0.25] + [-0.25] * (self.num_branches - 2), dtype=np.float32) + b_soft_no_learn = np.reshape( + b_soft_no_learn, [1, self.num_branches]) + self.b_soft_no_learn = tf.constant( + b_soft_no_learn, dtype=tf.float32) + + with tf.variable_scope("attention"): + self.w_attn_1 = tf.get_variable( + "w_1", [self.lstm_size, self.lstm_size]) + self.w_attn_2 = tf.get_variable( + "w_2", [self.lstm_size, self.lstm_size]) + self.v_attn = tf.get_variable("v", [self.lstm_size, 1]) + + def _build_sampler(self, prev_c=None, prev_h=None, use_bias=False): + """Build the sampler ops and the log_prob ops.""" + + print("-" * 80) + print("Build controller sampler") + + anchors = tf.TensorArray( + tf.float32, size=self.num_cells + 2, clear_after_read=False) + anchors_w_1 = tf.TensorArray( + tf.float32, size=self.num_cells + 2, clear_after_read=False) + arc_seq = tf.TensorArray(tf.int32, size=self.num_cells * 4) + if prev_c is None: + assert prev_h is None, "prev_c and prev_h must both be None" + prev_c = [tf.zeros([1, self.lstm_size], tf.float32) + for _ in range(self.lstm_num_layers)] + prev_h = [tf.zeros([1, self.lstm_size], tf.float32) + for _ in range(self.lstm_num_layers)] + inputs = self.g_emb + + for layer_id in range(2): + next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm) + prev_c, prev_h = next_c, next_h + anchors = anchors.write(layer_id, tf.zeros_like(next_h[-1])) + anchors_w_1 = anchors_w_1.write( + layer_id, 
tf.matmul(next_h[-1], self.w_attn_1)) + + def _condition(layer_id, *args): + return tf.less(layer_id, self.num_cells + 2) + + def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1, arc_seq, + entropy, log_prob): + indices = tf.range(0, layer_id, dtype=tf.int32) + start_id = 4 * (layer_id - 2) + prev_layers = [] + for i in range(2): # index_1, index_2 + next_c, next_h = stack_lstm( + inputs, prev_c, prev_h, self.w_lstm) + prev_c, prev_h = next_c, next_h + query = anchors_w_1.gather(indices) + query = tf.reshape(query, [layer_id, self.lstm_size]) + query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2)) + query = tf.matmul(query, self.v_attn) + logits = tf.reshape(query, [1, layer_id]) + if self.temperature is not None: + logits /= self.temperature + if self.tanh_constant is not None: + logits = self.tanh_constant * tf.tanh(logits) + index = tf.multinomial(logits, 1) + index = tf.to_int32(index) + index = tf.reshape(index, [1]) + arc_seq = arc_seq.write(start_id + 2 * i, index) + curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits( + logits=logits, labels=index) + log_prob += curr_log_prob + curr_ent = tf.stop_gradient(tf.nn.softmax_cross_entropy_with_logits( + logits=logits, labels=tf.nn.softmax(logits))) + entropy += curr_ent + prev_layers.append(anchors.read(tf.reduce_sum(index))) + inputs = prev_layers[-1] + + for i in range(2): # op_1, op_2 + next_c, next_h = stack_lstm( + inputs, prev_c, prev_h, self.w_lstm) + prev_c, prev_h = next_c, next_h + logits = tf.matmul(next_h[-1], self.w_soft) + self.b_soft + if self.temperature is not None: + logits /= self.temperature + if self.tanh_constant is not None: + op_tanh = self.tanh_constant / self.op_tanh_reduce + logits = op_tanh * tf.tanh(logits) + if use_bias: + logits += self.b_soft_no_learn + op_id = tf.multinomial(logits, 1) + op_id = tf.to_int32(op_id) + op_id = tf.reshape(op_id, [1]) + arc_seq = arc_seq.write(start_id + 2 * i + 1, op_id) + curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits( + logits=logits, labels=op_id) + log_prob += curr_log_prob + curr_ent = tf.stop_gradient(tf.nn.softmax_cross_entropy_with_logits( + logits=logits, labels=tf.nn.softmax(logits))) + entropy += curr_ent + inputs = tf.nn.embedding_lookup(self.w_emb, op_id) + + next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm) + anchors = anchors.write(layer_id, next_h[-1]) + anchors_w_1 = anchors_w_1.write( + layer_id, tf.matmul(next_h[-1], self.w_attn_1)) + inputs = self.g_emb + + return (layer_id + 1, inputs, next_c, next_h, anchors, anchors_w_1, + arc_seq, entropy, log_prob) + + loop_vars = [ + tf.constant(2, dtype=tf.int32, name="layer_id"), + inputs, + prev_c, + prev_h, + anchors, + anchors_w_1, + arc_seq, + tf.constant([0.0], dtype=tf.float32, name="entropy"), + tf.constant([0.0], dtype=tf.float32, name="log_prob"), + ] + + loop_outputs = tf.while_loop(_condition, _body, loop_vars, + parallel_iterations=1) + + arc_seq = loop_outputs[-3].stack() + arc_seq = tf.reshape(arc_seq, [-1]) + entropy = tf.reduce_sum(loop_outputs[-2]) + log_prob = tf.reduce_sum(loop_outputs[-1]) + + last_c = loop_outputs[-7] + last_h = loop_outputs[-6] + + return arc_seq, entropy, log_prob, last_c, last_h + + def build_trainer(self, child_model): + child_model.build_valid_rl() + self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) / + tf.to_float(child_model.batch_size)) + if self.dataset == "stacking": + # rewards like mse should grow fast as the distance from 0 shrinks, + # since the possible improvement gets smaller as you get 
closer to the exact goal pose + # use epsilon to avoid dividing by 0 + epsilon = 1e-12 + self.reward = 1 / tf.maximum(tf.abs(child_model.valid_shuffle_loss), epsilon) + # previous reward which sort of worked: + # self.reward = self.max_loss-child_model.valid_shuffle_loss + self.mse = child_model.valid_shuffle_loss + self.mae = child_model.valid_shuffle_mae + self.angle_error = child_model.valid_shuffle_angle_error + self.cart_error = child_model.valid_shuffle_cart_error + + else: + self.reward = self.valid_acc + self.mse = tf.zeros([1]) + self.mae = tf.zeros([1]) + self.angle_error = tf.zeros([1]) + self.cart_error = tf.zeros([1]) + + if self.entropy_weight is not None: + self.reward += self.entropy_weight * self.sample_entropy + + self.sample_log_prob_ = self.sample_log_prob + + self.sample_log_prob = tf.reduce_sum(self.sample_log_prob) + self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False) + baseline_update = tf.assign_sub( + self.baseline, (1 - self.bl_dec) * (self.baseline - self.reward)) + + with tf.control_dependencies([baseline_update]): + self.reward = tf.identity(self.reward) + + self.loss = self.sample_log_prob * (self.reward - self.baseline) + self.train_step = tf.Variable( + 0, dtype=tf.int32, trainable=False, name="train_step") + + tf_variables = [var for var in tf.trainable_variables() + if var.name.startswith(self.name)] + print("-" * 80) + for var in tf_variables: + print(var) + + self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops( + self.loss, + tf_variables, + self.train_step, + clip_mode=self.clip_mode, + grad_bound=self.grad_bound, + l2_reg=self.l2_reg, + lr_init=self.lr_init, + lr_dec_start=self.lr_dec_start, + lr_dec_every=self.lr_dec_every, + lr_dec_rate=self.lr_dec_rate, + optim_algo=self.optim_algo, + sync_replicas=self.sync_replicas, + num_aggregate=self.num_aggregate, + num_replicas=self.num_replicas) + + self.skip_rate = tf.constant(0.0, dtype=tf.float32) diff --git a/enas/cifar10/models.py b/enas/cifar10/models.py index 9e31587..5479130 100644 --- a/enas/cifar10/models.py +++ b/enas/cifar10/models.py @@ -9,283 +9,480 @@ from enas.cifar10.image_ops import batch_norm from enas.cifar10.image_ops import relu from enas.cifar10.image_ops import max_pool -from enas.cifar10.image_ops import global_avg_pool +from enas.cifar10.image_ops import global_max_pool from enas.utils import count_model_params from enas.utils import get_train_ops +from block_stacking_reader import CostarBlockStackingSequence +from keras.utils import OrderedEnqueuer +import glob + class Model(object): - def __init__(self, - images, - labels, - cutout_size=None, - batch_size=32, - eval_batch_size=100, - clip_mode=None, - grad_bound=None, - l2_reg=1e-4, - lr_init=0.1, - lr_dec_start=0, - lr_dec_every=100, - lr_dec_rate=0.1, - keep_prob=1.0, - optim_algo=None, - sync_replicas=False, - num_aggregate=None, - num_replicas=None, - data_format="NHWC", - name="generic_model", - seed=None, - ): - """ - Args: - lr_dec_every: number of epochs to decay - """ - print("-" * 80) - print("Build model {}".format(name)) - - self.cutout_size = cutout_size - self.batch_size = batch_size - self.eval_batch_size = eval_batch_size - self.clip_mode = clip_mode - self.grad_bound = grad_bound - self.l2_reg = l2_reg - self.lr_init = lr_init - self.lr_dec_start = lr_dec_start - self.lr_dec_rate = lr_dec_rate - self.keep_prob = keep_prob - self.optim_algo = optim_algo - self.sync_replicas = sync_replicas - self.num_aggregate = num_aggregate - self.num_replicas = num_replicas - self.data_format 
= data_format - self.name = name - self.seed = seed - - self.global_step = None - self.valid_acc = None - self.test_acc = None - print("Build data ops") - with tf.device("/cpu:0"): - # training data - self.num_train_examples = np.shape(images["train"])[0] - self.num_train_batches = ( - self.num_train_examples + self.batch_size - 1) // self.batch_size - x_train, y_train = tf.train.shuffle_batch( - [images["train"], labels["train"]], - batch_size=self.batch_size, - capacity=50000, - enqueue_many=True, - min_after_dequeue=0, - num_threads=16, - seed=self.seed, - allow_smaller_final_batch=True, - ) - self.lr_dec_every = lr_dec_every * self.num_train_batches - - def _pre_process(x): - print("prep shape ",x.get_shape()) - dims = list(x.get_shape()) - dim = max(dims) - x = tf.pad(x, [[4, 4], [4, 4], [0, 0]]) - #x = tf.random_crop(x, [32, 32, 3], seed=self.seed) - x = tf.random_crop(x, dims, seed=self.seed) - x = tf.image.random_flip_left_right(x, seed=self.seed) - if self.cutout_size is not None: - mask = tf.ones([self.cutout_size, self.cutout_size], dtype=tf.int32) - start = tf.random_uniform([2], minval=0, maxval=dim, dtype=tf.int32) - mask = tf.pad(mask, [[self.cutout_size + start[0], dim - start[0]], - [self.cutout_size + start[1], dim - start[1]]]) - mask = mask[self.cutout_size: self.cutout_size + dim, - self.cutout_size: self.cutout_size + dim] - mask = tf.reshape(mask, [dim, dim, 1]) - mask = tf.tile(mask, [1, 1, dims[2]]) - x = tf.where(tf.equal(mask, 0), x=x, y=tf.zeros_like(x)) - if self.data_format == "NCHW": - x = tf.transpose(x, [2, 0, 1]) - - return x - self.x_train = tf.map_fn(_pre_process, x_train, back_prop=False) - self.y_train = y_train - - # valid data - self.x_valid, self.y_valid = None, None - if images["valid"] is not None: - images["valid_original"] = np.copy(images["valid"]) - labels["valid_original"] = np.copy(labels["valid"]) - if self.data_format == "NCHW": - images["valid"] = tf.transpose(images["valid"], [0, 3, 1, 2]) - self.num_valid_examples = np.shape(images["valid"])[0] - self.num_valid_batches = ( - (self.num_valid_examples + self.eval_batch_size - 1) - // self.eval_batch_size) - self.x_valid, self.y_valid = tf.train.batch( - [images["valid"], labels["valid"]], - batch_size=self.eval_batch_size, - capacity=5000, - enqueue_many=True, - num_threads=1, - allow_smaller_final_batch=True, - ) - - # test data - if self.data_format == "NCHW": - images["test"] = tf.transpose(images["test"], [0, 3, 1, 2]) - self.num_test_examples = np.shape(images["test"])[0] - self.num_test_batches = ( - (self.num_test_examples + self.eval_batch_size - 1) - // self.eval_batch_size) - self.x_test, self.y_test = tf.train.batch( - [images["test"], labels["test"]], - batch_size=self.eval_batch_size, - capacity=10000, - enqueue_many=True, - num_threads=1, - allow_smaller_final_batch=True, - ) - - # cache images and labels - self.images = images - self.labels = labels - - def eval_once(self, sess, eval_set, feed_dict=None, verbose=False): - """Expects self.acc and self.global_step to be defined. - - Args: - sess: tf.Session() or one of its wrap arounds. - feed_dict: can be used to give more information to sess.run(). 
- eval_set: "valid" or "test" - """ - - assert self.global_step is not None - global_step = sess.run(self.global_step) - print("Eval at {}".format(global_step)) - - if eval_set == "valid": - assert self.x_valid is not None - assert self.valid_acc is not None - num_examples = self.num_valid_examples - num_batches = self.num_valid_batches - acc_op = self.valid_acc - elif eval_set == "test": - assert self.test_acc is not None - num_examples = self.num_test_examples - num_batches = self.num_test_batches - acc_op = self.test_acc - else: - raise NotImplementedError("Unknown eval_set '{}'".format(eval_set)) - - total_acc = 0 - total_exp = 0 - for batch_id in range(num_batches): - acc = sess.run(acc_op, feed_dict=feed_dict) - total_acc += acc - total_exp += self.eval_batch_size - if verbose: - sys.stdout.write("\r{:<5d}/{:>5d}".format(total_acc, total_exp)) - if verbose: - print("") - print("{}_accuracy: {:<6.4f}".format( - eval_set, float(total_acc) / total_exp)) - - def _build_train(self): - print("Build train graph") - logits = self._model(self.x_train, True) - log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=logits, labels=self.y_train) - self.loss = tf.reduce_mean(log_probs) - - self.train_preds = tf.argmax(logits, axis=1) - self.train_preds = tf.to_int32(self.train_preds) - self.train_acc = tf.equal(self.train_preds, self.y_train) - self.train_acc = tf.to_int32(self.train_acc) - self.train_acc = tf.reduce_sum(self.train_acc) - - tf_variables = [var - for var in tf.trainable_variables() if var.name.startswith(self.name)] - self.num_vars = count_model_params(tf_variables) - print("-" * 80) - for var in tf_variables: - print(var) - - self.global_step = tf.Variable( - 0, dtype=tf.int32, trainable=False, name="global_step") - self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops( - self.loss, - tf_variables, - self.global_step, - clip_mode=self.clip_mode, - grad_bound=self.grad_bound, - l2_reg=self.l2_reg, - lr_init=self.lr_init, - lr_dec_start=self.lr_dec_start, - lr_dec_every=self.lr_dec_every, - lr_dec_rate=self.lr_dec_rate, - optim_algo=self.optim_algo, - sync_replicas=self.sync_replicas, - num_aggregate=self.num_aggregate, - num_replicas=self.num_replicas) - - def _build_valid(self): - if self.x_valid is not None: - print("-" * 80) - print("Build valid graph") - logits = self._model(self.x_valid, False, reuse=True) - self.valid_preds = tf.argmax(logits, axis=1) - self.valid_preds = tf.to_int32(self.valid_preds) - self.valid_acc = tf.equal(self.valid_preds, self.y_valid) - self.valid_acc = tf.to_int32(self.valid_acc) - self.valid_acc = tf.reduce_sum(self.valid_acc) - - def _build_test(self): - print("-" * 80) - print("Build test graph") - logits = self._model(self.x_test, False, reuse=True) - self.test_preds = tf.argmax(logits, axis=1) - self.test_preds = tf.to_int32(self.test_preds) - self.test_acc = tf.equal(self.test_preds, self.y_test) - self.test_acc = tf.to_int32(self.test_acc) - self.test_acc = tf.reduce_sum(self.test_acc) - - def build_valid_rl(self, shuffle=False): - print("-" * 80) - print("Build valid graph on shuffled data") - with tf.device("/cpu:0"): - # shuffled valid data: for choosing validation model - if not shuffle and self.data_format == "NCHW": - self.images["valid_original"] = np.transpose( - self.images["valid_original"], [0, 3, 1, 2]) - x_valid_shuffle, y_valid_shuffle = tf.train.shuffle_batch( - [self.images["valid_original"], self.labels["valid_original"]], - batch_size=self.batch_size, - capacity=25000, - enqueue_many=True, - 
min_after_dequeue=0, - num_threads=16, - seed=self.seed, - allow_smaller_final_batch=True, - ) - - def _pre_process(x): - x = tf.pad(x, [[4, 4], [4, 4], [0, 0]]) - x = tf.random_crop(x, list(x.get_shape()), seed=self.seed) - x = tf.image.random_flip_left_right(x, seed=self.seed) - if self.data_format == "NCHW": - x = tf.transpose(x, [2, 0, 1]) - - return x - - if shuffle: - x_valid_shuffle = tf.map_fn(_pre_process, x_valid_shuffle, - back_prop=False) - - logits = self._model(x_valid_shuffle, False, reuse=True) - valid_shuffle_preds = tf.argmax(logits, axis=1) - valid_shuffle_preds = tf.to_int32(valid_shuffle_preds) - self.valid_shuffle_acc = tf.equal(valid_shuffle_preds, y_valid_shuffle) - self.valid_shuffle_acc = tf.to_int32(self.valid_shuffle_acc) - self.valid_shuffle_acc = tf.reduce_sum(self.valid_shuffle_acc) - - def _model(self, images, is_training, reuse=None): - raise NotImplementedError("Abstract method") + def __init__(self, + images, + labels, + cutout_size=None, + batch_size=32, + eval_batch_size=32, + clip_mode=None, + grad_bound=None, + l2_reg=1e-4, + lr_init=0.1, + lr_dec_start=0, + lr_dec_every=100, + lr_dec_rate=0.1, + keep_prob=1.0, + optim_algo=None, + sync_replicas=False, + num_aggregate=None, + num_replicas=None, + data_format="NHWC", + name="generic_model", + seed=None, + valid_set_size=32, + image_shape=(32, 32, 3), + translation_only=False, + rotation_only=False, + stacking_reward=False, + use_root=False, + dataset="cifar", + data_base_path="", + one_hot_encoding=False, + random_augmentation=None + ): + """ + Args: + lr_dec_every: number of epochs to decay + """ + print("-" * 80) + print("Build model {}".format(name)) + + self.cutout_size = cutout_size + self.batch_size = batch_size + # TODO change back to eval_batch size, pass eval_batch_size from arguments + self.eval_batch_size = batch_size + self.clip_mode = clip_mode + self.grad_bound = grad_bound + self.l2_reg = l2_reg + self.lr_init = lr_init + self.lr_dec_start = lr_dec_start + self.lr_dec_rate = lr_dec_rate + self.keep_prob = keep_prob + self.optim_algo = optim_algo + self.sync_replicas = sync_replicas + self.num_aggregate = num_aggregate + self.num_replicas = num_replicas + self.data_format = data_format + self.name = name + self.seed = seed + self.dataset = dataset + self.valid_set_size = valid_set_size + self.image_shape = image_shape + self.rotation_only = rotation_only + self.translation_only = translation_only + self.stacking_reward = stacking_reward + self.random_augmentation = random_augmentation + self.data_base_path = data_base_path + self.use_root = use_root + self.one_hot_encoding = one_hot_encoding + + self.global_step = None + self.valid_acc = None + self.test_acc = None + print("Build data ops") + with tf.device("/cpu:0"): + # training data + + # Support for stacking generator + print("dataset----------------------", self.dataset) + if self.dataset == "stacking": + Dataset = tf.data.Dataset + flags = tf.app.flags + FLAGS = flags.FLAGS + np.random.seed(0) + val_test_size = self.valid_set_size + if images["path"] != "": + print("datadir------------", images["path"]) + file_names = glob.glob(os.path.expanduser(images["path"])) + train_data = file_names[val_test_size*2:] + validation_data = file_names[val_test_size:val_test_size*2] + self.validation_data = validation_data + test_data = file_names[:val_test_size] + else: + print("-------Loading train-test-val from txt files-------") + self.data_base_path = os.path.expanduser(self.data_base_path) + with open(self.data_base_path + 
'costar_block_stacking_v0.3_success_only_train_files.txt', mode='r') as myfile: + train_data = myfile.read().splitlines() + with open(self.data_base_path + 'costar_block_stacking_v0.3_success_only_test_files.txt', mode='r') as myfile: + test_data = myfile.read().splitlines() + with open(self.data_base_path + 'costar_block_stacking_v0.3_success_only_val_files.txt', mode='r') as myfile: + validation_data = myfile.read().splitlines() + print(train_data) + # train_data = [self.data_base_path + name for name in train_data] + # test_data = [self.data_base_path + name for name in test_data] + # validation_data = [self.data_base_path + name for name in validation_data] + print(validation_data) + # number of images to look at per example + # TODO(ahundt) there is currently a bug in one of these calculations, so lower the images per example to reduce the number of steps per epoch for now. + estimated_images_per_example = 2 + print("valid set size", val_test_size) + # TODO(ahundt) fix the quick hack below, which proceeds through epochs faster + # self.num_train_examples = len(train_data) * self.batch_size * estimated_images_per_example + # self.num_train_batches = (self.num_train_examples + self.batch_size - 1) // self.batch_size + self.num_train_examples = len(train_data) * estimated_images_per_example + self.num_train_batches = (self.num_train_examples + self.batch_size - 1) // self.batch_size + # output_shape = (32, 32, 3) + # WARNING: IF YOU ARE EDITING THIS CODE, MAKE SURE TO ALSO CHECK micro_controller.py AND micro_child.py, WHICH ALSO HAVE A GENERATOR + if self.translation_only is True: + # We've found evidence in hyperopt (though it is not conclusive) that feeding in the + # rotation component actually lowers translation accuracy, + # at least in the colored block case. + # Switch between the two commented lines to go back to the previous behavior. + # data_features = ['image_0_image_n_vec_xyz_aaxyz_nsc_15'] + # self.data_features_len = 15 + data_features = ['image_0_image_n_vec_xyz_nxygrid_12'] + self.data_features_len = 12 + label_features = ['grasp_goal_xyz_3'] + self.num_classes = 3 + elif self.rotation_only is True: + data_features = ['image_0_image_n_vec_xyz_aaxyz_nsc_15'] + self.data_features_len = 15 + # The 2 lines below are disabled because the best run, 2018_12_2054, used the settings above. + # They include a normalized xy grid, similar to Uber's coordconv. + # data_features = ['image_0_image_n_vec_xyz_aaxyz_nsc_nxygrid_17'] + # self.data_features_len = 17 + label_features = ['grasp_goal_aaxyz_nsc_5'] + self.num_classes = 5 + elif self.stacking_reward is True: + data_features = ['image_0_image_n_vec_0_vec_n_xyz_aaxyz_nsc_nxygrid_25'] + self.data_features_len = 25 + label_features = ['stacking_reward'] + self.num_classes = 1 + # elif self.use_root is True: + # data_features = ['current_xyz_aaxyz_nsc_8'] + # self.data_features_len = 8 + # label_features = ['grasp_goal_xyz_3'] + # self.num_classes = 8 + else: + # original input block + # data_features = ['image_0_image_n_vec_xyz_aaxyz_nsc_15'] + # include a normalized xy grid, similar to Uber's coordconv + data_features = ['image_0_image_n_vec_xyz_aaxyz_nsc_nxygrid_17'] + self.data_features_len = 17 + label_features = ['grasp_goal_xyz_aaxyz_nsc_8'] + self.num_classes = 8 + if self.one_hot_encoding: + self.data_features_len += 40 + training_generator = CostarBlockStackingSequence( + train_data, batch_size=batch_size, verbose=0, + label_features_to_extract=label_features, + data_features_to_extract=data_features, output_shape=self.image_shape, shuffle=True, +
random_augmentation=self.random_augmentation, one_hot_encoding=self.one_hot_encoding) + + train_enqueuer = OrderedEnqueuer( + training_generator, + use_multiprocessing=False, + shuffle=True) + train_enqueuer.start(workers=10, max_queue_size=100) + + def train_generator(): return iter(train_enqueuer.get()) + + train_dataset = Dataset.from_generator(train_generator, (tf.float32, tf.float32), (tf.TensorShape( + [None, self.image_shape[0], self.image_shape[1], self.data_features_len]), tf.TensorShape([None, None]))) + # if self.use_root is True: + # train_dataset = Dataset.from_generator(train_generator, (tf.float32, tf.float32), (tf.TensorShape( + # [None, 2]), tf.TensorShape([None, None]))) + trainer = train_dataset.make_one_shot_iterator() + x_train, y_train = trainer.get_next() + # x_train_list = [] + # x_train_list[0] = np.reshape(x_train[0][0], [-1, self.image_shape[1], self.image_shape[2], 3]) + # x_train_list[1] = np.reshape(x_train[0][1], [-1, self.image_shape[1], self.image_shape[2], 3]) + # x_train_list[2] = np.reshape(x_train[0][2],[-1, ]) + # print("x shape--------------", x_train.shape) + print("batch--------------------------", + self.num_train_examples, self.num_train_batches) + print("y shape--------------", y_train.shape) + self.x_train = x_train + self.y_train = y_train + + else: + self.num_train_examples = np.shape(images["train"])[0] + self.num_classes = 10 + self.num_train_batches = ( + self.num_train_examples + self.batch_size - 1) // self.batch_size + + x_train, y_train = tf.train.shuffle_batch( + [images["train"], labels["train"]], + batch_size=self.batch_size, + capacity=50000, + enqueue_many=True, + min_after_dequeue=0, + num_threads=16, + seed=self.seed, + allow_smaller_final_batch=True, + ) + + def _pre_process(x): + print("prep shape ", x.get_shape()) + dims = list(x.get_shape()) + dim = max(dims) + x = tf.pad(x, [[4, 4], [4, 4], [0, 0]]) + #x = tf.random_crop(x, [32, 32, 3], seed=self.seed) + x = tf.random_crop(x, dims, seed=self.seed) + x = tf.image.random_flip_left_right(x, seed=self.seed) + if self.cutout_size is not None: + mask = tf.ones( + [self.cutout_size, self.cutout_size], dtype=tf.int32) + start = tf.random_uniform( + [2], minval=0, maxval=dim, dtype=tf.int32) + mask = tf.pad(mask, [[self.cutout_size + start[0], dim - start[0]], + [self.cutout_size + start[1], dim - start[1]]]) + mask = mask[self.cutout_size: self.cutout_size + dim, + self.cutout_size: self.cutout_size + dim] + mask = tf.reshape(mask, [dim, dim, 1]) + mask = tf.tile(mask, [1, 1, dims[2]]) + x = tf.where(tf.equal(mask, 0), x=x, + y=tf.zeros_like(x)) + if self.data_format == "NCHW": + x = tf.transpose(x, [2, 0, 1]) + + return x + self.x_train = tf.map_fn( + _pre_process, x_train, back_prop=False) + self.y_train = y_train + self.lr_dec_every = lr_dec_every * self.num_train_batches + + # valid data + self.x_valid, self.y_valid = None, None + if self.dataset == "stacking": + # TODO + validation_generator = CostarBlockStackingSequence( + validation_data, batch_size=batch_size, verbose=0, + label_features_to_extract=label_features, + data_features_to_extract=data_features, output_shape=self.image_shape, one_hot_encoding=self.one_hot_encoding) + validation_enqueuer = OrderedEnqueuer( + validation_generator, + use_multiprocessing=False, + shuffle=True) + validation_enqueuer.start(workers=10, max_queue_size=100) + + def valid_generator(): return iter(validation_enqueuer.get()) + validation_dataset = Dataset.from_generator(valid_generator, (tf.float32, tf.float32), (tf.TensorShape( + [None, self.image_shape[0], self.image_shape[1], self.data_features_len]), tf.TensorShape([None, None])))
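# --- Illustrative aside (not part of the diff): the train, valid, and test pipelines
# above all use the same enqueuer-to-dataset bridge, sketched here end to end with a
# toy keras.utils.Sequence. ToySequence and its shapes are hypothetical stand-ins for
# CostarBlockStackingSequence and the shapes computed from data_features above.
import numpy as np
import tensorflow as tf
from keras.utils import Sequence, OrderedEnqueuer

class ToySequence(Sequence):
    """Yields (features, labels) batches, as CostarBlockStackingSequence does."""
    def __len__(self):
        return 8  # batches per epoch

    def __getitem__(self, idx):
        x = np.random.rand(4, 64, 64, 17).astype(np.float32)  # BHWC feature stack
        y = np.random.rand(4, 8).astype(np.float32)           # xyz + aaxyz_nsc pose
        return x, y

# Worker threads keep a queue of preprocessed batches full in the background...
enqueuer = OrderedEnqueuer(ToySequence(), use_multiprocessing=False, shuffle=True)
enqueuer.start(workers=2, max_queue_size=10)

# ...and tf.data exposes that queue to the graph as ordinary tensors.
dataset = tf.data.Dataset.from_generator(
    lambda: iter(enqueuer.get()),
    (tf.float32, tf.float32),
    (tf.TensorShape([None, 64, 64, 17]), tf.TensorShape([None, None])))
x_batch, y_batch = dataset.make_one_shot_iterator().get_next()
# Within a tf.Session, each sess.run([x_batch, y_batch]) pulls the next prefetched batch.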
+ self.num_valid_examples = len( + validation_data) * self.eval_batch_size * estimated_images_per_example + self.num_valid_batches = ( + self.num_valid_examples + self.eval_batch_size - 1) // self.eval_batch_size + self.x_valid, self.y_valid = validation_dataset.make_one_shot_iterator().get_next() + print("x-v........-------------", self.x_valid.shape) + if "valid_original" not in images.keys(): + # note: unlike the else branch below, self.x_valid here is a symbolic tensor, not an array of batch values + images["valid_original"] = np.copy(self.x_valid) + labels["valid_original"] = np.copy(self.y_valid) + else: + if images["valid"] is not None: + images["valid_original"] = np.copy(images["valid"]) + labels["valid_original"] = np.copy(labels["valid"]) + if self.data_format == "NCHW": + images["valid"] = tf.transpose( + images["valid"], [0, 3, 1, 2]) + self.num_valid_examples = np.shape(images["valid"])[0] + self.num_valid_batches = ( + (self.num_valid_examples + self.eval_batch_size - 1) + // self.eval_batch_size) + self.x_valid, self.y_valid = tf.train.batch( + [images["valid"], labels["valid"]], + batch_size=self.eval_batch_size, + capacity=5000, + enqueue_many=True, + num_threads=1, + allow_smaller_final_batch=True, + ) + + # test data + if self.dataset == "stacking": + # TODO + testing_generator = CostarBlockStackingSequence( + test_data, batch_size=batch_size, verbose=0, + label_features_to_extract=label_features, + data_features_to_extract=data_features, output_shape=self.image_shape, one_hot_encoding=self.one_hot_encoding) + test_enqueuer = OrderedEnqueuer( + testing_generator, + use_multiprocessing=False, + shuffle=True) + test_enqueuer.start(workers=10, max_queue_size=100) + + def test_generator(): return iter(test_enqueuer.get()) + test_dataset = Dataset.from_generator(test_generator, (tf.float32, tf.float32), (tf.TensorShape( + [None, self.image_shape[0], self.image_shape[1], self.data_features_len]), tf.TensorShape([None, None]))) + self.num_test_examples = len( + test_data) * self.eval_batch_size * estimated_images_per_example + self.num_test_batches = ( + self.num_test_examples + self.eval_batch_size - 1) // self.eval_batch_size + self.x_test, self.y_test = test_dataset.make_one_shot_iterator().get_next() + else: + if self.data_format == "NCHW": + images["test"] = tf.transpose(images["test"], [0, 3, 1, 2]) + self.num_test_examples = np.shape(images["test"])[0] + self.num_test_batches = ( + (self.num_test_examples + self.eval_batch_size - 1) + // self.eval_batch_size) + self.x_test, self.y_test = tf.train.batch( + [images["test"], labels["test"]], + batch_size=self.eval_batch_size, + capacity=10000, + enqueue_many=True, + num_threads=1, + allow_smaller_final_batch=True, + ) + + # cache images and labels + self.images = images + self.labels = labels + + def eval_once(self, sess, eval_set, feed_dict=None, verbose=False): + """Expects self.acc and self.global_step to be defined. + + Args: + sess: tf.Session() or one of its wrappers. + feed_dict: can be used to give more information to sess.run().
+ eval_set: "valid" or "test" + """ + + assert self.global_step is not None + global_step = sess.run(self.global_step) + print("Eval at {}".format(global_step)) + + if eval_set == "valid": + assert self.x_valid is not None + assert self.valid_acc is not None + num_examples = self.num_valid_examples + num_batches = self.num_valid_batches + acc_op = self.valid_acc + elif eval_set == "test": + assert self.test_acc is not None + num_examples = self.num_test_examples + num_batches = self.num_test_batches + acc_op = self.test_acc + else: + raise NotImplementedError("Unknown eval_set '{}'".format(eval_set)) + + total_acc = 0 + total_exp = 0 + for batch_id in range(num_batches): + acc = sess.run(acc_op, feed_dict=feed_dict) + total_acc += acc + total_exp += self.eval_batch_size + if verbose: + sys.stdout.write( + "\r{:<5d}/{:>5d}".format(total_acc, total_exp)) + if verbose: + print("") + print("{}_accuracy: {:<6.4f}".format( + eval_set, float(total_acc) / total_exp)) + + def _build_train(self): + print("Build train graph") + logits = self._model(self.x_train, True) + log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits( + logits=logits, labels=self.y_train) + self.loss = tf.reduce_mean(log_probs) + + self.train_preds = tf.argmax(logits, axis=1) + self.train_preds = tf.to_int32(self.train_preds) + self.train_acc = tf.equal(self.train_preds, self.y_train) + self.train_acc = tf.to_int32(self.train_acc) + self.train_acc = tf.reduce_sum(self.train_acc) + + tf_variables = [var + for var in tf.trainable_variables() if var.name.startswith(self.name)] + self.num_vars = count_model_params(tf_variables) + print("-" * 80) + for var in tf_variables: + print(var) + + self.global_step = tf.Variable( + 0, dtype=tf.int32, trainable=False, name="global_step") + self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops( + self.loss, + tf_variables, + self.global_step, + clip_mode=self.clip_mode, + grad_bound=self.grad_bound, + l2_reg=self.l2_reg, + lr_init=self.lr_init, + lr_dec_start=self.lr_dec_start, + lr_dec_every=self.lr_dec_every, + lr_dec_rate=self.lr_dec_rate, + optim_algo=self.optim_algo, + sync_replicas=self.sync_replicas, + num_aggregate=self.num_aggregate, + num_replicas=self.num_replicas) + + def _build_valid(self): + if self.x_valid is not None: + print("-" * 80) + print("Build valid graph") + logits = self._model(self.x_valid, False, reuse=True) + self.valid_preds = tf.argmax(logits, axis=1) + self.valid_preds = tf.to_int32(self.valid_preds) + self.valid_acc = tf.equal(self.valid_preds, self.y_valid) + self.valid_acc = tf.to_int32(self.valid_acc) + self.valid_acc = tf.reduce_sum(self.valid_acc) + + def _build_test(self): + print("-" * 80) + print("Build test graph") + logits = self._model(self.x_test, False, reuse=True) + self.test_preds = tf.argmax(logits, axis=1) + self.test_preds = tf.to_int32(self.test_preds) + self.test_acc = tf.equal(self.test_preds, self.y_test) + self.test_acc = tf.to_int32(self.test_acc) + self.test_acc = tf.reduce_sum(self.test_acc) + + def build_valid_rl(self, shuffle=False): + print("-" * 80) + print("Build valid graph on shuffled data") + if self.dataset == "stacking": + # TODO + x_valid_shuffle, y_valid_shuffle = self.x_valid, self.y_valid + else: + with tf.device("/cpu:0"): + # shuffled valid data: for choosing validation model + if not shuffle and self.data_format == "NCHW": + self.images["valid_original"] = np.transpose( + self.images["valid_original"], [0, 3, 1, 2]) + x_valid_shuffle, y_valid_shuffle = tf.train.shuffle_batch( + 
[self.images["valid_original"], self.labels["valid_original"]], + batch_size=self.batch_size, + capacity=25000, + enqueue_many=True, + min_after_dequeue=0, + num_threads=16, + seed=self.seed, + allow_smaller_final_batch=True, + ) + + def _pre_process(x): + x = tf.pad(x, [[4, 4], [4, 4], [0, 0]]) + x = tf.random_crop(x, list(x.get_shape()), seed=self.seed) + x = tf.image.random_flip_left_right(x, seed=self.seed) + if self.data_format == "NCHW": + x = tf.transpose(x, [2, 0, 1]) + + return x + + if shuffle: + x_valid_shuffle = tf.map_fn(_pre_process, x_valid_shuffle, + back_prop=False) + + logits = self._model(x_valid_shuffle, False, reuse=True) + valid_shuffle_preds = tf.argmax(logits, axis=1) + valid_shuffle_preds = tf.to_int32(valid_shuffle_preds) + self.valid_shuffle_acc = tf.equal(valid_shuffle_preds, y_valid_shuffle) + self.valid_shuffle_acc = tf.to_int32(self.valid_shuffle_acc) + self.valid_shuffle_acc = tf.reduce_sum(self.valid_shuffle_acc) + + def _model(self, images, is_training, reuse=None): + raise NotImplementedError("Abstract method") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..39cf010 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +pillow +matplotlib +scikit-image +pyquaternion +keras diff --git a/scripts/costar_block_stacking_reward_search.sh b/scripts/costar_block_stacking_reward_search.sh new file mode 100755 index 0000000..bd99961 --- /dev/null +++ b/scripts/costar_block_stacking_reward_search.sh @@ -0,0 +1 @@ +python enas/cifar10/main.py --data_format="NHWC" --search_for="micro" --reset_output_dir --output_dir="stacking_outputs_stacking_reward" --batch_size=16 --num_epochs=600 --log_every=50 --eval_every_epochs=1 --child_use_aux_heads --child_num_layers=10 --child_out_filters=32 --child_l2_reg=1e-4 --child_num_branches=5 --child_num_cells=5 --child_keep_prob=0.90 --child_drop_path_keep_prob=0.60 --child_optimizer="sgd" --child_lr_cosine --child_lr_max=1.0 --child_lr_min=0.0005 --child_lr_T_0=10 --child_lr_T_mul=2 --controller_training --controller_search_whole_channels --controller_entropy_weight=0.0001 --controller_train_every=1 --controller_sync_replicas --controller_num_aggregate=10 --controller_train_steps=30 --controller_lr=0.0035 --controller_tanh_constant=1.10 --controller_op_tanh_reduce=2.5 --data_path="~/.keras/datasets/costar_block_stacking_dataset_v0.2/*success.h5f" --dataset="stacking" --height_img 96 --width_img 96 --stacking_reward --max_loss=2 \ No newline at end of file diff --git a/scripts/costar_block_stacking_rotation_final.sh b/scripts/costar_block_stacking_rotation_final.sh new file mode 100755 index 0000000..44b490d --- /dev/null +++ b/scripts/costar_block_stacking_rotation_final.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +export PYTHONPATH="$(pwd)" + + +# Epoch 620: Training controller +# +# ctrl_step=18570 controller_loss=1494.188 ent=31.84 lr=0.0035 |g|=0.0002 acc=0.6875 bl=62.99 mins=5730.29 rw =111.040000916 mse =0.00900602154434 +# angle_error=0.216985523701 +# mae=0.126344487071 +# -------------------------------------------------------------------------------- +# [1 4 1 1 1 1 1 4 3 1 3 0] +# [1 2 1 4 2 2 1 1 2 3 1 1] +# val_acc=1.0000 +# controller_loss=6322.51025391 +# mse=0.00370951113291 +# angle_error=0.104434356093 +# mae=0.126305446029 +# ------------------------- +fixed_arc="1 4 1 1 1 1 1 4 3 1 3 0" +fixed_arc="$fixed_arc 1 4 1 1 1 1 1 4 3 1 3 0" + +python enas/cifar10/main.py \ + --data_format="NHWC" \ + --search_for="micro" \ + --reset_output_dir \ + 
--output_dir="2018_09_14_1249_stacking_outputs_rotation_final_with_root_msle" \ + --batch_size=32 \ + --num_epochs=630 \ + --log_every=50 \ + --eval_every_epochs=10 \ + --child_fixed_arc="${fixed_arc}" \ + --child_use_aux_heads \ + --child_num_layers=10 \ + --child_out_filters=36 \ + --child_num_branches=5 \ + --child_num_cells=3 \ + --child_keep_prob=0.80 \ + --child_drop_path_keep_prob=0.60 \ + --child_l2_reg=2e-4 \ + --child_lr_cosine \ + --child_lr_max=1.0 \ + --child_lr_min=0.0001 \ + --child_lr_T_0=10 \ + --child_lr_T_mul=2 \ + --nocontroller_training \ + --controller_search_whole_channels \ + --controller_entropy_weight=0.0001 \ + --controller_train_every=1 \ + --controller_sync_replicas \ + --controller_num_aggregate=10 \ + --controller_train_steps=50 \ + --controller_lr=0.001 \ + --controller_tanh_constant=1.50 \ + --controller_op_tanh_reduce=2.5 \ + --dataset="stacking" \ + --height_img 64 \ + --width_img 64 \ + --rotation_only \ + --max_loss=2 \ + --use_root \ + --use_msle \ + --one_hot_encoding \ + "$@" + diff --git a/scripts/costar_block_stacking_rotation_no_root_final.sh b/scripts/costar_block_stacking_rotation_no_root_final.sh new file mode 100644 index 0000000..15bcce6 --- /dev/null +++ b/scripts/costar_block_stacking_rotation_no_root_final.sh @@ -0,0 +1,56 @@ +#!/bin/bash +export PYTHONPATH="$(pwd)" +# from 2018_09_15_0953_stacking_outputs_rotation_search_no_root_no_msle.txt +# Epoch 300 +# [1 2 0 1 2 3 1 0 0 3 2 0] │························· +# [0 1 1 1 0 2 2 0 1 3 1 3] +# val_acc=0.9375 │························· +# controller_loss=3453.15795898 │························· +# mse=0.00610682461411 │························· +# angle_error=0.122636593878 │························· +# mae=0.12954197824 +# ------------------------- +fixed_arc="1 2 0 1 2 3 1 0 0 3 2 0" +fixed_arc="$fixed_arc 0 1 1 1 0 2 2 0 1 3 1 3" + +python enas/cifar10/main.py \ + --data_format="NHWC" \ + --search_for="micro" \ + --reset_output_dir \ + --output_dir="2018_09_17_1723_stacking_outputs_rotation_without_root_msle_final" \ + --batch_size=64 \ + --num_epochs=630 \ + --log_every=50 \ + --eval_every_epochs=10 \ + --child_fixed_arc="${fixed_arc}" \ + --child_use_aux_heads \ + --child_num_layers=10 \ + --child_out_filters=36 \ + --child_num_branches=5 \ + --child_num_cells=3 \ + --child_keep_prob=0.80 \ + --child_drop_path_keep_prob=0.60 \ + --child_l2_reg=2e-4 \ + --child_lr_cosine \ + --child_lr_max=1.0 \ + --child_lr_min=0.0001 \ + --child_lr_T_0=10 \ + --child_lr_T_mul=2 \ + --nocontroller_training \ + --controller_search_whole_channels \ + --controller_entropy_weight=0.0001 \ + --controller_train_every=1 \ + --controller_sync_replicas \ + --controller_num_aggregate=10 \ + --controller_train_steps=50 \ + --controller_lr=0.001 \ + --controller_tanh_constant=1.50 \ + --controller_op_tanh_reduce=2.5 \ + --dataset="stacking" \ + --height_img 64 \ + --width_img 64 \ + --rotation_only \ + --max_loss=2 \ + --use_msle \ + --one_hot_encoding \ + "$@" \ No newline at end of file diff --git a/scripts/costar_block_stacking_rotation_search.sh b/scripts/costar_block_stacking_rotation_search.sh new file mode 100755 index 0000000..a4f3dcd --- /dev/null +++ b/scripts/costar_block_stacking_rotation_search.sh @@ -0,0 +1 @@ +python enas/cifar10/main.py --data_format="NHWC" --search_for="micro" --reset_output_dir --output_dir="stacking_outputs_rotation_with_root" --batch_size=16 --num_epochs=640 --log_every=50 --eval_every_epochs=1 --child_use_aux_heads --child_num_layers=8 --child_out_filters=20 --child_filter_size=5 
--child_l2_reg=1e-4 --child_num_branches=5 --child_num_cells=3 --child_keep_prob=0.90 --child_drop_path_keep_prob=0.60 --child_optimizer="sgd" --child_lr_cosine --child_lr_max=1.0 --child_lr_min=0.0005 --child_lr_T_0=10 --child_lr_T_mul=2 --controller_training --controller_search_whole_channels --controller_entropy_weight=0.0001 --controller_train_every=1 --controller_sync_replicas --controller_train_steps=30 --controller_lr=0.0035 --controller_tanh_constant=1.10 --controller_op_tanh_reduce=2.5 --dataset="stacking" --height_img 64 --width_img 64 --rotation_only --max_loss=2 --use_root --one_hot_encoding \ No newline at end of file diff --git a/scripts/costar_block_stacking_rotation_search_no_root.sh b/scripts/costar_block_stacking_rotation_search_no_root.sh new file mode 100755 index 0000000..392a8fb --- /dev/null +++ b/scripts/costar_block_stacking_rotation_search_no_root.sh @@ -0,0 +1 @@ +python enas/cifar10/main.py --data_format="NHWC" --search_for="micro" --reset_output_dir --output_dir="stacking_outputs_rotation_no_root" --batch_size=16 --num_epochs=640 --log_every=50 --eval_every_epochs=1 --child_use_aux_heads --child_num_layers=8 --child_out_filters=20 --child_filter_size=5 --child_l2_reg=1e-4 --child_num_branches=5 --child_num_cells=3 --child_keep_prob=0.90 --child_drop_path_keep_prob=0.60 --child_optimizer="sgd" --child_lr_cosine --child_lr_max=1.0 --child_lr_min=0.0005 --child_lr_T_0=10 --child_lr_T_mul=2 --controller_training --controller_search_whole_channels --controller_entropy_weight=0.0001 --controller_train_every=1 --controller_sync_replicas --controller_train_steps=30 --controller_lr=0.0035 --controller_tanh_constant=1.10 --controller_op_tanh_reduce=2.5 --dataset="stacking" --height_img 64 --width_img 64 --rotation_only --max_loss=2 --one_hot_encoding \ No newline at end of file diff --git a/scripts/costar_block_stacking_search.sh b/scripts/costar_block_stacking_search.sh new file mode 100755 index 0000000..eb60b19 --- /dev/null +++ b/scripts/costar_block_stacking_search.sh @@ -0,0 +1 @@ +python enas/cifar10/main.py --data_format="NHWC" --search_for="micro" --reset_output_dir --output_dir="outputs" --batch_size=16 --num_epochs=600 --log_every=50 --eval_every_epochs=1 --child_use_aux_heads --child_num_layers=6 --child_out_filters=20 --child_l2_reg=1e-4 --child_num_branches=5 --child_num_cells=3 --child_keep_prob=0.90 --child_drop_path_keep_prob=0.60 --child_optimizer="sgd" --child_lr_cosine --child_lr_max=0.5 --child_lr_min=0.0005 --child_lr_T_0=10 --child_lr_T_mul=2 --controller_training --controller_search_whole_channels --controller_entropy_weight=0.00001 --controller_train_every=1 --controller_sync_replicas --controller_num_aggregate=10 --controller_train_steps=30 --controller_lr=0.0035 --controller_tanh_constant=1.10 --controller_op_tanh_reduce=2.5 --data_path="~/.keras/datasets/costar_block_stacking_dataset_v0.2/*success.h5f" --dataset="stacking" --height_img 128 --width_img 128 --max_loss=5 \ No newline at end of file diff --git a/scripts/costar_block_stacking_translation_final.sh b/scripts/costar_block_stacking_translation_final.sh new file mode 100755 index 0000000..20e8d59 --- /dev/null +++ b/scripts/costar_block_stacking_translation_final.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +export PYTHONPATH="$(pwd)" +# from 2018_09_09_2230_micro_translation_search_output_stack.txt +# [0 3 0 2 0 0 1 2 2 4 0 1 0 0 3 0 3 4 2 2] +# [1 4 1 1 2 3 1 0 2 4 0 1 2 1 4 4 2 1 3 2] +# val_acc=0.0625 +# controller_loss=126626.984375 +# mse=0.000223237744649 +# cart_error=0.0890415906906 +# mae=0.0595378726721
+ + fixed_arc="0 3 0 2 0 0 1 2 2 4 0 1 0 0 3 0 3 4 2 2" + fixed_arc="$fixed_arc 1 4 1 1 2 3 1 0 2 4 0 1 2 1 4 4 2 1 3 2" + + # --translation_only: train on the translation component of the block stacking poses + # --use_root: based on the HyperTree "root" in the hypertree code + # --one_hot_encoding: the action will be one hot encoded + python enas/cifar10/main.py \ + --data_format="NHWC" \ + --search_for="micro" \ + --reset_output_dir \ + --output_dir="2018_09_09_2230_stacking_outputs_translation_final_with_root" \ + --batch_size=64 \ + --num_epochs=630 \ + --log_every=50 \ + --eval_every_epochs=1 \ + --child_fixed_arc="${fixed_arc}" \ + --child_use_aux_heads \ + --child_num_layers=10 \ + --child_out_filters=36 \ + --child_num_branches=5 \ + --child_num_cells=5 \ + --child_keep_prob=0.80 \ + --child_drop_path_keep_prob=0.60 \ + --child_l2_reg=2e-4 \ + --child_lr_cosine \ + --child_lr_max=1.0 \ + --child_lr_min=0.0001 \ + --child_lr_T_0=10 \ + --child_lr_T_mul=2 \ + --nocontroller_training \ + --controller_search_whole_channels \ + --controller_entropy_weight=0.0001 \ + --controller_train_every=1 \ + --controller_sync_replicas \ + --controller_num_aggregate=10 \ + --controller_train_steps=50 \ + --controller_lr=0.001 \ + --controller_tanh_constant=1.50 \ + --controller_op_tanh_reduce=2.5 \ + --dataset="stacking" \ + --height_img 64 \ + --width_img 64 \ + --translation_only \ + --max_loss=2 \ + --use_root \ + --one_hot_encoding \ + "$@" diff --git a/scripts/costar_block_stacking_translation_root_final.sh b/scripts/costar_block_stacking_translation_root_final.sh new file mode 100644 index 0000000..ed82de2 --- /dev/null +++ b/scripts/costar_block_stacking_translation_root_final.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +export PYTHONPATH="$(pwd)" +# from 2018_09_14_1818_stacking_outputs_translation_search_with_root.txt +# [0 1 0 0 0 1 0 4 0 3 0 3] +# [1 4 0 4 2 0 2 4 3 0 1 3] +# val_acc=0.0000 +# controller_loss=111188.476562 +# mse=0.000138682997203 +# cart_error=0.0660963505507 +# mae=0.0264506191015 + +fixed_arc="0 1 0 0 0 1 0 4 0 3 0 3" +fixed_arc="$fixed_arc 1 4 0 4 2 0 2 4 3 0 1 3" + +# --translation_only: train on the translation component of the block stacking poses +# --use_root: based on the HyperTree "root" in the hypertree code +# --one_hot_encoding: the action will be one hot encoded +python enas/cifar10/main.py \ + --data_format="NHWC" \ + --search_for="micro" \ + --reset_output_dir \ + --output_dir="2018_09_17_1725_stacking_outputs_translation_with_root_final" \ + --batch_size=64 \ + --num_epochs=630 \ + --log_every=50 \ + --eval_every_epochs=10 \ + --child_fixed_arc="${fixed_arc}" \ + --child_use_aux_heads \ + --child_num_layers=10 \ + --child_out_filters=36 \ + --child_num_branches=5 \ + --child_num_cells=3 \ + --child_keep_prob=0.80 \ + --child_drop_path_keep_prob=0.60 \ + --child_l2_reg=2e-4 \ + --child_lr_cosine \ + --child_lr_max=1.0 \ + --child_lr_min=0.0001 \ + --child_lr_T_0=10 \ + --child_lr_T_mul=2 \ + --nocontroller_training \ + --controller_search_whole_channels \ + --controller_entropy_weight=0.0001 \ + --controller_train_every=1 \ + --controller_sync_replicas \ + --controller_num_aggregate=10 \ + --controller_train_steps=50 \ + --controller_lr=0.001 \ + --controller_tanh_constant=1.50 \ + --controller_op_tanh_reduce=2.5 \ + --dataset="stacking" \ + --height_img 64 \ + --width_img 64 \ + --translation_only \ + --max_loss=2 \ + --use_root \ + --one_hot_encoding \ + "$@"
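# --- Illustrative aside (not part of the diff): how the fixed_arc strings in these
# scripts are laid out, assuming the ENAS micro search space convention of 4 integers
# per cell node (an input index and an op id for each of the node's two inputs, where
# indices 0 and 1 denote the outputs of the two previous cells). The first string is
# the normal cell and the second the reduction cell, so --child_num_cells=5 requires
# 20 integers each. decode_cell is a hypothetical helper for inspection only.
normal_arc = [int(x) for x in "0 3 0 2 0 0 1 2 2 4 0 1 0 0 3 0 3 4 2 2".split()]

def decode_cell(arc, num_cells=5):
    """Group a flat arc into one (x_id, x_op, y_id, y_op) tuple per node."""
    assert len(arc) == 4 * num_cells
    return [tuple(arc[4 * i:4 * i + 4]) for i in range(num_cells)]

for node, (x_id, x_op, y_id, y_op) in enumerate(decode_cell(normal_arc), start=2):
    print("node %d: inputs %d and %d, ops %d and %d" % (node, x_id, y_id, x_op, y_op))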
diff --git a/scripts/costar_block_stacking_translation_search.sh b/scripts/costar_block_stacking_translation_search.sh new file mode 100755 index 0000000..f3668b2 --- /dev/null +++ b/scripts/costar_block_stacking_translation_search.sh @@ -0,0 +1 @@ +python enas/cifar10/main.py --data_format="NHWC" --search_for="micro" --reset_output_dir --output_dir="stacking_outputs_translation_5_cells_with_root" --batch_size=16 --num_epochs=640 --log_every=50 --eval_every_epochs=1 --child_use_aux_heads --child_num_layers=10 --child_out_filters=20 --child_filter_size=5 --child_l2_reg=1e-4 --child_num_branches=5 --child_num_cells=5 --child_keep_prob=0.90 --child_drop_path_keep_prob=0.60 --child_optimizer="sgd" --child_lr_cosine --child_lr_max=1.0 --child_lr_min=0.0005 --child_lr_T_0=10 --child_lr_T_mul=2 --controller_training --controller_search_whole_channels --controller_entropy_weight=0.0001 --controller_train_every=1 --controller_sync_replicas --controller_num_aggregate=10 --controller_train_steps=30 --controller_lr=0.0035 --controller_tanh_constant=1.10 --controller_op_tanh_reduce=2.5 --dataset="stacking" --height_img 64 --width_img 64 --translation_only --max_loss=2 --use_root --one_hot_encoding \ No newline at end of file diff --git a/scripts/costar_block_stacking_translation_search_no_root.sh b/scripts/costar_block_stacking_translation_search_no_root.sh new file mode 100755 index 0000000..22b6765 --- /dev/null +++ b/scripts/costar_block_stacking_translation_search_no_root.sh @@ -0,0 +1 @@ +python enas/cifar10/main.py --data_format="NHWC" --search_for="micro" --reset_output_dir --output_dir="stacking_outputs_translation_no_root" --batch_size=16 --num_epochs=640 --log_every=50 --eval_every_epochs=1 --child_use_aux_heads --child_num_layers=8 --child_out_filters=20 --child_filter_size=5 --child_l2_reg=1e-4 --child_num_branches=5 --child_num_cells=3 --child_keep_prob=0.90 --child_drop_path_keep_prob=0.60 --child_optimizer="sgd" --child_lr_cosine --child_lr_max=1.0 --child_lr_min=0.0005 --child_lr_T_0=10 --child_lr_T_mul=2 --controller_training --controller_search_whole_channels --controller_entropy_weight=0.0001 --controller_train_every=1 --controller_sync_replicas --controller_train_steps=30 --controller_lr=0.0035 --controller_tanh_constant=1.10 --controller_op_tanh_reduce=2.5 --dataset="stacking" --height_img 64 --width_img 64 --translation_only --max_loss=2 --one_hot_encoding \ No newline at end of file
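# --- Illustrative aside (not part of the diff): the controller reward shaping that
# these stacking runs optimize, recapped as a standalone TF1 sketch. The epsilon guard
# keeps the reward finite as the child's validation loss approaches zero; the earlier
# linear form (max_loss - loss, with max_loss set by the scripts' --max_loss flag) is
# kept as a comment. The bl_dec value below is an assumption, not taken from the diff.
import tensorflow as tf

valid_shuffle_loss = tf.placeholder(tf.float32, shape=[])  # child validation loss
bl_dec = 0.99      # assumed baseline decay rate
max_loss = 2.0     # as passed by the scripts above

epsilon = 1e-12
reward = 1.0 / tf.maximum(tf.abs(valid_shuffle_loss), epsilon)
# reward = max_loss - valid_shuffle_loss  # previous reward which sort of worked

# Exponential moving average baseline: baseline <- baseline - (1 - bl_dec) * (baseline - reward)
baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
baseline_update = tf.assign_sub(baseline, (1 - bl_dec) * (baseline - reward))
with tf.control_dependencies([baseline_update]):
    reward = tf.identity(reward)
# REINFORCE then scales the sampled architecture's log-probability by the advantage:
# loss = sample_log_prob * (reward - baseline)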