Commit 77653e1 (1 parent: 1236d06)
Showing 3 changed files with 229 additions and 0 deletions.
@@ -0,0 +1,152 @@
import numpy as np
import tensorflow as tf
import gym
import os
import mission_control_breakout as mc
import ops
import matplotlib.pyplot as plt
from gym.wrappers import Monitor

# Setup the environment
env = gym.make('BreakoutDeterministic-v4')
# env = Monitor(env=env, directory="./Results/Videos/Breakout", resume=True)

# Placeholders
X_input = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 4], name='Observations')
Y_target = tf.placeholder(dtype=tf.float32, shape=[None, 4], name='Target_Q_values')

def get_agent(x, reuse=False):
    """
    Generate the CNN agent
    :param x: tensor, Input frames concatenated along axis 3
    :param reuse: bool, True -> Reuse weight variables
                        False -> Create new ones
    :return: Tensor, Q-value estimates for each valid action
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    x = tf.divide(x, 255.0, name='Normalize')
    conv_1 = tf.nn.relu(ops.cnn_2d(x, weight_shape=mc.conv_1, strides=mc.stride_1, name='conv_1'))
    conv_2 = tf.nn.relu(ops.cnn_2d(conv_1, weight_shape=mc.conv_2, strides=mc.stride_2, name='conv_2'))
    conv_3 = tf.nn.relu(ops.cnn_2d(conv_2, weight_shape=mc.conv_3, strides=mc.stride_3, name='conv_3'))
    conv_3_r = tf.reshape(conv_3, [-1, 7 * 7 * 64], name='reshape')
    dense_1 = tf.nn.relu(ops.dense(conv_3_r, 7 * 7 * 64, mc.dense_1, name='dense_1'))
    output = ops.dense(dense_1, mc.dense_1, mc.dense_2, name='dense_2')
    return output


def make_directories():
    """
    Create directories to store tensorboard files, saved models and log files during each unique run.
    :return: tuple of strings, paths of the created directories
    """
    main_dir = "./Dataset/Breakout"
    train_dir = main_dir + "/train"
    test_dir = main_dir + "/test"
    # makedirs creates any missing parent directories and tolerates reruns
    os.makedirs(main_dir, exist_ok=True)
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)
    return main_dir, train_dir, test_dir


def play_n_collect(sess, agent, no_plays, log_dir=None, show_ui=False, show_action=False):
    """
    Use a trained agent to play a required number of games, saving every frame and action to disk
    :param sess: op, session instance from tensorflow
    :param agent: tensor, trained agent structure/graph
    :param no_plays: int, number of games to play
    :param log_dir: string, place to store the log files during gameplay
    :param show_ui: bool, True -> Show game screen
                          False -> Do not render the game screen
    :param show_action: bool, True -> Print the actions taken by the trained agent
                              False -> Do not print the actions
    :return: None, the results are printed and optionally written to the log file
    """
    rewards = []
    main_dir, train_dir, test_dir = make_directories()
    step = 0
    for p in range(no_plays):
        frame = 0
        observation = env.reset()
        # First 1000 episodes go to the train split, the rest to the test split
        if p < 1000:
            episode_path = train_dir + "/{:05d}".format(p)
        else:
            episode_path = test_dir + "/{:05d}".format(p % 1000)
        os.mkdir(episode_path)
        # Save the first (reset) frame of the episode
        plt.imsave(arr=observation, fname=episode_path + "/{:06d}.png".format(frame))

        observation = ops.convert_to_gray_n_resize(observation)
        observation = np.expand_dims(observation, axis=2)
        state = np.repeat(observation, 4, axis=2)
        state = np.expand_dims(state, axis=0)
        done = False
        reward = 0
        while not done:
            if show_ui:
                env.render()
            # Epsilon-greedy action selection with a small fixed epsilon of 0.07
            if np.random.rand() < 0.07:
                action = env.action_space.sample()
            else:
                action = np.argmax(sess.run(agent, feed_dict={X_input: state}))

            # Save the action taken
            with open(episode_path + "/action.txt", "a") as log:
                log.write("{}\n".format(action))

            if show_action:
                print(action)
            frame += 1
            step += 1
            new_state, r, done, _ = env.step(action)
            plt.imsave(arr=new_state, fname=episode_path + "/{:06d}.png".format(frame))
            next_state = ops.convert_to_gray_n_resize(new_state)
            next_state = np.expand_dims(next_state, axis=2)
            next_state = np.expand_dims(next_state, axis=0)
            # Newest frame goes in channel 0; the oldest of the previous four frames is dropped
            state = np.append(next_state, state[:, :, :, :3], axis=3)
            reward += r
        rewards.append(reward)
        print("Step: {}/500e3".format(step))
        print("Game: {}/{}".format(p + 1, no_plays))
        print("Reward: {}\n".format(reward))
        if log_dir is not None:
            with open(log_dir + "/log.txt", "a") as log_file:
                log_file.write("Game: {}/{}\n".format(p + 1, no_plays))
                log_file.write("Reward: {}\n".format(reward))
    print("------------------------------------------------------------------------------------------------------")
    print("Best reward: {}".format(np.amax(rewards)))
    print("Average reward: {}".format(np.mean(rewards)))
    if log_dir is not None:
        with open(log_dir + "/log.txt", "a") as log_file:
            log_file.write("Best reward: {}\n".format(np.amax(rewards)))
            log_file.write("Average reward: {}\n".format(np.mean(rewards)))


def train():
    """
    Loads the LATEST trained agent from mc.logdir and plays games to collect the dataset.
    Hyperparameters and other info are loaded from the mission_control_<game>.py file.
    :return: None
    """
    with tf.variable_scope("Action_agent"):
        agent = get_agent(X_input)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Get the latest trained model
        saved_models = os.listdir(mc.logdir)
        latest_saved_model = sorted(saved_models)[-1]
        saver.restore(sess, tf.train.latest_checkpoint(mc.logdir + latest_saved_model + "/saved_models/"))
        print("Getting model from: {}".format(mc.logdir + latest_saved_model + "/saved_models/"))
        print("------------------------Playing----------------------------")
        play_n_collect(sess=sess, agent=agent, no_plays=1200, log_dir=None,
                       show_ui=False, show_action=mc.show_action)


if __name__ == '__main__':
    train()
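The script above leans on an ops module (and a mission_control_breakout config) that is not rendered in this diff. For orientation only, here is a minimal sketch of what the three ops helpers used by the agent might look like, assuming VALID padding by default, Glorot-initialized weights, and OpenCV-based preprocessing; the internal variable names are hypothetical and this is not the repository's actual implementation.

import cv2
import numpy as np
import tensorflow as tf


def cnn_2d(x, weight_shape, strides, name, padding='VALID'):
    # weight_shape = [filter_height, filter_width, in_channels, out_channels]
    with tf.variable_scope(name):
        w = tf.get_variable('weights', shape=weight_shape, initializer=tf.glorot_uniform_initializer())
        b = tf.get_variable('bias', shape=[weight_shape[-1]], initializer=tf.zeros_initializer())
        return tf.nn.conv2d(x, w, strides=strides, padding=padding) + b


def dense(x, in_dim, out_dim, name):
    # Plain affine layer; callers wrap it in tf.nn.relu where needed
    with tf.variable_scope(name):
        w = tf.get_variable('weights', shape=[in_dim, out_dim], initializer=tf.glorot_uniform_initializer())
        b = tf.get_variable('bias', shape=[out_dim], initializer=tf.zeros_initializer())
        return tf.matmul(x, w) + b


def convert_to_gray_n_resize(frame):
    # RGB Atari frame -> 84x84 grayscale, the format expected by the 84x84x4 state stack
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)

With VALID padding and the usual DQN filter sizes (8x8 stride 4, 4x4 stride 2, 3x3 stride 1) that mc.conv_1 through mc.conv_3 presumably hold, an 84x84 input reduces to the 7x7x64 feature map that get_agent reshapes before its dense layers.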
@@ -0,0 +1,70 @@
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import collections
import ops


class predict_frame:
    def __init__(self):
        self.input_frames = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 4], name='input_frames')
        self.target_frame = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 1], name='target_frame')
        # float so the one-hot action can be multiplied with the dense layer weights
        self.action_performed = tf.placeholder(dtype=tf.float32, shape=[None, 4], name='action_performed')
        self.learning_rate = 1e-4
        self.batch_size = 32
        self.momentum = 0.9

    def model(self, x, action, reuse=False):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        # Encoder: 84x84x4 frame stack -> 10x10x64 feature map -> 2048-d embedding
        conv_1 = tf.nn.relu(ops.cnn_2d(x, weight_shape=[6, 6, 4, 64], strides=[1, 2, 2, 1], name='conv_1'))
        conv_2 = tf.nn.relu(ops.cnn_2d(conv_1, weight_shape=[6, 6, 64, 64], strides=[1, 2, 2, 1],
                                       name='conv_2', padding="SAME"))
        conv_3 = tf.nn.relu(ops.cnn_2d(conv_2, weight_shape=[6, 6, 64, 64], strides=[1, 2, 2, 1],
                                       name='conv_3', padding="SAME"))
        conv_3_flatten = tf.reshape(conv_3, shape=[-1, 6400], name='reshape_1')
        dense_1 = ops.dense(conv_3_flatten, 6400, 1024, name='dense_1')
        dense_2 = ops.dense(dense_1, 1024, 2048, name='dense_2')
        # Action-conditioned transformation: multiply the frame embedding with an action embedding
        action_dense_1 = ops.dense(action, 4, 2048, name='action_dense_1')
        dense_2_action = tf.multiply(dense_2, action_dense_1, name='dense_2_action')

        # Decoder: 2048-d embedding -> 10x10x64 feature map -> 84x84x1 predicted frame
        dense_3 = ops.dense(dense_2_action, 2048, 1024, name='dense_3')
        dense_4 = tf.nn.relu(ops.dense(dense_3, 1024, 6400, name='dense_4'))
        dense_4_reshaped = tf.reshape(dense_4, shape=[-1, 10, 10, 64], name='dense_4_reshaped')
        conv_t_1 = tf.nn.relu(ops.cnn_2d_trans(dense_4_reshaped, weight_shape=[6, 6, 64, 64],
                                               strides=[1, 2, 2, 1], output_shape=[-1, 20, 20, 64], name='conv_t_1'))
        conv_t_2 = tf.nn.relu(ops.cnn_2d_trans(conv_t_1, weight_shape=[6, 6, 64, 64],
                                               strides=[1, 2, 2, 1], output_shape=[-1, 40, 40, 64], name='conv_t_2'))
        # Single-channel output so the prediction matches the grayscale target_frame placeholder
        output = ops.cnn_2d_trans(conv_t_2, weight_shape=[6, 6, 1, 64],
                                  strides=[1, 2, 2, 1], output_shape=[-1, 84, 84, 1], name='output_image')
        return output

    def train(self):
        with tf.variable_scope("prediction_model"):
            generated_image = self.model(self.input_frames, self.action_performed)

        generated_image_clipped = tf.clip_by_value(generated_image, 0, 1)

        # Penalize predicted values that fall outside the valid [0, 1] pixel range
        clipping_loss = tf.reduce_mean(tf.square(generated_image_clipped - generated_image))

        eps = 1e-5
        l1_loss = tf.reduce_mean(tf.abs(generated_image - self.target_frame + eps))

        loss = 0.9 * l1_loss + 0.1 * clipping_loss

        optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate, momentum=self.momentum).minimize(loss)

        tf.summary.scalar(name='l1_loss', tensor=l1_loss)
        tf.summary.scalar(name='clipping_loss', tensor=clipping_loss)
        tf.summary.image(name='Generated_image', tensor=generated_image_clipped)

        # TODO: Currently only shows latest input frame
        # Slice with 0:1 to keep the 4-D shape that tf.summary.image expects
        tf.summary.image(name='Input_images', tensor=self.input_frames[:, :, :, 0:1])
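The class above only defines the graph; nothing in this commit instantiates it or feeds it data. As a rough usage sketch, one sample for the three placeholders could be assembled from an episode directory written by play_n_collect; the function load_training_pair and its arguments are hypothetical names, not part of the commit.

import os
import numpy as np
import matplotlib.pyplot as plt
import ops


def load_training_pair(episode_path, t):
    # Build one (input_frames, target_frame, action) sample from a saved episode,
    # assuming the layout written by play_n_collect: one PNG per frame plus an
    # action.txt with one action index per line.
    stack = []
    for i in reversed(range(t, t + 4)):  # newest frame first, matching the collector's stacking order
        frame = plt.imread(os.path.join(episode_path, "{:06d}.png".format(i)))
        frame = (frame[..., :3] * 255).astype(np.uint8)  # imsave/imread round-trips PNGs as floats in [0, 1]
        stack.append(ops.convert_to_gray_n_resize(frame))
    input_frames = np.stack(stack, axis=2)  # (84, 84, 4)

    # The target is the single frame that follows the stack
    nxt = plt.imread(os.path.join(episode_path, "{:06d}.png".format(t + 4)))
    nxt = (nxt[..., :3] * 255).astype(np.uint8)
    target_frame = np.expand_dims(ops.convert_to_gray_n_resize(nxt), axis=2)  # (84, 84, 1)

    # One-hot encode the action that produced the target frame (4 valid Breakout actions);
    # line k of action.txt records the action that led from frame k to frame k + 1
    with open(os.path.join(episode_path, "action.txt")) as f:
        actions = [int(line) for line in f]
    action = np.eye(4, dtype=np.float32)[actions[t + 3]]

    return input_frames, target_frame, action

Whether the frames should additionally be scaled to [0, 1] before being fed depends on how the prediction model ends up normalizing its input, which this commit leaves open.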