
Commit

initial commit
Naresh1318 committed Dec 10, 2017
1 parent 1236d06 commit 77653e1
Showing 3 changed files with 229 additions and 0 deletions.
7 changes: 7 additions & 0 deletions .idea/inspectionProfiles/Project_Default.xml


152 changes: 152 additions & 0 deletions generate_dataset.py
@@ -0,0 +1,152 @@
import numpy as np
import tensorflow as tf
import gym
import os
import mission_control_breakout as mc
import ops
import matplotlib.pyplot as plt
from gym.wrappers import Monitor

# Setup the environment
env = gym.make('BreakoutDeterministic-v4')
# env = Monitor(env=env, directory="./Results/Videos/Breakout", resume=True)

# Placeholders
X_input = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 4], name='Observations')
Y_target = tf.placeholder(dtype=tf.float32, shape=[None, 4], name='Target_Q_values')


def get_agent(x, reuse=False):
"""
Generate the CNN agent
:param x: tensor, Input frames concatenated along axis 3
:param reuse: bool, True -> Reuse weight variables
False -> Create new ones
:return: Tensor, logits for each valid action
"""
if reuse:
tf.get_variable_scope().reuse_variables()

x = tf.divide(x, 255.0, name='Normalize')
conv_1 = tf.nn.relu(ops.cnn_2d(x, weight_shape=mc.conv_1, strides=mc.stride_1, name='conv_1'))
conv_2 = tf.nn.relu(ops.cnn_2d(conv_1, weight_shape=mc.conv_2, strides=mc.stride_2, name='conv_2'))
conv_3 = tf.nn.relu(ops.cnn_2d(conv_2, weight_shape=mc.conv_3, strides=mc.stride_3, name='conv_3'))
conv_3_r = tf.reshape(conv_3, [-1, 7 * 7 * 64], name='reshape')
dense_1 = tf.nn.relu(ops.dense(conv_3_r, 7 * 7 * 64, mc.dense_1, name='dense_1'))
output = ops.dense(dense_1, mc.dense_1, mc.dense_2, name='dense_2')
return output


def make_directories():
"""
Create the directories used to store the collected dataset (train and test splits) for each run.
:return: tuple of strings, paths to the main, train and test directories
"""
main_dir = "./Dataset/Breakout"
train_dir = main_dir + "/train"
test_dir = main_dir + "/test"
os.makedirs(main_dir)  # makedirs so the parent ./Dataset directory is created if it does not exist
os.mkdir(train_dir)
os.mkdir(test_dir)
return main_dir, train_dir, test_dir


def play_n_collect(sess, agent, no_plays, log_dir=None, show_ui=False, show_action=False):
"""
Use a trained agent to play the required number of games while saving each frame and the action taken to disk
:param sess: op, session instance from tensorflow
:param agent: tensor, trained agent structure/graph
:param no_plays: int, number of games to play
:param log_dir: string, place to store the log files during gameplay
:param show_ui: bool, True -> Show game screen
False -> Do not render the game screen
:param show_action: bool, True -> Show the actions taken by the trained agent
False -> Do not print the actions
:return: None, results are printed and optionally appended to log_dir/log.txt
"""
rewards = []
main_dir, train_dir, test_dir = make_directories()
step = 0
for p in range(no_plays):
frame = 0
observation = env.reset()
if p < 1000:
# Episodes 0-999 are stored in the train split; later episodes go to the test split
episode_path = train_dir + "/{:05d}".format(p)
else:
episode_path = test_dir + "/{:05d}".format(p % 1000)
os.mkdir(episode_path)
plt.imsave(arr=observation, fname=episode_path + "/{:06d}.png".format(frame))

observation = ops.convert_to_gray_n_resize(observation)
observation = np.expand_dims(observation, axis=2)
state = np.repeat(observation, 4, axis=2)
state = np.expand_dims(state, axis=0)
done = False
reward = 0
while not done:
if show_ui:
env.render()
if np.random.rand() < 0.07:
action = env.action_space.sample()
else:
action = np.argmax(sess.run(agent, feed_dict={X_input: state}))

# Save the action taken
with open(episode_path + "/action.txt", "a") as log:
log.write("{}\n".format(action))

if show_action:
print(action)
frame += 1
step += 1
new_state, r, done, _ = env.step(action)
plt.imsave(arr=new_state, fname=episode_path + "/{:06d}.png".format(frame))
next_state = ops.convert_to_gray_n_resize(new_state)
next_state = np.expand_dims(next_state, axis=2)
next_state = np.expand_dims(next_state, axis=0)
state = np.append(next_state, state[:, :, :, :3], axis=3)
reward += r
rewards.append(reward)
print("Step: {}/500e3".format(step))
print("Game: {}/{}".format(p + 1, no_plays))
print("Reward: {}\n".format(reward))
if log_dir is not None:
with open(log_dir + "/log.txt", "a") as log_file:
log_file.write("Game: {}/{}\n".format(p + 1, no_plays))
log_file.write("Reward: {}\n".format(reward))
print("------------------------------------------------------------------------------------------------------")
print("Best reward: {}".format(np.amax(rewards)))
print("Average reward: {}".format(np.mean(rewards)))
if log_dir is not None:
with open(log_dir + "/log.txt", "a") as log_file:
log_file.write("Best reward: {}\n".format(np.amax(rewards)))
log_file.write("Average reward: {}\n".format(np.mean(rewards)))


def train():
"""
Restores the latest trained agent (paths and settings loaded from the mission_control_<game>.py file)
and plays games to collect the dataset
:return: None
"""
with tf.variable_scope("Action_agent"):
agent = get_agent(X_input)

saver = tf.train.Saver()

with tf.Session() as sess:
# Get the latest trained model
saved_models = os.listdir(mc.logdir)
latest_saved_model = sorted(saved_models)[-1]
saver.restore(sess, tf.train.latest_checkpoint(mc.logdir + latest_saved_model + "/saved_models/"))
print("Getting model from: {}".format(mc.logdir + latest_saved_model + "/saved_models/"))
print("------------------------Playing----------------------------")
play_n_collect(sess=sess, agent=agent, no_plays=1200, log_dir=None,
show_ui=False, show_action=mc.show_action)


if __name__ == '__main__':
train()
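
Note: the ops and mission_control_breakout modules imported above are not included in this commit. Judging only from how they are called, ops.convert_to_gray_n_resize presumably grayscales a frame and resizes it to 84x84, while ops.cnn_2d and ops.dense build plain convolutional and fully connected layers (the 7 * 7 * 64 reshape suggests mission_control_breakout holds the standard DQN conv shapes). A minimal sketch of what such helpers could look like; the OpenCV dependency, initializers, and default padding are assumptions, not the repository's actual code:

import cv2
import tensorflow as tf


def convert_to_gray_n_resize(frame):
    # Sketch: grayscale the RGB Atari frame and shrink it to the 84x84
    # resolution expected by the X_input placeholder above.
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)


def cnn_2d(x, weight_shape, strides, name, padding="VALID"):
    # Sketch: one conv layer; weight_shape is [kernel_h, kernel_w, in_ch, out_ch].
    with tf.variable_scope(name):
        w = tf.get_variable("w", shape=weight_shape,
                            initializer=tf.truncated_normal_initializer(stddev=0.02))
        b = tf.get_variable("b", shape=[weight_shape[-1]],
                            initializer=tf.zeros_initializer())
        return tf.nn.conv2d(x, w, strides=strides, padding=padding) + b


def dense(x, in_dim, out_dim, name):
    # Sketch: a fully connected layer without activation (callers add tf.nn.relu).
    with tf.variable_scope(name):
        w = tf.get_variable("w", shape=[in_dim, out_dim],
                            initializer=tf.truncated_normal_initializer(stddev=0.02))
        b = tf.get_variable("b", shape=[out_dim],
                            initializer=tf.zeros_initializer())
        return tf.matmul(x, w) + b
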
70 changes: 70 additions & 0 deletions generate_model.py
@@ -0,0 +1,70 @@
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import collections
import ops


class predict_frame:
def __init__(self):
self.input_frames = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 4], name='input_frames')
self.target_frame = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 1], name='target_frame')
self.action_performed = tf.placeholder(dtype=tf.float32, shape=[None, 4], name='action_performed')  # one-hot encoded action, float so it can feed the dense layer
self.learning_rate = 1e-4
self.batch_size = 32
self.momentum = 0.9

def model(self, x, action, reuse=False):
if reuse:
tf.get_variable_scope().reuse_variables()

# Encoder
conv_1 = tf.nn.relu(ops.cnn_2d(x, weight_shape=[6, 6, 4, 64], strides=[1, 2, 2, 1], name='conv_1'))
conv_2 = tf.nn.relu(ops.cnn_2d(conv_1, weight_shape=[6, 6, 64, 64], strides=[1, 2, 2, 1],
name='conv_2', padding="SAME"))
conv_3 = tf.nn.relu(ops.cnn_2d(conv_2, weight_shape=[6, 6, 64, 64], strides=[1, 2, 2, 1],
name='conv_3', padding="SAME"))
conv_3_flatten = tf.reshape(conv_3, shape=[-1, 6400], name='reshape_1')
dense_1 = ops.dense(conv_3_flatten, 6400, 1024, name='dense_1')
dense_2 = ops.dense(dense_1, 1024, 2048, name='dense_2')
action_dense_1 = ops.dense(action, 4, 2048, name='action_dense_1')
dense_2_action = tf.multiply(dense_2, action_dense_1, name='dense_2_action')

# Decoder
dense_3 = ops.dense(dense_2_action, 2048, 1024, name='dense_3')
dense_4 = tf.nn.relu(ops.dense(dense_3, 1024, 6400, name='dense_4'))
dense_4_reshaped = tf.reshape(dense_4, shape=[-1, 10, 10, 64], name='dense_4_reshaped')
conv_t_1 = tf.nn.relu(ops.cnn_2d_trans(dense_4_reshaped, weight_shape=[6, 6, 64, 64],
strides=[1, 2, 2, 1], output_shape=[-1, 20, 20, 64], name='conv_t_1'))
conv_t_2 = tf.nn.relu(ops.cnn_2d_trans(conv_t_1, weight_shape=[6, 6, 64, 64],
strides=[1, 2, 2, 1], output_shape=[-1, 40, 40, 64], name='conv_t_2'))
output = ops.cnn_2d_trans(conv_t_2, weight_shape=[6, 6, 3, 64],
strides=[1, 2, 2, 1], output_shape=[-1, 84, 84, 3], name='output_image')
return output

def train(self):

with tf.variable_scope("prediction_model"):
generated_image = self.model(self.input_frames, self.action_performed)

generated_image_clipped = tf.clip_by_value(generated_image, 0, 1)

clipping_loss = tf.reduce_mean(tf.square(generated_image_clipped - generated_image))

eps = 1e-5
l1_loss = tf.reduce_mean(tf.abs(generated_image - self.target_frame + eps))

loss = 0.9 * l1_loss + 0.1 * clipping_loss

optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate, momentum=self.momentum).minimize(loss)

tf.summary.scalar(name='l1_loss', tensor=l1_loss)
tf.summary.scalar(name='clipping_loss', tensor=clipping_loss)
tf.summary.image(name='Generated_image', tensor=generated_image_clipped)

# TODO: Currently only shows latest input frame
tf.summary.image(name='Input_images', tensor=self.input_frames[:, :, :, 0:1])  # slice keeps the channel axis so the tensor stays 4-D
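
As committed, predict_frame.train() stops after registering the summaries: there is no session, optimisation loop, or checkpointing yet. A rough sketch of how the method body might continue, for orientation only; the iteration count, the summary/checkpoint paths, and the next_batch() helper that would read frames, one-hot actions, and target frames from ./Dataset/Breakout are all assumptions, not code from this commit:

# Sketch only: a possible continuation of the body of predict_frame.train().
merged_summary = tf.summary.merge_all()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter("./Results/prediction_model", graph=sess.graph)
    for step in range(10000):
        # next_batch() is a hypothetical helper returning arrays matching the
        # input_frames, action_performed and target_frame placeholders.
        frames, actions, targets = next_batch(self.batch_size)
        _, batch_loss, summary = sess.run([optimizer, loss, merged_summary],
                                          feed_dict={self.input_frames: frames,
                                                     self.action_performed: actions,
                                                     self.target_frame: targets})
        writer.add_summary(summary, global_step=step)
        if step % 100 == 0:
            print("Step: {}, loss: {}".format(step, batch_loss))
    saver.save(sess, "./Results/prediction_model/model.ckpt")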



