Commit 77653e1 (1 parent: 1236d06)
Showing 3 changed files with 229 additions and 0 deletions.
@@ -0,0 +1,152 @@
import numpy as np
import tensorflow as tf
import gym
import os
import mission_control_breakout as mc
import ops
import matplotlib.pyplot as plt
from gym.wrappers import Monitor

# Setup the environment
env = gym.make('BreakoutDeterministic-v4')
# env = Monitor(env=env, directory="./Results/Videos/Breakout", resume=True)

# Placeholders
X_input = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 4], name='Observations')
Y_target = tf.placeholder(dtype=tf.float32, shape=[None, 4], name='Target_Q_values')

def get_agent(x, reuse=False):
    """
    Generate the CNN agent
    :param x: tensor, Input frames concatenated along axis 3
    :param reuse: bool, True -> Reuse weight variables
                        False -> Create new ones
    :return: Tensor, Q-value estimates for each valid action
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    x = tf.divide(x, 255.0, name='Normalize')
    conv_1 = tf.nn.relu(ops.cnn_2d(x, weight_shape=mc.conv_1, strides=mc.stride_1, name='conv_1'))
    conv_2 = tf.nn.relu(ops.cnn_2d(conv_1, weight_shape=mc.conv_2, strides=mc.stride_2, name='conv_2'))
    conv_3 = tf.nn.relu(ops.cnn_2d(conv_2, weight_shape=mc.conv_3, strides=mc.stride_3, name='conv_3'))
    conv_3_r = tf.reshape(conv_3, [-1, 7 * 7 * 64], name='reshape')
    dense_1 = tf.nn.relu(ops.dense(conv_3_r, 7 * 7 * 64, mc.dense_1, name='dense_1'))
    output = ops.dense(dense_1, mc.dense_1, mc.dense_2, name='dense_2')
    return output


def make_directories():
    """
    Create directories to store tensorboard files, saved models and log files during each unique run.
    :return: tuple of strings, paths of the created directories
    """
    main_dir = "./Dataset/Breakout"
    train_dir = main_dir + "/train"
    test_dir = main_dir + "/test"
    # makedirs creates any missing parent directories and tolerates reruns
    os.makedirs(main_dir, exist_ok=True)
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)
    return main_dir, train_dir, test_dir


def play_n_collect(sess, agent, no_plays, log_dir=None, show_ui=False, show_action=False):
    """
    Use a trained agent to play a required number of games, saving every frame and action to disk
    :param sess: op, session instance from tensorflow
    :param agent: tensor, trained agent structure/graph
    :param no_plays: int, number of games to play
    :param log_dir: string, place to store the log files during gameplay
    :param show_ui: bool, True -> Show game screen
                          False -> Do not render the game screen
    :param show_action: bool, True -> Print the actions taken by the trained agent
                              False -> Do not print the actions
    :return: None, the results are printed and optionally written to the log file
    """
    rewards = []
    main_dir, train_dir, test_dir = make_directories()
    step = 0
    for p in range(no_plays):
        frame = 0
        observation = env.reset()
        # First 1000 episodes go to the train split, the rest to the test split
        if p < 1000:
            episode_path = train_dir + "/{:05d}".format(p)
        else:
            episode_path = test_dir + "/{:05d}".format(p % 1000)
        os.mkdir(episode_path)
        # Save the first (reset) frame of the episode
        plt.imsave(arr=observation, fname=episode_path + "/{:06d}.png".format(frame))

        observation = ops.convert_to_gray_n_resize(observation)
        observation = np.expand_dims(observation, axis=2)
        state = np.repeat(observation, 4, axis=2)
        state = np.expand_dims(state, axis=0)
        done = False
        reward = 0
        while not done:
            if show_ui:
                env.render()
            # Epsilon-greedy action selection with a small fixed epsilon of 0.07
            if np.random.rand() < 0.07:
                action = env.action_space.sample()
            else:
                action = np.argmax(sess.run(agent, feed_dict={X_input: state}))

            # Save the action taken
            with open(episode_path + "/action.txt", "a") as log:
                log.write("{}\n".format(action))

            if show_action:
                print(action)
            frame += 1
            step += 1
            new_state, r, done, _ = env.step(action)
            plt.imsave(arr=new_state, fname=episode_path + "/{:06d}.png".format(frame))
            next_state = ops.convert_to_gray_n_resize(new_state)
            next_state = np.expand_dims(next_state, axis=2)
            next_state = np.expand_dims(next_state, axis=0)
            # Newest frame goes in channel 0; the oldest of the previous four frames is dropped
            state = np.append(next_state, state[:, :, :, :3], axis=3)
            reward += r
        rewards.append(reward)
        print("Step: {}/500e3".format(step))
        print("Game: {}/{}".format(p + 1, no_plays))
        print("Reward: {}\n".format(reward))
        if log_dir is not None:
            with open(log_dir + "/log.txt", "a") as log_file:
                log_file.write("Game: {}/{}\n".format(p + 1, no_plays))
                log_file.write("Reward: {}\n".format(reward))
    print("------------------------------------------------------------------------------------------------------")
    print("Best reward: {}".format(np.amax(rewards)))
    print("Average reward: {}".format(np.mean(rewards)))
    if log_dir is not None:
        with open(log_dir + "/log.txt", "a") as log_file:
            log_file.write("Best reward: {}\n".format(np.amax(rewards)))
            log_file.write("Average reward: {}\n".format(np.mean(rewards)))


def train():
    """
    Loads the LATEST trained agent from mc.logdir and plays games to collect the dataset.
    Hyperparameters and other info are loaded from the mission_control_<game>.py file.
    :return: None
    """
    with tf.variable_scope("Action_agent"):
        agent = get_agent(X_input)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Get the latest trained model
        saved_models = os.listdir(mc.logdir)
        latest_saved_model = sorted(saved_models)[-1]
        saver.restore(sess, tf.train.latest_checkpoint(mc.logdir + latest_saved_model + "/saved_models/"))
        print("Getting model from: {}".format(mc.logdir + latest_saved_model + "/saved_models/"))
        print("------------------------Playing----------------------------")
        play_n_collect(sess=sess, agent=agent, no_plays=1200, log_dir=None,
                       show_ui=False, show_action=mc.show_action)


if __name__ == '__main__':
    train()
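The script above leans on an ops module (and a mission_control_breakout config) that is not rendered in this diff. For orientation only, here is a minimal sketch of what the three ops helpers used by the agent might look like, assuming VALID padding by default, Glorot-initialized weights, and OpenCV-based preprocessing; the internal variable names are hypothetical and this is not the repository's actual implementation.

import cv2
import numpy as np
import tensorflow as tf


def cnn_2d(x, weight_shape, strides, name, padding='VALID'):
    # weight_shape = [filter_height, filter_width, in_channels, out_channels]
    with tf.variable_scope(name):
        w = tf.get_variable('weights', shape=weight_shape, initializer=tf.glorot_uniform_initializer())
        b = tf.get_variable('bias', shape=[weight_shape[-1]], initializer=tf.zeros_initializer())
        return tf.nn.conv2d(x, w, strides=strides, padding=padding) + b


def dense(x, in_dim, out_dim, name):
    # Plain affine layer; callers wrap it in tf.nn.relu where needed
    with tf.variable_scope(name):
        w = tf.get_variable('weights', shape=[in_dim, out_dim], initializer=tf.glorot_uniform_initializer())
        b = tf.get_variable('bias', shape=[out_dim], initializer=tf.zeros_initializer())
        return tf.matmul(x, w) + b


def convert_to_gray_n_resize(frame):
    # RGB Atari frame -> 84x84 grayscale, the format expected by the 84x84x4 state stack
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)

With VALID padding and the usual DQN filter sizes (8x8 stride 4, 4x4 stride 2, 3x3 stride 1) that mc.conv_1 through mc.conv_3 presumably hold, an 84x84 input reduces to the 7x7x64 feature map that get_agent reshapes before its dense layers.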
@@ -0,0 +1,70 @@
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import collections
import ops


class predict_frame:
    def __init__(self):
        self.input_frames = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 4], name='input_frames')
        self.target_frame = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 1], name='target_frame')
        # float so the one-hot action can be multiplied with the dense layer weights
        self.action_performed = tf.placeholder(dtype=tf.float32, shape=[None, 4], name='action_performed')
        self.learning_rate = 1e-4
        self.batch_size = 32
        self.momentum = 0.9

    def model(self, x, action, reuse=False):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        # Encoder: 84x84x4 frame stack -> 10x10x64 feature map -> 2048-d embedding
        conv_1 = tf.nn.relu(ops.cnn_2d(x, weight_shape=[6, 6, 4, 64], strides=[1, 2, 2, 1], name='conv_1'))
        conv_2 = tf.nn.relu(ops.cnn_2d(conv_1, weight_shape=[6, 6, 64, 64], strides=[1, 2, 2, 1],
                                       name='conv_2', padding="SAME"))
        conv_3 = tf.nn.relu(ops.cnn_2d(conv_2, weight_shape=[6, 6, 64, 64], strides=[1, 2, 2, 1],
                                       name='conv_3', padding="SAME"))
        conv_3_flatten = tf.reshape(conv_3, shape=[-1, 6400], name='reshape_1')
        dense_1 = ops.dense(conv_3_flatten, 6400, 1024, name='dense_1')
        dense_2 = ops.dense(dense_1, 1024, 2048, name='dense_2')
        # Action-conditioned transformation: multiply the frame embedding with an action embedding
        action_dense_1 = ops.dense(action, 4, 2048, name='action_dense_1')
        dense_2_action = tf.multiply(dense_2, action_dense_1, name='dense_2_action')

        # Decoder: 2048-d embedding -> 10x10x64 feature map -> 84x84x1 predicted frame
        dense_3 = ops.dense(dense_2_action, 2048, 1024, name='dense_3')
        dense_4 = tf.nn.relu(ops.dense(dense_3, 1024, 6400, name='dense_4'))
        dense_4_reshaped = tf.reshape(dense_4, shape=[-1, 10, 10, 64], name='dense_4_reshaped')
        conv_t_1 = tf.nn.relu(ops.cnn_2d_trans(dense_4_reshaped, weight_shape=[6, 6, 64, 64],
                                               strides=[1, 2, 2, 1], output_shape=[-1, 20, 20, 64], name='conv_t_1'))
        conv_t_2 = tf.nn.relu(ops.cnn_2d_trans(conv_t_1, weight_shape=[6, 6, 64, 64],
                                               strides=[1, 2, 2, 1], output_shape=[-1, 40, 40, 64], name='conv_t_2'))
        # Single-channel output so the prediction matches the grayscale target_frame placeholder
        output = ops.cnn_2d_trans(conv_t_2, weight_shape=[6, 6, 1, 64],
                                  strides=[1, 2, 2, 1], output_shape=[-1, 84, 84, 1], name='output_image')
        return output

    def train(self):
        with tf.variable_scope("prediction_model"):
            generated_image = self.model(self.input_frames, self.action_performed)

        generated_image_clipped = tf.clip_by_value(generated_image, 0, 1)

        # Penalize predicted values that fall outside the valid [0, 1] pixel range
        clipping_loss = tf.reduce_mean(tf.square(generated_image_clipped - generated_image))

        eps = 1e-5
        l1_loss = tf.reduce_mean(tf.abs(generated_image - self.target_frame + eps))

        loss = 0.9 * l1_loss + 0.1 * clipping_loss

        optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate, momentum=self.momentum).minimize(loss)

        tf.summary.scalar(name='l1_loss', tensor=l1_loss)
        tf.summary.scalar(name='clipping_loss', tensor=clipping_loss)
        tf.summary.image(name='Generated_image', tensor=generated_image_clipped)

        # TODO: Currently only shows latest input frame
        # Slice with 0:1 to keep the 4-D shape that tf.summary.image expects
        tf.summary.image(name='Input_images', tensor=self.input_frames[:, :, :, 0:1])
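The class above only defines the graph; nothing in this commit instantiates it or feeds it data. As a rough usage sketch, one sample for the three placeholders could be assembled from an episode directory written by play_n_collect; the function load_training_pair and its arguments are hypothetical names, not part of the commit.

import os
import numpy as np
import matplotlib.pyplot as plt
import ops


def load_training_pair(episode_path, t):
    # Build one (input_frames, target_frame, action) sample from a saved episode,
    # assuming the layout written by play_n_collect: one PNG per frame plus an
    # action.txt with one action index per line.
    stack = []
    for i in reversed(range(t, t + 4)):  # newest frame first, matching the collector's stacking order
        frame = plt.imread(os.path.join(episode_path, "{:06d}.png".format(i)))
        frame = (frame[..., :3] * 255).astype(np.uint8)  # imsave/imread round-trips PNGs as floats in [0, 1]
        stack.append(ops.convert_to_gray_n_resize(frame))
    input_frames = np.stack(stack, axis=2)  # (84, 84, 4)

    # The target is the single frame that follows the stack
    nxt = plt.imread(os.path.join(episode_path, "{:06d}.png".format(t + 4)))
    nxt = (nxt[..., :3] * 255).astype(np.uint8)
    target_frame = np.expand_dims(ops.convert_to_gray_n_resize(nxt), axis=2)  # (84, 84, 1)

    # One-hot encode the action that produced the target frame (4 valid Breakout actions);
    # line k of action.txt records the action that led from frame k to frame k + 1
    with open(os.path.join(episode_path, "action.txt")) as f:
        actions = [int(line) for line in f]
    action = np.eye(4, dtype=np.float32)[actions[t + 3]]

    return input_frames, target_frame, action

Whether the frames should additionally be scaled to [0, 1] before being fed depends on how the prediction model ends up normalizing its input, which this commit leaves open.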