Skip to content

Commit

Permalink
testing
Browse files Browse the repository at this point in the history
  • Loading branch information
Naresh1318 committed Dec 3, 2017
1 parent 14b807f commit 4e268c9
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 127 deletions.
7 changes: 7 additions & 0 deletions .idea/inspectionProfiles/Project_Default.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 12 additions & 9 deletions mission_control_v2.py → mission_control_breakout.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
########################################################################################################################
# Training
learning_rate = 1e-6
learning_rate = 0.00025
batch_size = 32
observation_time = 5e5
rand_observation_time = 5e4
observation_time = 1e6 # 1e3
rand_observation_time = 5e4 # 5e2
target_network_update = 1e4 # 1e3
prob_random = 1
gamma = 0.99
n_plays = 10000
fit_epochs = 5
decay = 0.75
n_episodes = 1e4 # 5
fit_epochs = 1
weight_init = 0.01
momentum = 0.95
epsilon = 0.01


########################################################################################################################
# Agent Model
Expand All @@ -19,14 +22,14 @@
stride_2 = [1, 2, 2, 1]
conv_3 = [3, 3, 64, 64]
stride_3 = [1, 1, 1, 1]
dense_1 = 512 # Was 256 before
dense_1 = 512
dense_2 = 4


########################################################################################################################
# Control
train_model = False
show_ui = True
train_model = True
show_ui = False
show_action = False

########################################################################################################################
Expand Down
24 changes: 13 additions & 11 deletions mission_control_lunar.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
########################################################################################################################
# Training
learning_rate = 0.0001
learning_rate = 0.001
batch_size = 32
observation_time = 5e5
rand_observation_time = 5e4
observation_time = 5e4
rand_observation_time = 5e3
prob_random = 1
gamma = 0.9
fit_epochs = 10
n_plays = 10000 # TODO: Change to 100
n_actual_plays = 100
gamma = 0.98
fit_epochs = 1
n_plays = 1000 # TODO: Change to 100
n_actual_plays = 10

# NOTE: Done. Changed the init to Glorot init instead of truncated normal

########################################################################################################################
# Agent Model
dense_1 = 512
dense_2 = 256
dense_3 = 64
dense_1 = 40
dense_2 = 20
dense_3 = 10

########################################################################################################################
# Control
train_model = True
show_ui = False
show_ui = True
show_action = False

########################################################################################################################
Expand Down
33 changes: 10 additions & 23 deletions ops.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import tensorflow as tf
import numpy as np
import scipy.ndimage
import mission_control_v3 as mc
import tensorflow as tf


def dense(x, n1, n2, name):
Expand All @@ -15,9 +14,9 @@ def dense(x, n1, n2, name):
"""
with tf.variable_scope(name):
weights = tf.get_variable('weights', shape=[n1, n2], dtype=tf.float32,
initializer=tf.truncated_normal_initializer(mean=0, stddev=mc.weight_init))
initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
bias = tf.get_variable('bias', shape=[n2], dtype=tf.float32,
initializer=tf.truncated_normal_initializer(mean=0, stddev=mc.weight_init))
initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
output = tf.add(tf.matmul(x, weights), bias, name='output')
return output

Expand All @@ -33,21 +32,22 @@ def cnn_2d(x, weight_shape, strides, name):
"""
with tf.variable_scope(name):
weights = tf.get_variable('weights', shape=weight_shape, dtype=tf.float32,
initializer=tf.truncated_normal_initializer(mean=0, stddev=mc.weight_init))
bias = tf.get_variable('bias', shape=[weight_shape[-1]], initializer=tf.truncated_normal_initializer(mean=0, stddev=mc.weight_init))
initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
bias = tf.get_variable('bias', shape=[weight_shape[-1]], dtype=tf.float32,
initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
output = tf.nn.conv2d(x, filter=weights, strides=strides, padding="VALID", name="Output") + bias
return output


def convert_to_gray_n_resize(im):
"""
Converts the input image to gray scale and resizes it to 84 x 84
Converts the input image to gray scale and resizes it to 84 x 84
:param im: 3d image, image to convert
:return: 2d uint8 image mat, the processed (gray scale, 84 x 84) image
"""
# r, g, b = im[:, :, 0], im[:, :, 1], im[:, :, 2]
# img_gray = 0.2990 * r + 0.5870 * g + 0.1140 * b
img_gray = np.mean(im, axis=2).astype(np.uint8)
r, g, b = im[:, :, 0], im[:, :, 1], im[:, :, 2]
img_gray = 0.2990 * r + 0.5870 * g + 0.1140 * b
img_gray = np.array(img_gray).astype(np.uint8)
img = scipy.misc.imresize(img_gray, size=[84, 84], interp='bicubic')
return np.array(img, dtype=np.uint8)

Expand All @@ -60,16 +60,3 @@ def convert_reward(reward):
"""
return np.sign(reward)


def anneal_epsilon(epi, step):
    """
    Anneal epsilon geometrically for the first 5e5 steps, and fix it to 0.1 thereafter.

    While step < 5e5, every call multiplies the given epsilon by the constant factor
    0.1 ** 0.2e-5. Applying that factor 5e5 times in a row scales the starting value by
    roughly 0.1 overall (e.g. 1.0 -> ~0.1). This is an exponential schedule, not a linear one.
    :param epi: float, value indicating the probability of random action
    :param step: int or float, steps taken during training
    :return: float, annealed epsilon
    """
    anneal_steps = 5e5  # number of steps during which epsilon is still decayed
    final_epsilon = 0.1  # constant value returned once annealing is over
    per_step_decay = 0.1 ** 0.2e-5  # multiplicative shrink applied on each call

    if step < anneal_steps:
        return epi * per_step_decay
    return final_epsilon
Loading

0 comments on commit 4e268c9

Please sign in to comment.