testing

Naresh1318 · Dec 3, 2017 · 4e268c9 · 4e268c9
1 parent 14b807f
commit 4e268c9
Show file tree

Hide file tree

Showing 6 changed files with 168 additions and 127 deletions.
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
diff --git a/mission_control_v2.py → mission_control_breakout.py b/mission_control_v2.py → mission_control_breakout.py
@@ -1,15 +1,18 @@
 ########################################################################################################################
 # Training
-learning_rate = 1e-6
+learning_rate = 0.00025
 batch_size = 32
-observation_time = 5e5
-rand_observation_time = 5e4
+observation_time = 1e6  # 1e3
+rand_observation_time = 5e4  # 5e2
+target_network_update = 1e4  # 1e3
 prob_random = 1
 gamma = 0.99
-n_plays = 10000
-fit_epochs = 5
-decay = 0.75
+n_episodes = 1e4  # 5
+fit_epochs = 1
 weight_init = 0.01
+momentum = 0.95
+epsilon = 0.01
+
 
 ########################################################################################################################
 # Agent Model
@@ -19,14 +22,14 @@
 stride_2 = [1, 2, 2, 1]
 conv_3 = [3, 3, 64, 64]
 stride_3 = [1, 1, 1, 1]
-dense_1 = 512  # Was 256 before
+dense_1 = 512
 dense_2 = 4
 
 
 ########################################################################################################################
 # Control
-train_model = False
-show_ui = True
+train_model = True
+show_ui = False
 show_action = False
 
 ########################################################################################################################

diff --git a/mission_control_lunar.py b/mission_control_lunar.py
@@ -1,25 +1,27 @@
 ########################################################################################################################
 # Training
-learning_rate = 0.0001
+learning_rate = 0.001
 batch_size = 32
-observation_time = 5e5
-rand_observation_time = 5e4
+observation_time = 5e4
+rand_observation_time = 5e3
 prob_random = 1
-gamma = 0.9
-fit_epochs = 10
-n_plays = 10000  # TODO: Change to 100
-n_actual_plays = 100
+gamma = 0.98
+fit_epochs = 1
+n_plays = 1000  # TODO: Change to 100
+n_actual_plays = 10
+
+# Done: changed the init to Glorot init instead of truncated normal
 
 ########################################################################################################################
 # Agent Model
-dense_1 = 512
-dense_2 = 256
-dense_3 = 64
+dense_1 = 40
+dense_2 = 20
+dense_3 = 10
 
 ########################################################################################################################
 # Control
 train_model = True
-show_ui = False
+show_ui = True
 show_action = False
 
 ########################################################################################################################

diff --git a/ops.py b/ops.py
@@ -1,7 +1,6 @@
-import tensorflow as tf
 import numpy as np
 import scipy.ndimage
-import mission_control_v3 as mc
+import tensorflow as tf
 
 
 def dense(x, n1, n2, name):
@@ -15,9 +14,9 @@ def dense(x, n1, n2, name):
     """
     with tf.variable_scope(name):
         weights = tf.get_variable('weights', shape=[n1, n2], dtype=tf.float32,
-                                  initializer=tf.truncated_normal_initializer(mean=0, stddev=mc.weight_init))
+                                  initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
         bias = tf.get_variable('bias', shape=[n2], dtype=tf.float32,
-                               initializer=tf.truncated_normal_initializer(mean=0, stddev=mc.weight_init))
+                               initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
         output = tf.add(tf.matmul(x, weights), bias, name='output')
         return output
 
@@ -33,21 +32,22 @@ def cnn_2d(x, weight_shape, strides, name):
     """
     with tf.variable_scope(name):
         weights = tf.get_variable('weights', shape=weight_shape, dtype=tf.float32,
-                                  initializer=tf.truncated_normal_initializer(mean=0, stddev=mc.weight_init))
-        bias = tf.get_variable('bias', shape=[weight_shape[-1]], initializer=tf.truncated_normal_initializer(mean=0, stddev=mc.weight_init))
+                                  initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
+        bias = tf.get_variable('bias', shape=[weight_shape[-1]], dtype=tf.float32,
+                               initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))
         output = tf.nn.conv2d(x, filter=weights, strides=strides, padding="VALID", name="Output") + bias
         return output
 
 
 def convert_to_gray_n_resize(im):
     """
-    Converts the input image to gray scale and resizes it to 84 x 84
+    Converts the input image to gray scale and resize it to 84 x 84
     :param im: 3d image, image to convert
     :return: 2d image, processed 84 x 84 gray scale image (uint8)
     """
-    # r, g, b = im[:, :, 0], im[:, :, 1], im[:, :, 2]
-    # img_gray = 0.2990 * r + 0.5870 * g + 0.1140 * b
-    img_gray = np.mean(im, axis=2).astype(np.uint8)
+    r, g, b = im[:, :, 0], im[:, :, 1], im[:, :, 2]
+    img_gray = 0.2990 * r + 0.5870 * g + 0.1140 * b
+    img_gray = np.array(img_gray).astype(np.uint8)
     img = scipy.misc.imresize(img_gray, size=[84, 84], interp='bicubic')
     return np.array(img, dtype=np.uint8)
 
@@ -60,16 +60,3 @@ def convert_reward(reward):
     """
     return np.sign(reward)
 
-
-def anneal_epsilon(epi, step):
-    """
-    Anneal epsilon linearly for the first 1e6 steps, and fix it to a constant value thereafter
-    :param epi: float, value indicating the probability of random action
-    :param step: int or float, steps taken during training
-    :return: float, annealed epsilon
-    """
-    if step < 5e5:
-        epi = epi * (.1**.2e-5)
-    else:
-        epi = 0.1
-    return epi