Pong environment config
Add configs and deps for Atari Pong environment.
jupyter31 committed Jul 7, 2024
1 parent 99aea4f commit e6df518
Showing 8 changed files with 103 additions and 8 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -0,0 +1 @@
write something.
3 changes: 3 additions & 0 deletions environment_cuda.yml
@@ -4,6 +4,9 @@ channels:
   - conda-forge
   - pytorch
 dependencies:
+  - gym
+  - gym-atari
+  - gym[accept-rom-license]
   - ffmpeg=4.2.2
   - moviepy=1.0.3
   - notebook=6.5.4
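The configs added below target the ALE/Pong-v5 environment that these dependencies enable. As a quick smoke test (not part of this commit), the environment can be constructed roughly as follows; this assumes the Atari ROMs are available through the accept-rom-license extra and a gym release new enough (>= 0.26) to use the (obs, info) reset and five-tuple step signatures:

import gym

# Hypothetical smoke test, not from the repo: build the env named in the new configs.
env = gym.make("ALE/Pong-v5", render_mode="rgb_array")
obs, info = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()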
46 changes: 46 additions & 0 deletions q_learning/config/dqn.yml
@@ -0,0 +1,46 @@
model: "dqn"

env:
  env_name: "ALE/Pong-v5"
  overwrite_render: True
  record: False # use `record_episode.py` instead
  high: 255.
  render_mode: "rgb_array"

output:
  output_path: &output_path "results/q7_dqn/"
  model_output: "submission/model.weights" # output weights to submission folder
  log_path: !join [*output_path, "log.txt"]
  plot_output: !join [*output_path, "scores.png"]
  record_path: !join [*output_path, "monitor/"]

model_training:
  load_path: "pretrained_weights/model.weights"
  num_episodes_test: 50
  grad_clip: True
  clip_val: 10
  saving_freq: 250000
  log_freq: 50
  eval_freq: 250000
  record_freq: 250000
  soft_epsilon: 0.05
  device: "gpu" # cpu/gpu
  compile: False
  compile_mode: "default"

hyper_params:
  nsteps_train: 4000000
  batch_size: 32
  buffer_size: 1000000
  target_update_freq: 10000
  gamma: 0.99
  learning_freq: 4
  state_history: 4
  skip_frame: 4
  lr_begin: 0.00008
  lr_end: 0.00005
  lr_nsteps: 500000
  eps_begin: 0.5
  eps_end: 0.1
  eps_nsteps: 1000000
  learning_start: 50000
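Both new config files rely on a custom !join YAML tag to splice the *output_path anchor into the log, plot, and recording paths. The constructor behind that tag is not part of this diff; a minimal PyYAML sketch of how such a tag is typically registered (the function name is illustrative) would be:

import yaml

def join_constructor(loader, node):
    # Concatenate the items of a !join sequence into a single string.
    parts = loader.construct_sequence(node)
    return "".join(str(p) for p in parts)

yaml.SafeLoader.add_constructor("!join", join_constructor)

with open("q_learning/config/dqn.yml") as f:
    config = yaml.load(f, Loader=yaml.SafeLoader)

print(config["output"]["log_path"])  # -> results/q7_dqn/log.txt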
45 changes: 45 additions & 0 deletions q_learning/config/linear.yml
@@ -0,0 +1,45 @@
model: "test_linear"

env:
  env_name: "ALE/Pong-v5"
  overwrite_render: True
  record: False
  high: 255.
  render_mode: "rgb_array"

output:
  output_path: &output_path "results/test_linear/"
  model_output: !join [*output_path, "model.weights"]
  log_path: !join [*output_path, "log.txt"]
  plot_output: !join [*output_path, "scores.png"]
  record_path: !join [*output_path, "monitor/"]

model_training:
  num_episodes_test: 50
  grad_clip: True
  clip_val: 10
  saving_freq: 250000
  log_freq: 50
  eval_freq: 250000
  record_freq: 250000
  soft_epsilon: 0.05
  device: "gpu" # cpu/gpu
  compile: False
  compile_mode: "default"

hyper_params:
  nsteps_train: 500000
  batch_size: 32
  buffer_size: 1000000
  target_update_freq: 10000
  gamma: 0.99
  learning_freq: 4
  state_history: 4
  skip_frame: 4
  lr_begin: 0.00008
  lr_end: 0.00005
  lr_nsteps: 500000
  eps_begin: 0.5
  eps_end: 0.1
  eps_nsteps: 1000000
  learning_start: 50000
2 changes: 1 addition & 1 deletion q_learning/config/test_dqn.yml
@@ -35,7 +35,7 @@ hyper_params:
   learning_start: 200
 
 output:
-  output_path: &output_path "results/dqn_deepmind/"
+  output_path: &output_path "results/test_dqn/"
   model_output: !join [*output_path, "model.weights.pt"]
   log_path: !join [*output_path, "log.txt"]
   plot_output: !join [*output_path, "scores.png"]
4 changes: 2 additions & 2 deletions q_learning/config/test_linear.yml
@@ -1,4 +1,4 @@
-model: "linear"
+model: "test_linear"
 
 env:
   env_name: "test_environment"
@@ -35,7 +35,7 @@ hyper_params:
   learning_start: 200
 
 output:
-  output_path: &output_path "results/linear/"
+  output_path: &output_path "results/test_linear/"
   model_output: !join [*output_path, "model.weights.pt"]
   log_path: !join [*output_path, "log.txt"]
   plot_output: !join [*output_path, "scores.png"]
6 changes: 3 additions & 3 deletions q_learning/network/linear.py
@@ -6,7 +6,7 @@
 
 class Linear(AbstractDQN):
     """
-    We represent Q function as linear approximation Q_\theta(s,a) = \theta^T \delta(s,a)
+    We represent Q function as test_linear approximation Q_\theta(s,a) = \theta^T \delta(s,a)
     where [\delta(s,a)]_{s',a'} = 1 iff s' = s, a' = a.
     Implementation of a single fully connected layer with Pytorch to be utilized
     in the DQN algorithm.
@@ -21,7 +21,7 @@ def initialize_models(self):
         state_shape = list(self.env.observation_space.shape)
         img_height, img_width, n_channels = state_shape
         num_actions = self.env.action_space.n
-        # linear layer with num_actions as the output size
+        # test_linear layer with num_actions as the output size
         self.q_network = nn.Linear(
             img_height * img_width * n_channels * self.config["hyper_params"]["state_history"],
             num_actions)
@@ -114,6 +114,6 @@ def calc_loss(
 
     def add_optimizer(self):
         """
-        This function sets the optimizer for our linear network (optimize only q_network).
+        This function sets the optimizer for our test_linear network (optimize only q_network).
         """
         self.optimizer = torch.optim.Adam(self.q_network.parameters())
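For context on the docstring above: when \delta(s,a) is a one-hot indicator, the linear map \theta^T \delta(s,a) is just a table of Q-values indexed by (s, a); the class applies the same idea to flattened pixel observations. A usage sketch of that flattening (the 80x80x1 frame size and 6 actions are illustrative assumptions, not values taken from the repo):

import torch
import torch.nn as nn

# Illustrative only: mirror the flattening performed in initialize_models().
img_height, img_width, n_channels, state_history, num_actions = 80, 80, 1, 4, 6
q_network = nn.Linear(img_height * img_width * n_channels * state_history, num_actions)

state = torch.rand(1, img_height, img_width, n_channels * state_history)  # one stacked observation
q_values = q_network(state.flatten(start_dim=1))  # shape (1, num_actions)
greedy_action = q_values.argmax(dim=1)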
4 changes: 2 additions & 2 deletions q_learning/network/schedule.py
@@ -9,7 +9,7 @@
 
 class LinearSchedule(object):
     """
-    Sets linear schedule for exploration parameter epsilon.
+    Sets test_linear schedule for exploration parameter epsilon.
 
     Args:
         eps_begin (float): initial exploration
@@ -46,7 +46,7 @@ def update(self, t):
 
 class LinearExploration(LinearSchedule):
     """
-    Implements e-greedy exploration with linear decay.
+    Implements e-greedy exploration with test_linear decay.
 
     Args:
         env (object): gym environment
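The update logic of LinearSchedule is collapsed in this view; conceptually it anneals epsilon from eps_begin to eps_end over eps_nsteps steps and then holds it constant, and the configs reuse the same pattern for lr_begin/lr_end/lr_nsteps. A rough sketch with dqn.yml's values (not the repo's exact code):

def interpolate(t, begin=0.5, end=0.1, nsteps=1_000_000):
    # Linear anneal from `begin` to `end`, clamped once `nsteps` steps have elapsed.
    frac = min(t / nsteps, 1.0)
    return begin + frac * (end - begin)

for t in (0, 500_000, 2_000_000):
    print(t, round(interpolate(t), 6))  # 0.5, 0.3, 0.1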
