Pong environment config
Add configs and deps for Atari Pong environment.
jupyter31 committed Jul 7, 2024
1 parent 99aea4f commit e6df518
Showing 8 changed files with 103 additions and 8 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -0,0 +1 @@
write something.
3 changes: 3 additions & 0 deletions environment_cuda.yml
@@ -4,6 +4,9 @@ channels:
   - conda-forge
   - pytorch
 dependencies:
+  - gym
+  - gym-atari
+  - gym[accept-rom-license]
   - ffmpeg=4.2.2
   - moviepy=1.0.3
   - notebook=6.5.4
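The configs added below target the ALE/Pong-v5 environment that these dependencies enable. As a quick smoke test (not part of this commit), the environment can be constructed roughly as follows; this assumes the Atari ROMs are available through the accept-rom-license extra and a gym release new enough (>= 0.26) to use the (obs, info) reset and five-tuple step signatures:

import gym

# Hypothetical smoke test, not from the repo: build the env named in the new configs.
env = gym.make("ALE/Pong-v5", render_mode="rgb_array")
obs, info = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()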
46 changes: 46 additions & 0 deletions q_learning/config/dqn.yml
@@ -0,0 +1,46 @@
model: "dqn"

env:
  env_name: "ALE/Pong-v5"
  overwrite_render: True
  record: False # use `record_episode.py` instead
  high: 255.
  render_mode: "rgb_array"

output:
  output_path: &output_path "results/q7_dqn/"
  model_output: "submission/model.weights" # output weights to submission folder
  log_path: !join [*output_path, "log.txt"]
  plot_output: !join [*output_path, "scores.png"]
  record_path: !join [*output_path, "monitor/"]

model_training:
  load_path: "pretrained_weights/model.weights"
  num_episodes_test: 50
  grad_clip: True
  clip_val: 10
  saving_freq: 250000
  log_freq: 50
  eval_freq: 250000
  record_freq: 250000
  soft_epsilon: 0.05
  device: "gpu" # cpu/gpu
  compile: False
  compile_mode: "default"

hyper_params:
  nsteps_train: 4000000
  batch_size: 32
  buffer_size: 1000000
  target_update_freq: 10000
  gamma: 0.99
  learning_freq: 4
  state_history: 4
  skip_frame: 4
  lr_begin: 0.00008
  lr_end: 0.00005
  lr_nsteps: 500000
  eps_begin: 0.5
  eps_end: 0.1
  eps_nsteps: 1000000
  learning_start: 50000
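Both new config files rely on a custom !join YAML tag to splice the *output_path anchor into the log, plot, and recording paths. The constructor behind that tag is not part of this diff; a minimal PyYAML sketch of how such a tag is typically registered (the function name is illustrative) would be:

import yaml

def join_constructor(loader, node):
    # Concatenate the items of a !join sequence into a single string.
    parts = loader.construct_sequence(node)
    return "".join(str(p) for p in parts)

yaml.SafeLoader.add_constructor("!join", join_constructor)

with open("q_learning/config/dqn.yml") as f:
    config = yaml.load(f, Loader=yaml.SafeLoader)

print(config["output"]["log_path"])  # -> results/q7_dqn/log.txt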
45 changes: 45 additions & 0 deletions q_learning/config/linear.yml
@@ -0,0 +1,45 @@
model: "test_linear"

env:
  env_name: "ALE/Pong-v5"
  overwrite_render: True
  record: False
  high: 255.
  render_mode: "rgb_array"

output:
  output_path: &output_path "results/test_linear/"
  model_output: !join [*output_path, "model.weights"]
  log_path: !join [*output_path, "log.txt"]
  plot_output: !join [*output_path, "scores.png"]
  record_path: !join [*output_path, "monitor/"]

model_training:
  num_episodes_test: 50
  grad_clip: True
  clip_val: 10
  saving_freq: 250000
  log_freq: 50
  eval_freq: 250000
  record_freq: 250000
  soft_epsilon: 0.05
  device: "gpu" # cpu/gpu
  compile: False
  compile_mode: "default"

hyper_params:
  nsteps_train: 500000
  batch_size: 32
  buffer_size: 1000000
  target_update_freq: 10000
  gamma: 0.99
  learning_freq: 4
  state_history: 4
  skip_frame: 4
  lr_begin: 0.00008
  lr_end: 0.00005
  lr_nsteps: 500000
  eps_begin: 0.5
  eps_end: 0.1
  eps_nsteps: 1000000
  learning_start: 50000
2 changes: 1 addition & 1 deletion q_learning/config/test_dqn.yml
@@ -35,7 +35,7 @@ hyper_params:
   learning_start: 200
 
 output:
-  output_path: &output_path "results/dqn_deepmind/"
+  output_path: &output_path "results/test_dqn/"
   model_output: !join [*output_path, "model.weights.pt"]
   log_path: !join [*output_path, "log.txt"]
   plot_output: !join [*output_path, "scores.png"]
4 changes: 2 additions & 2 deletions q_learning/config/test_linear.yml
@@ -1,4 +1,4 @@
-model: "linear"
+model: "test_linear"
 
 env:
   env_name: "test_environment"
@@ -35,7 +35,7 @@ hyper_params:
   learning_start: 200
 
 output:
-  output_path: &output_path "results/linear/"
+  output_path: &output_path "results/test_linear/"
   model_output: !join [*output_path, "model.weights.pt"]
   log_path: !join [*output_path, "log.txt"]
   plot_output: !join [*output_path, "scores.png"]
6 changes: 3 additions & 3 deletions q_learning/network/linear.py
@@ -6,7 +6,7 @@
 
 class Linear(AbstractDQN):
     """
-    We represent Q function as linear approximation Q_\theta(s,a) = \theta^T \delta(s,a)
+    We represent Q function as test_linear approximation Q_\theta(s,a) = \theta^T \delta(s,a)
     where [\delta(s,a)]_{s',a'} = 1 iff s' = s, a' = a.
     Implementation of a single fully connected layer with Pytorch to be utilized
     in the DQN algorithm.
@@ -21,7 +21,7 @@ def initialize_models(self):
         state_shape = list(self.env.observation_space.shape)
         img_height, img_width, n_channels = state_shape
         num_actions = self.env.action_space.n
-        # linear layer with num_actions as the output size
+        # test_linear layer with num_actions as the output size
         self.q_network = nn.Linear(
             img_height * img_width * n_channels * self.config["hyper_params"]["state_history"],
             num_actions)
@@ -114,6 +114,6 @@ def calc_loss(
 
     def add_optimizer(self):
         """
-        This function sets the optimizer for our linear network (optimize only q_network).
+        This function sets the optimizer for our test_linear network (optimize only q_network).
         """
         self.optimizer = torch.optim.Adam(self.q_network.parameters())
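For context on the docstring above: when \delta(s,a) is a one-hot indicator, the linear map \theta^T \delta(s,a) is just a table of Q-values indexed by (s, a); the class applies the same idea to flattened pixel observations. A usage sketch of that flattening (the 80x80x1 frame size and 6 actions are illustrative assumptions, not values taken from the repo):

import torch
import torch.nn as nn

# Illustrative only: mirror the flattening performed in initialize_models().
img_height, img_width, n_channels, state_history, num_actions = 80, 80, 1, 4, 6
q_network = nn.Linear(img_height * img_width * n_channels * state_history, num_actions)

state = torch.rand(1, img_height, img_width, n_channels * state_history)  # one stacked observation
q_values = q_network(state.flatten(start_dim=1))  # shape (1, num_actions)
greedy_action = q_values.argmax(dim=1)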
4 changes: 2 additions & 2 deletions q_learning/network/schedule.py
@@ -9,7 +9,7 @@
 
 class LinearSchedule(object):
     """
-    Sets linear schedule for exploration parameter epsilon.
+    Sets test_linear schedule for exploration parameter epsilon.
 
     Args:
         eps_begin (float): initial exploration
@@ -46,7 +46,7 @@ def update(self, t):
 
 class LinearExploration(LinearSchedule):
     """
-    Implements e-greedy exploration with linear decay.
+    Implements e-greedy exploration with test_linear decay.
 
     Args:
         env (object): gym environment
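The update logic of LinearSchedule is collapsed in this view; conceptually it anneals epsilon from eps_begin to eps_end over eps_nsteps steps and then holds it constant, and the configs reuse the same pattern for lr_begin/lr_end/lr_nsteps. A rough sketch with dqn.yml's values (not the repo's exact code):

def interpolate(t, begin=0.5, end=0.1, nsteps=1_000_000):
    # Linear anneal from `begin` to `end`, clamped once `nsteps` steps have elapsed.
    frac = min(t / nsteps, 1.0)
    return begin + frac * (end - begin)

for t in (0, 500_000, 2_000_000):
    print(t, round(interpolate(t), 6))  # 0.5, 0.3, 0.1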
