-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathconfig.py
37 lines (27 loc) · 792 Bytes
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from agent import TrpoAgent, PPOAgent
class config(object):
_vf_update_methods = ['LBFGS', 'GD']
policy_net_size = [64, 64]
baseline_net_size = [128, 64, 32]
init_log_var = -1 # e^-1
delta = 1e-2 # KL divergence between old and new policy (averaged over state-space)
cg_damping = 1e-1
env_name = 'Hopper-v1'
animate = False
timestep_limit = 10000
timesteps_per_batch = 10000
gamma = 0.99
lam = 0.97
vf_update_method = _vf_update_methods[1]
max_lbfgs_iter = 100
reg = 5e-3
mixfrac = 1.0 # 1.0 corresponds to overfitting
# PPO Updater Setup #
init_beta = 1.0
init_eta = 50
lr_multiplier = 1.0
kl_targ = 0.003
ppo_lr = None
update_epochs = 20
agent = PPOAgent
iterations = 1000