train.py
from Agent import Agent
from RLModel import RLModel
from ActorCritic import ActorCriticModel
import time

# PLACEHOLDER FOR TRAINING
model = ActorCriticModel(state_size=10, action_size=18)  # state_size=10: 9 scalar values plus the image; action_size=18: 18 possible actions
agent = Agent(model, start_session=True)

num_episodes = 500
max_steps = 100
target_update_freq = 10

for episode in range(num_episodes):
    agent.reset()
    pos_state, image, json_state = agent.prepare_state()
    total_reward = 0

    for step in range(max_steps):
        # Select an action by querying the agent for its action distributions
        print("Making decision...")
        discrete_probs, continuous_probs, output_vector = agent.decision()
        action_probs = [discrete_probs, continuous_probs]
        print("Decision Made")

        # Observe the resulting state and compute the reward
        next_pos_state, next_image, next_json_state = agent.prepare_state()
        reward = model.reward(next_json_state)
        done = False

        # Store the transition and take a training step
        model.store_experience(pos_state, image, action_probs, reward, next_pos_state, next_image, done)
        model.train()  # Does not work yet, have to think about the action state more

        pos_state = next_pos_state
        image = next_image
        total_reward += reward

    # Periodically sync the target network with the online network
    if episode % target_update_freq == 0:
        model.update_target_network()

    print(f"Episode {episode + 1}/{num_episodes}, Total Reward: {total_reward}, Epsilon: {model.epsilon}")

# Keep the process alive after the training loop finishes
time.sleep(100000)
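
The loop above calls model.update_target_network() every target_update_freq episodes, but that method is not shown in this file. Below is a minimal sketch of how such an update is commonly implemented, assuming a PyTorch backend; the names policy_net, target_net, and tau are illustrative assumptions, not attributes taken from ActorCriticModel.

# Hypothetical sketch (not part of the repo): hard and soft target-network updates.
import copy

import torch


class TargetNetworkExample:
    def __init__(self, policy_net: torch.nn.Module, tau: float = 0.005):
        self.policy_net = policy_net
        self.target_net = copy.deepcopy(policy_net)  # frozen copy used for stable value targets
        self.tau = tau

    def hard_update(self):
        # Copy all weights from the online network into the target network.
        self.target_net.load_state_dict(self.policy_net.state_dict())

    def soft_update(self):
        # Polyak averaging: target <- tau * online + (1 - tau) * target.
        for t_param, p_param in zip(self.target_net.parameters(), self.policy_net.parameters()):
            t_param.data.copy_(self.tau * p_param.data + (1.0 - self.tau) * t_param.data)

A hard update (full copy every N episodes) matches the episode-frequency pattern used in the loop above; a soft update would instead be called after every training step.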