import os
import numpy as np
import skvideo.io
from baselines.a2c.utils import discount_with_dones
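# skvideo comes from the scikit-video package; its FFmpegWriter drives an
# ffmpeg binary, which must be available on the PATH for video export.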


class Runner(object):
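    """Collects A2C rollouts from a vectorized env.

    Steps the model/env for `nsteps`, optionally rendering to screen and
    buffering frames of env 0 for video export, then returns flattened,
    discounted batches for training.
    """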
    def __init__(self, env, model, nsteps=5, gamma=0.99, render=False, record_name=None):
        self.env = env
        self.model = model
        self.render = render
        self.record_name = record_name
        self.record = record_name is not None
        self.recording = []  # buffered frames for the video currently being recorded
        nh, nw, nc = env.observation_space.shape
        self.nenv = env.num_envs
        self.batch_ob_shape = (self.nenv * nsteps, nh, nw, nc)
        self.obs = np.zeros((self.nenv, nh, nw, nc), dtype=np.uint8)
        self.nc = nc
        obs = env.reset()
        self.update_obs(obs)  # seed self.obs; the reset observation was previously discarded
        self.gamma = gamma
        self.nsteps = nsteps
        self.states = model.initial_state
        self.dones = [False for _ in range(self.nenv)]
        self.final_rewards = []
        self.episode_rewards = [0 for _ in range(self.nenv)]
        self.last_level_id = None

    def update_obs(self, obs):
        # Do frame-stacking here instead of the FrameStack wrapper to reduce
        # IPC overhead. With a single frame in the buffer (self.obs holds nc
        # channels), the roll wraps all the way around and this reduces to
        # overwriting the whole buffer in place.
        self.obs = np.roll(self.obs, shift=-self.nc, axis=3)
        self.obs[:, :, :, -self.nc:] = obs
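
    # Video export: write the buffered env-0 frames to the first unused
    # "<record_name>_<i>.mp4" filename, then clear the buffer.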
    def makevideo(self):
        vid = 1
        name = "{}_{}.mp4".format(self.record_name, vid)
        while os.path.isfile(name):
            vid += 1
            name = "{}_{}.mp4".format(self.record_name, vid)
        writer = skvideo.io.FFmpegWriter(name)
        for obs in self.recording:
            writer.writeFrame(obs)
        writer.close()
        self.recording = []
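
    # One training iteration: step all envs for nsteps, then flatten the
    # (nsteps, nenv) rollout into (nenv * nsteps,) batches with n-step returns.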
    def run(self):
        mb_obs, mb_rewards, mb_actions, mb_values, mb_dones = [], [], [], [], []
        mb_states = self.states
        for n in range(self.nsteps):
            actions, values, states, _ = self.model.step(self.obs, self.states, self.dones)
            mb_obs.append(np.copy(self.obs))
            mb_actions.append(actions)
            mb_values.append(values)
            mb_dones.append(self.dones)
            obs, rewards, dones, _ = self.env.step(actions)
            if self.render:
                self.env.render()
            if self.record:
                frame = obs[0, :, :, :3]  # record env 0 only, first three channels
                self.recording.append(frame)
            self.states = states
            self.dones = dones
            for i, done in enumerate(dones):
                if done:
                    self.obs[i] = self.obs[i] * 0  # blank finished envs before the next frame
            self.update_obs(obs)  # write into the uint8 buffer rather than rebinding self.obs
            mb_rewards.append(rewards)
        mb_dones.append(self.dones)
        # Batch of steps to batch of rollouts: (nsteps, nenv, ...) -> (nenv, nsteps, ...)
        mb_obs = np.asarray(mb_obs, dtype=np.uint8).swapaxes(1, 0).reshape(self.batch_ob_shape)
        mb_rewards = np.asarray(mb_rewards, dtype=np.float32).swapaxes(1, 0)
        mb_actions = np.asarray(mb_actions, dtype=np.int32).swapaxes(1, 0)
        mb_values = np.asarray(mb_values, dtype=np.float32).swapaxes(1, 0)
        mb_dones = np.asarray(mb_dones, dtype=bool).swapaxes(1, 0)  # np.bool was removed in NumPy >= 1.24
        mb_masks = mb_dones[:, :-1]  # dones *before* each step (masks recurrent state)
        mb_dones = mb_dones[:, 1:]   # dones *after* each step (terminates returns)
        last_values = self.model.value(self.obs, self.states, self.dones).tolist()
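        # discount_with_dones (from baselines.a2c.utils) returns, for each step
        # t, the discounted sum r_t + gamma * r_{t+1} + ..., reset at each done.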
        # Discount/bootstrap off the value function
        for n, (rewards, dones, value) in enumerate(zip(mb_rewards, mb_dones, last_values)):
            rewards = rewards.tolist()
            dones = dones.tolist()
            # Walk the rewards step by step, logging a final score whenever an episode ends
            for i in range(self.nsteps):
                self.episode_rewards[n] += rewards[i]
                if dones[i]:
                    self.final_rewards.append(self.episode_rewards[n])
                    self.episode_rewards[n] = 0
                    # Save the finished game as a video; only env 0 is ever
                    # recorded, so ignore episode boundaries of the other envs
                    if self.record and n == 0:
                        self.makevideo()
            # Compute n-step returns; if the rollout did not end in a terminal
            # state, bootstrap from the critic's value of the last observation
            if not dones[-1]:
                rewards = discount_with_dones(rewards + [value], dones + [0], self.gamma)[:-1]
            else:
                rewards = discount_with_dones(rewards, dones, self.gamma)
            mb_rewards[n] = rewards
        mb_rewards = mb_rewards.flatten()
        mb_actions = mb_actions.flatten()
        mb_values = mb_values.flatten()
        mb_masks = mb_masks.flatten()
        return mb_obs, mb_states, mb_rewards, mb_masks, mb_actions, mb_values
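

# Minimal usage sketch (an assumption, not part of this file): `env` is a
# baselines-style vectorized env exposing `num_envs` and image observations,
# and `model` exposes `step`, `value`, and `initial_state`, e.g.
#
#     runner = Runner(env, model, nsteps=5, gamma=0.99, record_name="episode")
#     obs, states, rewards, masks, actions, values = runner.run()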