resultmaker_d.py
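"""Evaluate trained PPO2 conveyor models and export averaged episode metrics.

For every run listed in ``to_combine`` the script reloads the saved config and
best model, plays ``--num_episodes`` deterministic episodes, averages idle
time, cycle count, steps, items processed and reward over those episodes, and
writes the table to evaluation_results/results_DRL_<terms>.csv.

Illustrative invocation (the term and episode count below are placeholders):

    python resultmaker_d.py --terms conveyor_eval --num_episodes 10
"""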
from rl.helpers import Load_data
import pathlib
import argparse
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
from os.path import join
import pandas as pd
import yaml
import rl
path = pathlib.Path().absolute()
scalars = ['episode_reward', 'loss/loss']
timevar = 'step' # wall_time or step
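# Trained runs to evaluate: each entry is [environment class name, training
# timestamp], i.e. the subdirectory under rl/trained_models/<env> that holds
# the run's config.yml and best_model.zip.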
to_combine = [
    ['ConveyorEnv12', '20210112_1530'],  # 1x3
    ['ConveyorEnv12', '20210112_1445'],  # 2x2
    ['ConveyorEnv12', '20210112_1600'],  # 2x3
    ['ConveyorEnv12', '20210112_1700'],  # 3x3
    ['ConveyorEnv12', '20210112_1800'],  # 4x3
    ['ConveyorEnv12', '20210112_1900']   # 5x3
]
if __name__ == "__main__":
    # parse the command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--terms', type=str, help='Term to identify specific plot')
    parser.add_argument('-n', '--num_episodes', type=int, help='Number of episodes to test on.')
    args = parser.parse_args()
    combinations = ['1x2', '2x2', '2x3', '3x3', '4x3', '5x3']
    results = {}
    for idx, combination in enumerate(to_combine):
        env1, subdir = combination
        # load the config and variables needed
        location_path = join(path, 'rl', 'trained_models', env1, subdir)
        with open(join(location_path, 'config.yml'), 'r') as c:
            config = yaml.load(c, Loader=yaml.FullLoader)  # explicit loader; plain yaml.load(c) is deprecated in PyYAML >= 5.1
        print('\nLoaded config file from: {}\n'.format(join(location_path, 'config.yml')))
        model_config = config['models']['PPO2']
        # switch termination cases
        config['environment']['terminate_on_idle'] = False
        config['environment']['alternative_terminate'] = True
        # initialize the environment with the config file
        env_obj = getattr(rl.environments, env1)
        env = env_obj(config)
        # load the best model of this run
        model = PPO2.load(join(location_path, 'best_model.zip'), env=DummyVecEnv([lambda: env]))
        results[combinations[idx]] = {}
        results[combinations[idx]]['configuration'] = '{}x{}'.format(config['environment']['amount_of_gtps'],
                                                                     config['environment']['amount_of_outputs'])
        results[combinations[idx]]['gamma'] = config['models']['PPO2']['gamma']
        results[combinations[idx]]['idle_time'] = 0
        results[combinations[idx]]['cycle_count'] = 0
        results[combinations[idx]]['steps'] = 0
        results[combinations[idx]]['items_processed'] = 0
        results[combinations[idx]]['reward'] = 0
        for episode in range(args.num_episodes):
            # run one deterministic episode with the trained policy
            state = env.reset()
            done = False
            while not done:
                action, _ = model.predict(state, deterministic=True)
                state, reward, done, tc = env.step(action)
                results[combinations[idx]]['reward'] += reward
            # accumulate the episode statistics reported by the environment
            results[combinations[idx]]['idle_time'] += sum(env.idle_times_operator.values())
            results[combinations[idx]]['cycle_count'] += env.cycle_count
            results[combinations[idx]]['steps'] += env.steps
            results[combinations[idx]]['items_processed'] += env.items_processed
        # average the accumulated metrics over the evaluated episodes
        results[combinations[idx]]['idle_time'] /= args.num_episodes
        results[combinations[idx]]['cycle_count'] /= args.num_episodes
        results[combinations[idx]]['steps'] /= args.num_episodes
        results[combinations[idx]]['items_processed'] /= args.num_episodes
        results[combinations[idx]]['reward'] /= args.num_episodes
    # collect the per-configuration results in a dataframe and write them to csv
    resultcsv = pd.DataFrame.from_dict(results).T
    resultcsv['idle_percent'] = resultcsv.idle_time / resultcsv.steps
    resultcsv['cycle_percent'] = resultcsv.cycle_count / resultcsv.items_processed
    resultcsv.to_csv('evaluation_results/results_DRL_{}.csv'.format(args.terms))
    print('Results saved to: evaluation_results/results_DRL_{}.csv'.format(args.terms))