Commit: PEP8

aPovidlo committed May 21, 2024
1 parent 78697b0 commit a649ea9
Showing 18 changed files with 82 additions and 108 deletions.
2 changes: 1 addition & 1 deletion gamlet/utils.py
@@ -3,4 +3,4 @@
 
 def project_root() -> Path:
     """Returns MetaFEDOT project root folder."""
-    return Path(__file__).parent.parent.parent
+    return Path(__file__).parent.parent.parent
6 changes: 0 additions & 6 deletions meta_automl/utils.py

This file was deleted.

6 changes: 4 additions & 2 deletions rl_core/agent/decision_transformer.py
@@ -13,15 +13,17 @@ def __init__(self, input_dim, embed_dim):
         self.embedding = nn.Linear(input_dim, embed_dim)
 
     def forward(self, x, pos_embedding):
-        output = self.embedding(x)
         return self.embedding(x) + pos_embedding
 
 
 class DecisionTransformer(nn.Module):
     """ https://arxiv.org/abs/2106.01345 """
     metadata = {'name': 'DecisionTransformer'}
 
-    def __init__(self, state_dim, action_dim, max_length, embed_dim, num_heads, num_layers, dim_feedforward=2048, device=DEVICE):
+    def __init__(
+            self, state_dim, action_dim, max_length, embed_dim, num_heads, num_layers, dim_feedforward=2048,
+            device=DEVICE
+    ):
         super().__init__()
         self.state_dim = state_dim
         self.action_dim = action_dim
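For orientation, the reformatted signature is called like this; the dimensions below are illustrative assumptions, not values taken from this repository:

    from rl_core.agent.decision_transformer import DecisionTransformer

    model = DecisionTransformer(
        state_dim=128,   # observation size (assumed for illustration)
        action_dim=25,   # number of discrete actions (assumed)
        max_length=10,   # trajectory context window (assumed)
        embed_dim=64,
        num_heads=4,
        num_layers=2,
    )  # dim_feedforward=2048 and device=DEVICE keep their defaults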
3 changes: 2 additions & 1 deletion rl_core/agent/dqn.py
@@ -33,7 +33,8 @@ class DQN:
     """ https://arxiv.org/abs/1312.5602 """
     metadata = {'name': 'DQN'}
 
-    def __init__(self, state_dim, action_dim, hidden_dim=512, gamma=0.01, lr=1e-4, batch_size=64, eps_decrease=1e-6, eps_min=1e-3, device='cuda'):
+    def __init__(self, state_dim, action_dim, hidden_dim=512, gamma=0.01, lr=1e-4, batch_size=64, eps_decrease=1e-6,
+                 eps_min=1e-3, device='cuda'):
         self.state_dim = state_dim
         self.action_dim = action_dim
         self.hidden_dim = hidden_dim
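The wrapped parameters `eps_decrease` and `eps_min` drive the exploration schedule. A minimal sketch of the linear epsilon-greedy decay those names suggest; the actual update rule lives outside this hunk, so treat this as an assumption:

    eps, eps_decrease, eps_min = 1.0, 1e-6, 1e-3  # start, per-step decay, floor

    for step in range(1_000_000):
        # act randomly with probability eps, otherwise take argmax-Q (omitted)
        eps = max(eps - eps_decrease, eps_min)  # linear decay toward the floor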
7 changes: 5 additions & 2 deletions rl_core/agent/ppo.py
@@ -43,7 +43,8 @@ def get_size(self):
         dones_size = self.convert_size(self.dones.__sizeof__())
         masks_size = self.convert_size(self.masks.__sizeof__())
 
-        return f'states: {states_size}, actions: {actions_size}, rewards: {rewards_size}, dones: {dones_size}, masks: {masks_size}'
+        return f'states: {states_size}, actions: {actions_size}, rewards: {rewards_size},' \
+               f' dones: {dones_size}, masks: {masks_size}'
 
     @staticmethod
     def convert_size(size_bytes):
@@ -207,7 +208,9 @@ def update(self):
             entropy_penalty = -self.tau * entropy
 
             # KL-Divergence
-            kld = torch.nn.functional.kl_div(b_m_new_log_probs, b_m_old_log_probs, log_target=True).detach().cpu().item()
+            kld = torch.nn.functional.kl_div(
+                b_m_new_log_probs, b_m_old_log_probs, log_target=True
+            ).detach().cpu().item()
 
             b_ratio = torch.exp(b_new_log_probs - b_old_log_probs)
             pi_loss_1 = b_ratio * b_advantage.detach()
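The KL-divergence call is only re-wrapped, not changed. For reference, a standalone sketch of how `torch.nn.functional.kl_div` treats its arguments here: both tensors are log-probabilities, and `log_target=True` marks the target as log-space too:

    import torch
    import torch.nn.functional as F

    new_log_probs = torch.log_softmax(torch.randn(4, 8), dim=-1)
    old_log_probs = torch.log_softmax(torch.randn(4, 8), dim=-1)

    # kl_div(input, target) estimates KL(target || input), so old is the target;
    # the default reduction='mean' averages over elements.
    kld = F.kl_div(new_log_probs, old_log_probs, log_target=True).item()
    print(kld)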
11 changes: 8 additions & 3 deletions rl_core/environments/time_series.py
@@ -14,7 +14,7 @@
 from sklearn.metrics import mean_absolute_error
 from sklearn.model_selection import train_test_split
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.dataloader import TimeSeriesDataLoader
 
 PLOT_PRED = False
@@ -273,7 +273,7 @@ def step(self, action: int, mode: str = 'train') -> (np.ndarray, int, bool, bool
         assert action in self.action_space
 
         # Checks if action is not valid
-        if not action in self._get_available_actions().keys():
+        if action not in self._get_available_actions().keys():
             terminated = False
             truncated = False
             reward = -0.01
@@ -570,7 +570,12 @@ def _get_maximum_number_of_actions_in_environment(number_of_nodes) -> int:
     dataloader = TimeSeriesDataLoader(train_datasets, path_to_meta_data=path_to_meta_data)
     train_data, test_data, meta_data = dataloader.get_data(dataset_name='M4_Q5278')
 
-    env = TimeSeriesPipelineEnvironment(max_number_of_nodes=10, using_number_of_nodes=10, render_mode='pipeline_plot', metadata_dim=125)
+    env = TimeSeriesPipelineEnvironment(
+        max_number_of_nodes=10,
+        using_number_of_nodes=10,
+        render_mode='pipeline_plot',
+        metadata_dim=125
+    )
     env.load_data(train_data, test_data, meta_data)
     terminated = False
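Continuing the `__main__` block above, a hedged rollout sketch against the environment API visible in this diff; reset/step follow the gymnasium five-tuple convention the `step` signature shows, and the random action sampling is purely illustrative:

    import random

    state, _ = env.reset()
    terminated, truncated = False, False

    while not (terminated or truncated):
        # sample only from currently valid actions, mirroring the check in step()
        action = random.choice(list(env._get_available_actions().keys()))
        state, reward, terminated, truncated, info = env.step(action)

    info['pipeline'].show()  # render the assembled pipeline, as done elsewhere in this commit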
20 changes: 2 additions & 18 deletions rl_core/experiments/ts_experiment_dqn.py
@@ -1,17 +1,14 @@
 import os.path
-from itertools import product
 
 import numpy as np
-from joblib import Parallel, delayed
+import wandb
 from torch.utils.tensorboard import SummaryWriter
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.agent.dqn import DQN
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
 from rl_core.utils import define_data_for_experiment, OFFLINE_TRAJECTORIES
 
-import wandb
-
 
 def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, eps_min, eps_decrease):
     wandb.init(
@@ -138,7 +135,6 @@ def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, e
         print(f'-- Finishing {episode} episode --\n')
 
     # -- Saving Agent ---
-
     name = f'{agent.metadata["name"]}_{agent.hidden_dim}_{n_episodes}'
    path = f'{log_dir}/weights/{name}'
     agent.save(path)
@@ -156,15 +152,3 @@ def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, e
 }
 
 run_experiment(2000, 10, 512, 0.75, 1e-5, 1e-4)
-
-# flag_to_continue = False
-# last_experiment = [2, 512, 0.05, 0.001, 0.01]
-
-# for m, h, g, e_m, e_d in product(*params.values()):
-#     if [m, h, g, e_m, e_d] == last_experiment:
-#         flag_to_continue = True
-#
-#     if flag_to_continue:
-#         run_experiment(2000, m, h, g, e_m, e_d)
-
-# Parallel(n_jobs=-2)(delayed(run_experiment)(2000, m, h, g, e_m, e_d) for m, h, g, e_m, e_d in product(*params.values()))
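The deleted comments above were a disabled hyperparameter sweep. If it were ever needed again, a compact equivalent of the removed `Parallel` line, assuming the `params` dict defined just above the final call:

    from itertools import product

    from joblib import Parallel, delayed

    Parallel(n_jobs=-2)(
        delayed(run_experiment)(2000, m, h, g, e_m, e_d)
        for m, h, g, e_m, e_d in product(*params.values())
    )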
12 changes: 6 additions & 6 deletions rl_core/experiments/ts_experiment_dt.py
@@ -1,25 +1,25 @@
 import os
 import random
 
 import numpy as np
 import torch
 from torch import nn
 from torch.utils.data import random_split, DataLoader
 from tqdm import tqdm
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.agent.decision_transformer import DecisionTransformer
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
 from rl_core.utils import OFFLINE_TRAJECTORIES, define_data_for_experiment
 
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
 def encode_one_hot(target_dim, num_dims):
     output = np.zeros(num_dims)
     output[target_dim] = 1
 
     return output
 
 
 class EnvDataset(torch.utils.data.Dataset):
     def __init__(self, env, max_length, num_trajectories, goal):
         self.data = []
@@ -99,6 +99,7 @@ def collate_batch(batch):
 
     return zip(*result)
 
+
 def evaluate_model(model, env, max_length, target_return, info_return=False):
     model.eval()
@@ -183,7 +184,7 @@ def validate_model(model, dataloader, max_length, target_return):
             loss = criterion(predicted_actions, tensor_actions.detach())
 
             total_loss += loss.item()
-            pbar.set_postfix({"loss":loss.item()})
+            pbar.set_postfix({"loss": loss.item()})
 
     average_loss = total_loss / len(dataloader)
     print(f"Average validation loss: {average_loss}")
@@ -253,7 +254,6 @@ def inference_model(model, env_dataloader_test, test_list, max_length):
         info['pipeline'].show()
 
 
-
 if __name__ == '__main__':
     number_of_nodes_in_pipeline = 5
@@ -311,7 +311,7 @@ def inference_model(model, env_dataloader_test, test_list, max_length):
         target_return=max_return
     )
 
-    exp_name = f'DecisionTransformer'
+    exp_name = 'DecisionTransformer'
     log_dir = f'{project_root()}/MetaFEDOT/rl_core/agent/tensorboard_logs/dt/{number_of_nodes_in_pipeline}/{exp_name}'
     name = f'{model.metadata["name"]}_{n_epochs}'
     # os.mkdir(f'{log_dir}/weights')
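The `encode_one_hot` helper shown in the first hunk is self-contained and easy to sanity-check:

    import numpy as np

    def encode_one_hot(target_dim, num_dims):
        output = np.zeros(num_dims)
        output[target_dim] = 1
        return output

    print(encode_one_hot(2, 5))  # [0. 0. 1. 0. 0.]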
9 changes: 4 additions & 5 deletions rl_core/experiments/ts_experiment_ppo.py
@@ -4,7 +4,7 @@
 import numpy as np
 from torch.utils.tensorboard import SummaryWriter
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.agent.ppo import PPO
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
 from rl_core.utils import define_data_for_experiment
@@ -36,7 +36,6 @@ def run_experiment(n_episodes=2000, number_of_nodes_in_pipeline=3):
     total_metrics = []
 
     period = 20
-    period_of_cleaning = 15
     period_of_heatmap = 100
 
     for episode in range(1, n_episodes + 1):
@@ -50,7 +49,6 @@ def run_experiment(n_episodes=2000, number_of_nodes_in_pipeline=3):
 
         done = False
         episode_reward = 0
-        episode_metric = 0
         probs_matrix = np.zeros((env.action_dim, env.max_number_of_actions))
         m_idx = 0
@@ -115,7 +113,7 @@ def run_experiment(n_episodes=2000, number_of_nodes_in_pipeline=3):
 
             actions_labels = [str(env.get_action_code(action)) for action in range(len(probs_matrix[:, 0]))]
 
-            fig = sns.heatmap(
+            sns.heatmap(
                 probs_matrix,
                 annot=labels,
                 yticklabels=actions_labels,
@@ -134,7 +132,8 @@ def run_experiment(n_episodes=2000, number_of_nodes_in_pipeline=3):
         print(f'-- Finishing {episode} episode --\n')
 
     # -- Saving Agent ---
-    name = f'{env.metadata["name"]}_{state_dim}_{number_of_nodes_in_pipeline}_{agent.metadata["name"]}_{agent.hidden_dim}_{n_episodes}'
+    name = f'{env.metadata["name"]}_{state_dim}_{number_of_nodes_in_pipeline}_{agent.metadata["name"]}_' \
+           f'{agent.hidden_dim}_{n_episodes}'
     path = f'{log_dir}/weights/{name}'
     agent.save(path)
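Dropping the `fig = ` assignment matches seaborn's API: `sns.heatmap` returns a matplotlib `Axes`, not a `Figure`, so the old name was misleading. A minimal sketch of keeping a handle anyway (the file name is illustrative):

    import matplotlib.pyplot as plt
    import numpy as np
    import seaborn as sns

    ax = sns.heatmap(np.random.rand(4, 4), annot=True)  # returns an Axes
    ax.figure.savefig('heatmap.png')  # saving goes through the parent Figure
    plt.close(ax.figure)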
29 changes: 13 additions & 16 deletions rl_core/experiments/ts_stablebaseline3.py
@@ -1,20 +1,11 @@
-import random
-
-import torch
-from wandb.integration.sb3 import WandbCallback
-
-import wandb
 import gymnasium as gym
 import numpy as np
-from sb3_contrib import MaskablePPO, RecurrentPPO
-from stable_baselines3.common.vec_env import DummyVecEnv
-from stable_baselines3 import PPO
-
-from sb3_contrib.common.maskable.policies import MaskableMultiInputActorCriticPolicy
-from sb3_contrib.common.wrappers import ActionMasker
+import wandb
+from sb3_contrib import RecurrentPPO
+from wandb.integration.sb3 import WandbCallback
 
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
-from rl_core.utils import define_data_for_experiment, OFFLINE_TRAJECTORIES
+from rl_core.utils import define_data_for_experiment
 
 
 def mask_fn(env: gym.Env) -> np.ndarray:
@@ -36,7 +27,13 @@ def mask_fn(env: gym.Env) -> np.ndarray:
     'clip_range': 0.2,
 }
 
-run = wandb.init(project='sb3_ts_recurrent', config=config, sync_tensorboard=True, monitor_gym=False, save_code=True)
+run = wandb.init(
+    project='sb3_ts_recurrent',
+    config=config,
+    sync_tensorboard=True,
+    monitor_gym=False,
+    save_code=True
+)
 
 env_params = dict(
     max_number_of_nodes=config['max_number_of_nodes_in_pipeline'],
@@ -87,8 +84,9 @@ def mask_fn(env: gym.Env) -> np.ndarray:
     state, _ = env.reset()
 
     while not done:
-        # action, _state = model.predict(state, action_masks=env.valid_action_mask())
         action, _state = model.predict(state)
+        # For MaskedPPO use next:
+        # action, _state = model.predict(state, action_masks=env.valid_action_mask())
         print(f'{action}', end=', ')
 
         next_state, reward, terminated, truncated, info = env.step(action.item())
@@ -99,4 +97,3 @@ def mask_fn(env: gym.Env) -> np.ndarray:
     print(f'reward {reward} \ninfo: {info}')
 
     info['pipeline'].show()
-
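The commented-out `predict` call refers to the masked variant this file no longer imports. For reference, a sketch of how `mask_fn` would plug back in through sb3_contrib; this script trains `RecurrentPPO`, so the wiring below is an assumption rather than its actual path:

    from sb3_contrib import MaskablePPO
    from sb3_contrib.common.wrappers import ActionMasker

    masked_env = ActionMasker(env, mask_fn)  # mask_fn(env) -> array of valid-action flags
    model = MaskablePPO('MultiInputPolicy', masked_env, verbose=1)
    model.learn(total_timesteps=10_000)

    obs, _ = masked_env.reset()
    action, _state = model.predict(obs, action_masks=masked_env.action_masks())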
2 changes: 1 addition & 1 deletion rl_core/experiments/ts_validation_dqn.py
@@ -6,7 +6,7 @@
 import torch
 from torch.utils.tensorboard import SummaryWriter
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.agent.dqn import DQN
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
 from rl_core.utils import define_data_for_experiment
15 changes: 6 additions & 9 deletions rl_core/experiments/ts_validation_dt.py
@@ -6,14 +6,15 @@
 import torch
 from torch.utils.tensorboard import SummaryWriter
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.agent.dqn import DQN
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
 from rl_core.utils import define_data_for_experiment
 
 
 def print_params(experiment_name, number_of_nodes_in_pipeline=8):
-    log_dir = f'{project_root()}/MetaFEDOT/rl_core/agent/tensorboard_logs/dqn/{number_of_nodes_in_pipeline}/{experiment_name}'
+    log_dir = f'{project_root()}/MetaFEDOT/rl_core/agent/tensorboard_logs/dqn/' \
+              f'{number_of_nodes_in_pipeline}/{experiment_name}'
 
     with io.open(f'{log_dir}/params.log', 'r', encoding='utf-8') as file:
         lines = file.readlines()
@@ -57,8 +58,6 @@ def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, e
     total_metrics = []
 
     period = 20
-    period_of_cleaning = 15
-    period_of_heatmap = 100
 
     for episode in range(1, n_episodes + 1):
         print(f'-- Starting {episode} episode --')
@@ -119,7 +118,8 @@ def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, e
         print(f'-- Finishing {episode} episode --\n')
 
     # -- Saving Agent ---
-    name = f'{env.metadata["name"]}_{number_of_nodes_in_pipeline}_{state_dim}_{agent.metadata["name"]}_{agent.hidden_dim}_{n_episodes}'
+    name = f'{env.metadata["name"]}_{number_of_nodes_in_pipeline}_{state_dim}_{agent.metadata["name"]}' \
+           f'_{agent.hidden_dim}_{n_episodes}'
     path = f'{log_dir}/weight'
 
     if not os.path.exists(path):
@@ -191,7 +191,4 @@ def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, e
     ]
 
 for h, g, e, e_m, e_d in params:
-    run_experiment(2000, 8, h, g, e, e_m, e_d)
-
-# Parallel(n_jobs=-2)(
-#     delayed(run_experiment)(2000, 8, h, g, e, e_m, e_d) for m, h, g, e, e_m, e_d in params)
+    run_experiment(2000, 8, h, g, e, e_m, e_d)
2 changes: 1 addition & 1 deletion rl_core/pipeline_validation.py
@@ -6,7 +6,7 @@
 from fedot.core.pipelines.pipeline_builder import PipelineBuilder
 from sklearn.metrics import mean_absolute_error
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.dataloader import TimeSeriesDataLoader
 from rl_core.utils import define_data_for_experiment