diff --git a/gamlet/utils.py b/gamlet/utils.py index 8498657c..417f0082 100644 --- a/gamlet/utils.py +++ b/gamlet/utils.py @@ -3,4 +3,4 @@ def project_root() -> Path: """Returns MetaFEDOT project root folder.""" - return Path(__file__).parent.parent.parent \ No newline at end of file + return Path(__file__).parent.parent.parent diff --git a/meta_automl/utils.py b/meta_automl/utils.py deleted file mode 100644 index 8498657c..00000000 --- a/meta_automl/utils.py +++ /dev/null @@ -1,6 +0,0 @@ -from pathlib import Path - - -def project_root() -> Path: - """Returns MetaFEDOT project root folder.""" - return Path(__file__).parent.parent.parent \ No newline at end of file diff --git a/rl_core/agent/decision_transformer.py b/rl_core/agent/decision_transformer.py index db7909e6..e1420eda 100644 --- a/rl_core/agent/decision_transformer.py +++ b/rl_core/agent/decision_transformer.py @@ -13,7 +13,6 @@ def __init__(self, input_dim, embed_dim): self.embedding = nn.Linear(input_dim, embed_dim) def forward(self, x, pos_embedding): - output = self.embedding(x) return self.embedding(x) + pos_embedding @@ -21,7 +20,10 @@ class DecisionTransformer(nn.Module): """ https://arxiv.org/abs/2106.01345 """ metadata = {'name': 'DecisionTransformer'} - def __init__(self, state_dim, action_dim, max_length, embed_dim, num_heads, num_layers, dim_feedforward=2048, device=DEVICE): + def __init__( + self, state_dim, action_dim, max_length, embed_dim, num_heads, num_layers, dim_feedforward=2048, + device=DEVICE + ): super().__init__() self.state_dim = state_dim self.action_dim = action_dim diff --git a/rl_core/agent/dqn.py b/rl_core/agent/dqn.py index 6821ac26..2633bbdc 100644 --- a/rl_core/agent/dqn.py +++ b/rl_core/agent/dqn.py @@ -33,7 +33,8 @@ class DQN: """ https://arxiv.org/abs/1312.5602 """ metadata = {'name': 'DQN'} - def __init__(self, state_dim, action_dim, hidden_dim=512, gamma=0.01, lr=1e-4, batch_size=64, eps_decrease=1e-6, eps_min=1e-3, device='cuda'): + def __init__(self, state_dim, action_dim, hidden_dim=512, gamma=0.01, lr=1e-4, batch_size=64, eps_decrease=1e-6, + eps_min=1e-3, device='cuda'): self.state_dim = state_dim self.action_dim = action_dim self.hidden_dim = hidden_dim diff --git a/rl_core/agent/ppo.py b/rl_core/agent/ppo.py index 595f5622..03d6ffc5 100644 --- a/rl_core/agent/ppo.py +++ b/rl_core/agent/ppo.py @@ -43,7 +43,8 @@ def get_size(self): dones_size = self.convert_size(self.dones.__sizeof__()) masks_size = self.convert_size(self.masks.__sizeof__()) - return f'states: {states_size}, actions: {actions_size}, rewards: {rewards_size}, dones: {dones_size}, masks: {masks_size}' + return f'states: {states_size}, actions: {actions_size}, rewards: {rewards_size},' \ + f' dones: {dones_size}, masks: {masks_size}' @staticmethod def convert_size(size_bytes): @@ -207,7 +208,9 @@ def update(self): entropy_penalty = -self.tau * entropy # KL-Divergence - kld = torch.nn.functional.kl_div(b_m_new_log_probs, b_m_old_log_probs, log_target=True).detach().cpu().item() + kld = torch.nn.functional.kl_div( + b_m_new_log_probs, b_m_old_log_probs, log_target=True + ).detach().cpu().item() b_ratio = torch.exp(b_new_log_probs - b_old_log_probs) pi_loss_1 = b_ratio * b_advantage.detach() diff --git a/rl_core/environments/time_series.py b/rl_core/environments/time_series.py index 50c1688f..4cc572c4 100644 --- a/rl_core/environments/time_series.py +++ b/rl_core/environments/time_series.py @@ -14,7 +14,7 @@ from sklearn.metrics import mean_absolute_error from sklearn.model_selection import train_test_split -from 
meta_automl.utils import project_root +from gamlet.utils import project_root from rl_core.dataloader import TimeSeriesDataLoader PLOT_PRED = False @@ -273,7 +273,7 @@ def step(self, action: int, mode: str = 'train') -> (np.ndarray, int, bool, bool assert action in self.action_space # Checks if action is not valid - if not action in self._get_available_actions().keys(): + if action not in self._get_available_actions().keys(): terminated = False truncated = False reward = -0.01 @@ -570,7 +570,12 @@ def _get_maximum_number_of_actions_in_environment(number_of_nodes) -> int: dataloader = TimeSeriesDataLoader(train_datasets, path_to_meta_data=path_to_meta_data) train_data, test_data, meta_data = dataloader.get_data(dataset_name='M4_Q5278') - env = TimeSeriesPipelineEnvironment(max_number_of_nodes=10, using_number_of_nodes=10, render_mode='pipeline_plot', metadata_dim=125) + env = TimeSeriesPipelineEnvironment( + max_number_of_nodes=10, + using_number_of_nodes=10, + render_mode='pipeline_plot', + metadata_dim=125 + ) env.load_data(train_data, test_data, meta_data) terminated = False diff --git a/rl_core/experiments/ts_experiment_dqn.py b/rl_core/experiments/ts_experiment_dqn.py index a8df693b..0596f6e6 100644 --- a/rl_core/experiments/ts_experiment_dqn.py +++ b/rl_core/experiments/ts_experiment_dqn.py @@ -1,17 +1,14 @@ import os.path -from itertools import product import numpy as np -from joblib import Parallel, delayed +import wandb from torch.utils.tensorboard import SummaryWriter -from meta_automl.utils import project_root +from gamlet.utils import project_root from rl_core.agent.dqn import DQN from rl_core.environments.time_series import TimeSeriesPipelineEnvironment from rl_core.utils import define_data_for_experiment, OFFLINE_TRAJECTORIES -import wandb - def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, eps_min, eps_decrease): wandb.init( @@ -138,7 +135,6 @@ def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, e print(f'-- Finishing {episode} episode --\n') # -- Saving Agent --- - name = f'{agent.metadata["name"]}_{agent.hidden_dim}_{n_episodes}' path = f'{log_dir}/weights/{name}' agent.save(path) @@ -156,15 +152,3 @@ def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, e } run_experiment(2000, 10, 512, 0.75, 1e-5, 1e-4) - - # flag_to_continue = False - # last_experiment = [2, 512, 0.05, 0.001, 0.01] - - # for m, h, g, e_m, e_d in product(*params.values()): - # if [m, h, g, e_m, e_d] == last_experiment: - # flag_to_continue = True - # - # if flag_to_continue: - # run_experiment(2000, m, h, g, e_m, e_d) - - # Parallel(n_jobs=-2)(delayed(run_experiment)(2000, m, h, g, e_m, e_d) for m, h, g, e_m, e_d in product(*params.values())) diff --git a/rl_core/experiments/ts_experiment_dt.py b/rl_core/experiments/ts_experiment_dt.py index 520b77b9..78f35b5c 100644 --- a/rl_core/experiments/ts_experiment_dt.py +++ b/rl_core/experiments/ts_experiment_dt.py @@ -1,25 +1,25 @@ -import os import random import numpy as np import torch -from torch import nn from torch.utils.data import random_split, DataLoader from tqdm import tqdm -from meta_automl.utils import project_root +from gamlet.utils import project_root from rl_core.agent.decision_transformer import DecisionTransformer from rl_core.environments.time_series import TimeSeriesPipelineEnvironment from rl_core.utils import OFFLINE_TRAJECTORIES, define_data_for_experiment DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' + def encode_one_hot(target_dim, num_dims): output 
= np.zeros(num_dims) output[target_dim] = 1 return output + class EnvDataset(torch.utils.data.Dataset): def __init__(self, env, max_length, num_trajectories, goal): self.data = [] @@ -99,6 +99,7 @@ def collate_batch(batch): return zip(*result) + def evaluate_model(model, env, max_length, target_return, info_return=False): model.eval() @@ -183,7 +184,7 @@ def validate_model(model, dataloader, max_length, target_return): loss = criterion(predicted_actions, tensor_actions.detach()) total_loss += loss.item() - pbar.set_postfix({"loss":loss.item()}) + pbar.set_postfix({"loss": loss.item()}) average_loss = total_loss / len(dataloader) print(f"Average validation loss: {average_loss}") @@ -253,7 +254,6 @@ def inference_model(model, env_dataloader_test, test_list, max_length): info['pipeline'].show() - if __name__ == '__main__': number_of_nodes_in_pipeline = 5 @@ -311,7 +311,7 @@ def inference_model(model, env_dataloader_test, test_list, max_length): target_return=max_return ) - exp_name = f'DecisionTransformer' + exp_name = 'DecisionTransformer' log_dir = f'{project_root()}/MetaFEDOT/rl_core/agent/tensorboard_logs/dt/{number_of_nodes_in_pipeline}/{exp_name}' name = f'{model.metadata["name"]}_{n_epochs}' # os.mkdir(f'{log_dir}/weights') diff --git a/rl_core/experiments/ts_experiment_ppo.py b/rl_core/experiments/ts_experiment_ppo.py index 4a581354..04739fc2 100644 --- a/rl_core/experiments/ts_experiment_ppo.py +++ b/rl_core/experiments/ts_experiment_ppo.py @@ -4,7 +4,7 @@ import numpy as np from torch.utils.tensorboard import SummaryWriter -from meta_automl.utils import project_root +from gamlet.utils import project_root from rl_core.agent.ppo import PPO from rl_core.environments.time_series import TimeSeriesPipelineEnvironment from rl_core.utils import define_data_for_experiment @@ -36,7 +36,6 @@ def run_experiment(n_episodes=2000, number_of_nodes_in_pipeline=3): total_metrics = [] period = 20 - period_of_cleaning = 15 period_of_heatmap = 100 for episode in range(1, n_episodes + 1): @@ -50,7 +49,6 @@ def run_experiment(n_episodes=2000, number_of_nodes_in_pipeline=3): done = False episode_reward = 0 - episode_metric = 0 probs_matrix = np.zeros((env.action_dim, env.max_number_of_actions)) m_idx = 0 @@ -115,7 +113,7 @@ def run_experiment(n_episodes=2000, number_of_nodes_in_pipeline=3): actions_labels = [str(env.get_action_code(action)) for action in range(len(probs_matrix[:, 0]))] - fig = sns.heatmap( + sns.heatmap( probs_matrix, annot=labels, yticklabels=actions_labels, @@ -134,7 +132,8 @@ def run_experiment(n_episodes=2000, number_of_nodes_in_pipeline=3): print(f'-- Finishing {episode} episode --\n') # -- Saving Agent --- - name = f'{env.metadata["name"]}_{state_dim}_{number_of_nodes_in_pipeline}_{agent.metadata["name"]}_{agent.hidden_dim}_{n_episodes}' + name = f'{env.metadata["name"]}_{state_dim}_{number_of_nodes_in_pipeline}_{agent.metadata["name"]}_' \ + f'{agent.hidden_dim}_{n_episodes}' path = f'{log_dir}/weights/{name}' agent.save(path) diff --git a/rl_core/experiments/ts_stablebaseline3.py b/rl_core/experiments/ts_stablebaseline3.py index b5fdd524..c98697a8 100644 --- a/rl_core/experiments/ts_stablebaseline3.py +++ b/rl_core/experiments/ts_stablebaseline3.py @@ -1,20 +1,11 @@ -import random - -import torch -from wandb.integration.sb3 import WandbCallback - -import wandb import gymnasium as gym import numpy as np -from sb3_contrib import MaskablePPO, RecurrentPPO -from stable_baselines3.common.vec_env import DummyVecEnv -from stable_baselines3 import PPO - -from 
sb3_contrib.common.maskable.policies import MaskableMultiInputActorCriticPolicy
-from sb3_contrib.common.wrappers import ActionMasker
+import wandb
+from sb3_contrib import RecurrentPPO
+from wandb.integration.sb3 import WandbCallback
 
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
-from rl_core.utils import define_data_for_experiment, OFFLINE_TRAJECTORIES
+from rl_core.utils import define_data_for_experiment
 
 
 def mask_fn(env: gym.Env) -> np.ndarray:
@@ -36,7 +27,13 @@ def mask_fn(env: gym.Env) -> np.ndarray:
         'clip_range': 0.2,
     }
 
-    run = wandb.init(project='sb3_ts_recurrent', config=config, sync_tensorboard=True, monitor_gym=False, save_code=True)
+    run = wandb.init(
+        project='sb3_ts_recurrent',
+        config=config,
+        sync_tensorboard=True,
+        monitor_gym=False,
+        save_code=True
+    )
 
     env_params = dict(
         max_number_of_nodes=config['max_number_of_nodes_in_pipeline'],
@@ -87,8 +84,9 @@ def mask_fn(env: gym.Env) -> np.ndarray:
     state, _ = env.reset()
 
     while not done:
-        # action, _state = model.predict(state, action_masks=env.valid_action_mask())
         action, _state = model.predict(state)
+        # For MaskablePPO, use the following instead:
+        # action, _state = model.predict(state, action_masks=env.valid_action_mask())
         print(f'{action}', end=', ')
 
         next_state, reward, terminated, truncated, info = env.step(action.item())
@@ -99,4 +97,3 @@ def mask_fn(env: gym.Env) -> np.ndarray:
     print(f'reward {reward} \ninfo: {info}')
 
     info['pipeline'].show()
-
diff --git a/rl_core/experiments/ts_validation_dqn.py b/rl_core/experiments/ts_validation_dqn.py
index 407dc83c..119de13c 100644
--- a/rl_core/experiments/ts_validation_dqn.py
+++ b/rl_core/experiments/ts_validation_dqn.py
@@ -6,7 +6,7 @@
 import torch
 from torch.utils.tensorboard import SummaryWriter
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.agent.dqn import DQN
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
 from rl_core.utils import define_data_for_experiment
diff --git a/rl_core/experiments/ts_validation_dt.py b/rl_core/experiments/ts_validation_dt.py
index 407dc83c..79922c00 100644
--- a/rl_core/experiments/ts_validation_dt.py
+++ b/rl_core/experiments/ts_validation_dt.py
@@ -6,14 +6,15 @@
 import torch
 from torch.utils.tensorboard import SummaryWriter
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.agent.dqn import DQN
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
 from rl_core.utils import define_data_for_experiment
 
 
 def print_params(experiment_name, number_of_nodes_in_pipeline=8):
-    log_dir = f'{project_root()}/MetaFEDOT/rl_core/agent/tensorboard_logs/dqn/{number_of_nodes_in_pipeline}/{experiment_name}'
+    log_dir = f'{project_root()}/MetaFEDOT/rl_core/agent/tensorboard_logs/dqn/' \
+              f'{number_of_nodes_in_pipeline}/{experiment_name}'
 
     with io.open(f'{log_dir}/params.log', 'r', encoding='utf-8') as file:
         lines = file.readlines()
@@ -57,8 +58,6 @@ def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, e
     total_metrics = []
 
     period = 20
-    period_of_cleaning = 15
-    period_of_heatmap = 100
 
     for episode in range(1, n_episodes + 1):
         print(f'-- Starting {episode} episode --')
@@ -119,7 +118,8 @@ def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, e
     print(f'-- Finishing {episode} episode --\n')
 
     # -- Saving Agent ---
-    name = f'{env.metadata["name"]}_{number_of_nodes_in_pipeline}_{state_dim}_{agent.metadata["name"]}_{agent.hidden_dim}_{n_episodes}'
+    name = 
f'{env.metadata["name"]}_{number_of_nodes_in_pipeline}_{state_dim}_{agent.metadata["name"]}' \
+           f'_{agent.hidden_dim}_{n_episodes}'
 
     path = f'{log_dir}/weight'
 
     if not os.path.exists(path):
@@ -191,7 +191,4 @@ def run_experiment(n_episodes, number_of_nodes_in_pipeline, hidden_dim, gamma, e
     ]
 
     for h, g, e, e_m, e_d in params:
-        run_experiment(2000, 8, h, g, e, e_m, e_d)
-
-    # Parallel(n_jobs=-2)(
-    #     delayed(run_experiment)(2000, 8, h, g, e, e_m, e_d) for m, h, g, e, e_m, e_d in params)
\ No newline at end of file
+        run_experiment(2000, 8, h, g, e, e_m, e_d)
diff --git a/rl_core/pipeline_validation.py b/rl_core/pipeline_validation.py
index 247f7eec..309b1565 100644
--- a/rl_core/pipeline_validation.py
+++ b/rl_core/pipeline_validation.py
@@ -6,7 +6,7 @@
 from fedot.core.pipelines.pipeline_builder import PipelineBuilder
 from sklearn.metrics import mean_absolute_error
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.dataloader import TimeSeriesDataLoader
 from rl_core.utils import define_data_for_experiment
diff --git a/rl_core/random_agent_ts.py b/rl_core/random_agent_ts.py
index 524d4f59..090327ac 100644
--- a/rl_core/random_agent_ts.py
+++ b/rl_core/random_agent_ts.py
@@ -1,14 +1,10 @@
-import datetime
-
 import numpy as np
 from torch.utils.tensorboard import SummaryWriter
 
-from meta_automl.utils import project_root
-from rl_core.agent.ppo import PPO
+from gamlet.utils import project_root
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
 from rl_core.utils import define_data_for_experiment
 
-
 if __name__ == '__main__':
     """ Running Agent with Random Actions sampling """
@@ -19,8 +15,8 @@
     env = TimeSeriesPipelineEnvironment(render_mode='none', metadata_dim=126)
     state_dim, action_dim = env.state_dim, env.action_dim
 
-    # time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-    log_dir = f'{project_root()}/MetaFEDOT/rl_core/agent/tensorboard_logs/random_action/{number_of_nodes_in_pipeline}/random_agent'
+    log_dir = f'{project_root()}/MetaFEDOT/rl_core/agent/tensorboard_logs/random_action/' \
+              f'{number_of_nodes_in_pipeline}/random_agent'
 
     tb_writer = SummaryWriter(log_dir=log_dir)
 
     # -- Starting experiment --
@@ -83,4 +79,4 @@
 
     print(f'-- Finishing {episode} episode --\n')
 
-    print(f'Minimum reward {min(total_rewards)} -- Maximum reward {max(total_rewards)}')
\ No newline at end of file
+    print(f'Minimum reward {min(total_rewards)} -- Maximum reward {max(total_rewards)}')
diff --git a/rl_core/utils.py b/rl_core/utils.py
index f5987585..c43f8863 100644
--- a/rl_core/utils.py
+++ b/rl_core/utils.py
@@ -1,9 +1,8 @@
 import os
 
-import pandas as pd
-from sklearn.model_selection import train_test_split
+import pandas as pd
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.dataloader import TimeSeriesDataLoader
 
 OFFLINE_TRAJECTORIES = [
@@ -28,16 +27,10 @@ def define_data_for_experiment(test_size: int = 3):
     data_folder_path = os.path.join(str(project_root()), 'MetaFEDOT\\data\\knowledge_base_time_series_0\\datasets\\')
 
-    dataset_names = [name for name in os.listdir(data_folder_path)]
     temp = pd.read_csv('pipeline_validation_results.csv', index_col=0)
-    train = temp[temp['Topo Pipeline'].isna() == True]['Dataset'].to_list()
-    test = temp[temp['Topo Pipeline'].isna() == False]['Dataset'].to_list()
-
-    # if test_size:
-    #     train, test = train_test_split(dataset_names, test_size=3)
-    # else:
-    #     train, test = dataset_names, []
+    train = temp[temp['Topo Pipeline'].isna()]['Dataset'].to_list()
+    test = temp[temp['Topo Pipeline'].notna()]['Dataset'].to_list()
 
     train_datasets = {}
     for dataset in train:
diff --git a/tests/unit/rl_test/test_environment.py b/tests/unit/rl_test/test_environment.py
index 7b05cee6..16f88387 100644
--- a/tests/unit/rl_test/test_environment.py
+++ b/tests/unit/rl_test/test_environment.py
@@ -6,7 +6,7 @@
 from fedot.core.repository.operation_types_repository import OperationTypesRepository
 from fedot.core.repository.tasks import TaskTypesEnum
 
-from meta_automl.utils import project_root
+from gamlet.utils import project_root
 from rl_core.dataloader import DataLoader
 from rl_core.environments.ensemble import EnsemblePipelineGenerationEnvironment
 from rl_core.environments.linear import LinearPipelineGenerationEnvironment
diff --git a/tests/unit/rl_test/test_ts_env.py b/tests/unit/rl_test/test_ts_env.py
index 2b872735..2297d63a 100644
--- a/tests/unit/rl_test/test_ts_env.py
+++ b/tests/unit/rl_test/test_ts_env.py
@@ -1,13 +1,12 @@
 from itertools import permutations
-from typing import io
 
 import numpy as np
 import pytest
 
-from meta_automl.utils import project_root
 from rl_core.environments.time_series import TimeSeriesPipelineEnvironment
 from tests.unit.rl_test.utils import get_time_series
 
+
 # {
 #     1: 'adareg',
 #     2: 'ar',
@@ -43,12 +42,14 @@
 @pytest.mark.parametrize('trajectory',
-                         [
-                             [2, 0], [9, 0], [11, 0], [12, 0], [13, 0], [14, 0],  # Single node pipelines
-                             [25, 1, 31, 0], [25, 3, 31, 0], [25, 4, 31, 0], [25, 5, 31, 0], [25, 6, 31, 0],  # (1) lagged -> ..model..
-                             [25, 7, 31, 0], [25, 8, 31, 0], [25, 10, 31, 0], [25, 15, 31, 0],  # (2) lagged -> ..model..
-                         ]
-)
+                         [
+                             [2, 0], [9, 0], [11, 0], [12, 0], [13, 0], [14, 0],  # Single node pipelines
+                             [25, 1, 31, 0], [25, 3, 31, 0], [25, 4, 31, 0], [25, 5, 31, 0], [25, 6, 31, 0],
+                             # (1) lagged -> ..model..
+                             [25, 7, 31, 0], [25, 8, 31, 0], [25, 10, 31, 0], [25, 15, 31, 0],
+                             # (2) lagged -> ..model..
+ ] + ) def test_correct_pipelines(trajectory): train_data, test_data = get_time_series() @@ -68,10 +69,10 @@ def test_correct_pipelines(trajectory): @pytest.mark.parametrize('trajectory', - [ - [1, 2, 3, 31, 41, 49, 0], # Call pipeline.depth == -1 - ] -) + [ + [1, 2, 3, 31, 41, 49, 0], # Call pipeline.depth == -1 + ] + ) def test_uncorrect_pipelines(trajectory): train_data, test_data = get_time_series() @@ -94,7 +95,11 @@ def test_uncorrect_pipelines(trajectory): def test_max_number_of_actions_in_pipelines(max_number_of_nodes): train_data, test_data = get_time_series() - env = TimeSeriesPipelineEnvironment(max_number_of_nodes=max_number_of_nodes, metadata_dim=0, using_number_of_nodes=max_number_of_nodes) + env = TimeSeriesPipelineEnvironment( + max_number_of_nodes=max_number_of_nodes, + metadata_dim=0, + using_number_of_nodes=max_number_of_nodes + ) env.load_data(train_data, test_data, meta=None) env.reset() @@ -120,9 +125,7 @@ def test_max_number_of_actions_in_pipelines(max_number_of_nodes): assert action_number == env.max_number_of_actions -@pytest.mark.parametrize('trajectory', - [list(pair) for pair in permutations(range(1, 30), 2)] -) +@pytest.mark.parametrize('trajectory', [list(pair) for pair in permutations(range(1, 30), 2)]) def test_correct_pairs_pipelines(trajectory: list): train_data, test_data = get_time_series() diff --git a/tests/unit/rl_test/utils.py b/tests/unit/rl_test/utils.py index 8f514417..b9a297de 100644 --- a/tests/unit/rl_test/utils.py +++ b/tests/unit/rl_test/utils.py @@ -52,4 +52,4 @@ def get_time_series(len_forecast=5, length=80): data_type=DataTypesEnum.ts ) - return train_data, test_data, \ No newline at end of file + return train_data, test_data