From d93f51e19dd0da6fecdc3635339058205630da75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Degenne?= Date: Mon, 24 Jul 2023 18:02:40 +0200 Subject: [PATCH] Rename AgentManager to ExperimentManager (#350) * AgentManager -> ExperimentManager search and replace * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rename file, add alias * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rename file * add alias * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * search and replace agent_manager -> experiment_manager * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * change test file names --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/api.rst | 4 +- docs/basics/DeepRLTutorial/TutorialDeepRL.rst | 12 +-- docs/basics/compare_agents.rst | 10 +-- docs/basics/evaluate_agent.rst | 10 +-- docs/basics/experiment_setup.rst | 10 +-- docs/basics/multiprocess.rst | 10 +-- docs/basics/quick_start_rl/quickstart.rst | 36 ++++---- docs/basics/rlberry how to.rst | 26 +++--- docs/basics/seeding.rst | 4 +- docs/changelog.rst | 26 +++--- docs/other/using_stable_baselines.rst | 8 +- examples/demo_agents/demo_SAC.py | 10 +-- examples/demo_bandits/plot_TS_bandit.py | 6 +- .../plot_compare_index_bandits.py | 4 +- examples/demo_bandits/plot_exp3_bandit.py | 4 +- examples/demo_bandits/plot_mirror_bandit.py | 4 +- examples/demo_bandits/plot_ucb_bandit.py | 4 +- .../example_atari_atlantis_vectorized_ppo.py | 4 +- .../example_atari_breakout_vectorized_ppo.py | 4 +- examples/demo_env/video_plot_atari_freeway.py | 4 +- examples/demo_experiment/run.py | 4 +- examples/demo_network/run_client.py | 2 +- examples/demo_network/run_remote_manager.py | 10 +-- examples/plot_agent_manager.py | 10 +-- examples/plot_checkpointing.py | 14 +-- examples/plot_writer_wrapper.py | 4 +- long_tests/rl_agent/ltest_mbqvi_applegold.py | 4 +- long_tests/torch_agent/ltest_a2c_cartpole.py | 4 +- .../torch_agent/ltest_ctn_ppo_a2c_pendulum.py | 6 +- .../torch_agent/ltest_dqn_montaincar.py | 4 +- .../torch_agent/ltest_dqn_vs_mdqn_acrobot.py | 6 +- rlberry/agents/agent.py | 16 ++-- .../stable_baselines/stable_baselines.py | 4 +- rlberry/agents/torch/tests/test_a2c.py | 12 +-- rlberry/agents/torch/tests/test_dqn.py | 16 ++-- rlberry/agents/torch/tests/test_ppo.py | 18 ++-- .../agents/torch/tests/test_torch_atari.py | 21 +++-- rlberry/envs/tests/test_gym_make.py | 4 +- rlberry/experiment/generator.py | 14 +-- rlberry/experiment/load_results.py | 12 +-- .../tests/test_experiment_generator.py | 34 ++++---- rlberry/experiment/tests/test_load_results.py | 6 +- rlberry/experiment/yaml_utils.py | 16 ++-- rlberry/manager/__init__.py | 7 +- rlberry/manager/evaluation.py | 78 +++++++++-------- ...agent_manager.py => experiment_manager.py} | 74 ++++++++-------- rlberry/manager/multiple_managers.py | 22 ++--- ...anager.py => remote_experiment_manager.py} | 36 ++++---- ..._manager.py => test_experiment_manager.py} | 86 +++++++++---------- ....py => test_experiment_manager_seeding.py} | 16 ++-- .../manager/tests/test_hyperparam_optim.py | 20 ++--- rlberry/manager/tests/test_plot.py | 12 +-- rlberry/manager/tests/test_shared_data.py | 4 +- rlberry/network/server_utils.py | 70 +++++++-------- rlberry/network/tests/test_server.py | 8 +- rlberry/utils/__init__.py | 2 +- 
rlberry/utils/check_agent.py | 54 ++++++------ rlberry/utils/check_bandit_agent.py | 6 +- rlberry/utils/tests/test_check.py | 14 +-- rlberry/utils/tests/test_writer.py | 4 +- rlberry/utils/writers.py | 2 +- 61 files changed, 491 insertions(+), 465 deletions(-) rename rlberry/manager/{agent_manager.py => experiment_manager.py} (94%) rename rlberry/manager/{remote_agent_manager.py => remote_experiment_manager.py} (84%) rename rlberry/manager/tests/{test_agent_manager.py => test_experiment_manager.py} (81%) rename rlberry/manager/tests/{test_agent_manager_seeding.py => test_experiment_manager_seeding.py} (84%) diff --git a/docs/api.rst b/docs/api.rst index 821428f5d..b59514195 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -15,7 +15,7 @@ Main classes :template: class.rst - manager.AgentManager + manager.ExperimentManager manager.MultipleManagers Evaluation and plot @@ -202,7 +202,7 @@ Check Utilities utils.check_save_load utils.check_fit_additive utils.check_seeding_agent - utils.check_agent_manager + utils.check_experiment_manager Logging Utilities ----------------- diff --git a/docs/basics/DeepRLTutorial/TutorialDeepRL.rst b/docs/basics/DeepRLTutorial/TutorialDeepRL.rst index 6e61aa031..99dbe4425 100644 --- a/docs/basics/DeepRLTutorial/TutorialDeepRL.rst +++ b/docs/basics/DeepRLTutorial/TutorialDeepRL.rst @@ -15,7 +15,7 @@ Imports .. code:: python from rlberry.envs import gym_make - from rlberry.manager import plot_writer_data, AgentManager, evaluate_agents + from rlberry.manager import plot_writer_data, ExperimentManager, evaluate_agents from rlberry.agents.torch import A2CAgent from rlberry.agents.torch.utils.training import model_factory_from_env @@ -149,9 +149,9 @@ default networks are: .. code:: python """ - The AgentManager class is compact way of experimenting with a deepRL agent. + The ExperimentManager class is compact way of experimenting with a deepRL agent. """ - default_agent = AgentManager( + default_agent = ExperimentManager( A2CAgent, # The Agent class. (gym_make, dict(id="CartPole-v1")), # The Environment to solve. fit_budget=3e5, # The number of interactions @@ -182,7 +182,7 @@ default networks are: .. parsed-literal:: - [INFO] Running AgentManager fit() for A2C default with n_fit = 1 and max_workers = None. + [INFO] Running ExperimentManager fit() for A2C default with n_fit = 1 and max_workers = None. INFO: Making new env: CartPole-v1 INFO: Making new env: CartPole-v1 [INFO] Could not find least used device (nvidia-smi might be missing), use cuda:0 instead @@ -353,7 +353,7 @@ and bigger batch size to have more stable training. .. code:: python - tuned_agent = AgentManager( + tuned_agent = ExperimentManager( A2CAgent, # The Agent class. (gym_make, dict(id="CartPole-v1")), # The Environment to solve. init_kwargs=dict( # Where to put the agent's hyperparameters @@ -399,7 +399,7 @@ and bigger batch size to have more stable training. .. parsed-literal:: - [INFO] Running AgentManager fit() for A2C tuned with n_fit = 1 and max_workers = None. + [INFO] Running ExperimentManager fit() for A2C tuned with n_fit = 1 and max_workers = None. 
INFO: Making new env: CartPole-v1 INFO: Making new env: CartPole-v1 [INFO] Could not find least used device (nvidia-smi might be missing), use cuda:0 instead diff --git a/docs/basics/compare_agents.rst b/docs/basics/compare_agents.rst index 8bcfbfe9c..b3cd46b04 100644 --- a/docs/basics/compare_agents.rst +++ b/docs/basics/compare_agents.rst @@ -8,7 +8,7 @@ Compare different agents Two or more agents can be compared using the classes -:class:`~rlberry.manager.agent_manager.AgentManager` and +:class:`~rlberry.manager.experiment_manager.ExperimentManager` and :class:`~rlberry.manager.multiple_managers.MultipleManagers`, as in the example below. @@ -18,7 +18,7 @@ Two or more agents can be compared using the classes from rlberry.envs.classic_control import MountainCar from rlberry.agents.torch.reinforce import REINFORCEAgent from rlberry.agents.kernel_based.rs_kernel_ucbvi import RSKernelUCBVIAgent - from rlberry.manager import AgentManager, MultipleManagers, plot_writer_data + from rlberry.manager import ExperimentManager, MultipleManagers, plot_writer_data # Environment constructor and kwargs @@ -38,10 +38,10 @@ Two or more agents can be compared using the classes eval_kwargs = dict(eval_horizon=200) - # Create AgentManager for REINFORCE and RSKernelUCBVI + # Create ExperimentManager for REINFORCE and RSKernelUCBVI multimanagers = MultipleManagers() multimanagers.append( - AgentManager( + ExperimentManager( REINFORCEAgent, env, init_kwargs=params["reinforce"], @@ -51,7 +51,7 @@ Two or more agents can be compared using the classes ) ) multimanagers.append( - AgentManager( + ExperimentManager( RSKernelUCBVIAgent, env, init_kwargs=params["kernel"], diff --git a/docs/basics/evaluate_agent.rst b/docs/basics/evaluate_agent.rst index 9b2850f2d..9f2572760 100644 --- a/docs/basics/evaluate_agent.rst +++ b/docs/basics/evaluate_agent.rst @@ -9,14 +9,14 @@ Evaluate an agent and optimize its hyperparameters With rlberry_, once you created your agent, it is very easy to train in parallel several instances of it, analyze the results and optimize hyperparameters. -This is one of the purposes of the :class:`~rlberry.manager.agent_manager.AgentManager` class, +This is one of the purposes of the :class:`~rlberry.manager.experiment_manager.ExperimentManager` class, as shown in the examples below. .. code-block:: python from rlberry.envs import gym_make from rlberry.agents.torch.reinforce import REINFORCEAgent - from rlberry.manager import AgentManager, plot_writer_data + from rlberry.manager import ExperimentManager, plot_writer_data # Environment (constructor, kwargs) @@ -33,8 +33,8 @@ as shown in the examples below. eval_kwargs = dict(eval_horizon=500) # parameters to evaluate the agent - # Create AgentManager to fit 4 instances of REINFORCE in parallel. - stats = AgentManager( + # Create ExperimentManager to fit 4 instances of REINFORCE in parallel. + stats = ExperimentManager( REINFORCEAgent, env, init_kwargs=params, @@ -87,7 +87,7 @@ For :class:`~rlberry.agents.reinforce.reinforce.REINFORCEAgent`, this method loo Now we can use the :meth:`optimize_hyperparams` method -of :class:`~rlberry.manager.agent_manager.AgentManager` to find good parameters for our agent: +of :class:`~rlberry.manager.experiment_manager.ExperimentManager` to find good parameters for our agent: .. 
code-block:: python diff --git a/docs/basics/experiment_setup.rst b/docs/basics/experiment_setup.rst index a24c1b12a..e4d82e96d 100644 --- a/docs/basics/experiment_setup.rst +++ b/docs/basics/experiment_setup.rst @@ -13,7 +13,7 @@ To setup an experiment with rlberry, you can use yaml files. You'll need: * yaml files describing the environments and the agents -* A main python script that reads the files and generates :class:`~rlberry.manager.agent_manager.AgentManager` instances to run each agent. +* A main python script that reads the files and generates :class:`~rlberry.manager.experiment_manager.ExperimentManager` instances to run each agent. This can be done very succinctly as in the example below: @@ -89,12 +89,12 @@ This can be done very succinctly as in the example below: multimanagers = MultipleManagers() - for agent_manager in experiment_generator(): - multimanagers.append(agent_manager) + for experiment_manager in experiment_generator(): + multimanagers.append(experiment_manager) # Alternatively: - # agent_manager.fit() - # agent_manager.save() + # experiment_manager.fit() + # experiment_manager.save() multimanagers.run() multimanagers.save() diff --git a/docs/basics/multiprocess.rst b/docs/basics/multiprocess.rst index d84f24ad7..de25cae22 100644 --- a/docs/basics/multiprocess.rst +++ b/docs/basics/multiprocess.rst @@ -4,7 +4,7 @@ Parallelization in rlberry ========================== rlberry use python's standard multiprocessing library to execute the fit of agents in parallel on cpus. The parallelization is done via -:class:`~rlberry.manager.AgentManager` and via :class:`~rlberry.manager.MultipleManagers`. +:class:`~rlberry.manager.ExperimentManager` and via :class:`~rlberry.manager.MultipleManagers`. If a user wants to use a third-party parallelization library like joblib, the user must be aware of where the seeding is done so as not to bias the results. rlberry automatically handles seeding when the native parallelization scheme are used. @@ -19,9 +19,9 @@ having practically no parallelization except if the code executed in each thread Process: spawn or forkserver ---------------------------- -To have an efficient parallelization, it is often better to use processes (see the doc on `python's website `_) using the parameter :code:`parallelization="process"` in :class:`~rlberry.manager.AgentManager` or :class:`~rlberry.manager.MultipleManagers`. +To have an efficient parallelization, it is often better to use processes (see the doc on `python's website `_) using the parameter :code:`parallelization="process"` in :class:`~rlberry.manager.ExperimentManager` or :class:`~rlberry.manager.MultipleManagers`. -This implies that a new process will be launched for each fit of the AgentManager. +This implies that a new process will be launched for each fit of the ExperimentManager. The advised method of parallelization is spawn (parameter :code:`mp_context="spawn"`), however spawn method has several drawbacks: @@ -30,14 +30,14 @@ The advised method of parallelization is spawn (parameter :code:`mp_context="spa .. 
code:: python from rlberry.agents.torch import A2CAgent - from rlberry.manager import AgentManager + from rlberry.manager import ExperimentManager from rlberry.envs.benchmarks.ball_exploration import PBall2D n_steps = 1e5 batch_size = 256 if __name__ == "__main__": - manager = AgentManager( + manager = ExperimentManager( A2CAgent, (PBall2D, {}), init_kwargs=dict(batch_size=batch_size, gamma=0.99, learning_rate=0.001), diff --git a/docs/basics/quick_start_rl/quickstart.rst b/docs/basics/quick_start_rl/quickstart.rst index bd7af2993..21341dac4 100644 --- a/docs/basics/quick_start_rl/quickstart.rst +++ b/docs/basics/quick_start_rl/quickstart.rst @@ -26,7 +26,7 @@ Importing required libraries from rlberry.agents import UCBVIAgent, AgentWithSimplePolicy from rlberry.envs import Chain from rlberry.manager import ( - AgentManager, + ExperimentManager, evaluate_agents, plot_writer_data, read_writer_data, @@ -149,11 +149,11 @@ module :class:`~rlberry.agents.Agent` for more informations. Agent Manager ------------- -One of the main feature of rlberry is its :class:`~rlberry.manager.AgentManager` +One of the main feature of rlberry is its :class:`~rlberry.manager.ExperimentManager` class. Here is a diagram to explain briefly what it does. -.. figure:: agent_manager_diagram.png +.. figure:: experiment_manager_diagram.png :align: center @@ -183,8 +183,8 @@ then spawn agents as desired during the experiment. .. code:: python - # Create AgentManager to fit 1 agent - ucbvi_stats = AgentManager( + # Create ExperimentManager to fit 1 agent + ucbvi_stats = ExperimentManager( UCBVIAgent, (env_ctor, env_kwargs), fit_budget=100, @@ -194,8 +194,8 @@ then spawn agents as desired during the experiment. ) ucbvi_stats.fit() - # Create AgentManager for baseline - baseline_stats = AgentManager( + # Create ExperimentManager for baseline + baseline_stats = ExperimentManager( RandomAgent, (env_ctor, env_kwargs), fit_budget=100, @@ -207,9 +207,9 @@ then spawn agents as desired during the experiment. .. parsed-literal:: - [INFO] Running AgentManager fit() for UCBVI with n_fit = 1 and max_workers = None. + [INFO] Running ExperimentManager fit() for UCBVI with n_fit = 1 and max_workers = None. [INFO] ... trained! - [INFO] Running AgentManager fit() for RandomAgent with n_fit = 1 and max_workers = None. + [INFO] Running ExperimentManager fit() for RandomAgent with n_fit = 1 and max_workers = None. [INFO] ... trained! @@ -307,8 +307,8 @@ Then, we fit the two agents and plot the data in the writer. .. code:: python - # Create AgentManager to fit 4 agents using 1 job - ucbvi_stats = AgentManager( + # Create ExperimentManager to fit 4 agents using 1 job + ucbvi_stats = ExperimentManager( UCBVIAgent2, (env_ctor, env_kwargs), fit_budget=50, @@ -319,8 +319,8 @@ Then, we fit the two agents and plot the data in the writer. ) # mp_context is needed to have parallel computing in notebooks. ucbvi_stats.fit() - # Create AgentManager for baseline - baseline_stats = AgentManager( + # Create ExperimentManager for baseline + baseline_stats = ExperimentManager( RandomAgent2, (env_ctor, env_kwargs), fit_budget=5000, @@ -330,8 +330,8 @@ Then, we fit the two agents and plot the data in the writer. ) baseline_stats.fit() - # Create AgentManager for baseline - opti_stats = AgentManager( + # Create ExperimentManager for baseline + opti_stats = ExperimentManager( OptimalAgent, (env_ctor, env_kwargs), fit_budget=5000, @@ -344,11 +344,11 @@ Then, we fit the two agents and plot the data in the writer. .. 
parsed-literal:: - [INFO] Running AgentManager fit() for UCBVIAgent2 with n_fit = 10 and max_workers = None. + [INFO] Running ExperimentManager fit() for UCBVIAgent2 with n_fit = 10 and max_workers = None. [INFO] ... trained! - [INFO] Running AgentManager fit() for RandomAgent2 with n_fit = 10 and max_workers = None. + [INFO] Running ExperimentManager fit() for RandomAgent2 with n_fit = 10 and max_workers = None. [INFO] ... trained! - [INFO] Running AgentManager fit() for OptimalAgent with n_fit = 10 and max_workers = None. + [INFO] Running ExperimentManager fit() for OptimalAgent with n_fit = 10 and max_workers = None. [INFO] ... trained! Remark that ``fit_budget`` may not mean the same thing among agents. For diff --git a/docs/basics/rlberry how to.rst b/docs/basics/rlberry how to.rst index eb9fd62ef..f84e79c5f 100644 --- a/docs/basics/rlberry how to.rst +++ b/docs/basics/rlberry how to.rst @@ -7,7 +7,7 @@ Libraries import pandas as pd from rlberry.agents import ValueIterationAgent, AgentWithSimplePolicy from rlberry.envs import GridWorld - from rlberry.manager import AgentManager, evaluate_agents + from rlberry.manager import ExperimentManager, evaluate_agents .. parsed-literal:: @@ -86,8 +86,8 @@ our estimation. .. code:: ipython3 - # Create AgentManager to fit 4 agents using 1 job - vi_stats = AgentManager( + # Create ExperimentManager to fit 4 agents using 1 job + vi_stats = ExperimentManager( ValueIterationAgent, (env_ctor, env_kwargs), fit_budget=0, @@ -96,8 +96,8 @@ our estimation. n_fit=1) vi_stats.fit() - # Create AgentManager for baseline - baseline_stats = AgentManager( + # Create ExperimentManager for baseline + baseline_stats = ExperimentManager( RandomAgent, (env_ctor, env_kwargs), fit_budget=0, @@ -108,9 +108,9 @@ our estimation. .. parsed-literal:: - [INFO] Running AgentManager fit() for ValueIteration... + [INFO] Running ExperimentManager fit() for ValueIteration... [INFO] ... trained! - [INFO] Running AgentManager fit() for RandomAgent... + [INFO] Running ExperimentManager fit() for RandomAgent... [INFO] ... trained! @@ -205,8 +205,8 @@ the variability of our estimation). .. code:: ipython3 - # Create AgentManager to fit 4 agents using 1 job - vi_stats = AgentManager( + # Create ExperimentManager to fit 4 agents using 1 job + vi_stats = ExperimentManager( ValueIterationAgent2, (env_ctor, env_kwargs), fit_budget=1, @@ -215,8 +215,8 @@ the variability of our estimation). n_fit=4) vi_stats.fit() - # Create AgentManager for baseline - baseline_stats = AgentManager( + # Create ExperimentManager for baseline + baseline_stats = ExperimentManager( RandomAgent2, (env_ctor, env_kwargs), fit_budget=1, @@ -227,9 +227,9 @@ the variability of our estimation). .. parsed-literal:: - [INFO] Running AgentManager fit() for ValueIterationAgent2... + [INFO] Running ExperimentManager fit() for ValueIterationAgent2... [INFO] ... trained! - [INFO] Running AgentManager fit() for RandomAgent2... + [INFO] Running ExperimentManager fit() for RandomAgent2... [INFO] ... trained! diff --git a/docs/basics/seeding.rst b/docs/basics/seeding.rst index f22ded51d..34a48c5b9 100644 --- a/docs/basics/seeding.rst +++ b/docs/basics/seeding.rst @@ -61,12 +61,12 @@ It works as follows: .. 
note:: - The class :class:`~rlberry.manager.agent_manager.AgentManager` provides a :code:`seed` parameter in its constructor, + The class :class:`~rlberry.manager.experiment_manager.ExperimentManager` provides a :code:`seed` parameter in its constructor, and handles automatically the seeding of all environments and agents used by it. .. note:: The :meth:`optimize_hyperparams` method of - :class:`~rlberry.manager.agent_manager.AgentManager` uses the `Optuna `_ + :class:`~rlberry.manager.experiment_manager.ExperimentManager` uses the `Optuna `_ library for hyperparameter optimization and is **inherently non-deterministic** (see `Optuna FAQ `_). diff --git a/docs/changelog.rst b/docs/changelog.rst index 0f9fa16d5..97e16e077 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -95,7 +95,7 @@ Version 0.3.0 *PR #191* -* Possibility to generate a profile with :class:`rlberry.agents.manager.AgentManager`. +* Possibility to generate a profile with :class:`rlberry.agents.manager.ExperimentManager`. *PR #148, #161, #180* @@ -113,8 +113,8 @@ Version 0.3.0 *Feb 22, 2022 (PR #126)* * Setup :code:`rlberry.__version__` (currently 0.3.0dev0) -* Record rlberry version in a AgentManager attribute equality of AgentManagers -* Override :code:`__eq__` method of the AgentManager class. +* Record rlberry version in a ExperimentManager attribute equality of ExperimentManagers +* Override :code:`__eq__` method of the ExperimentManager class. *Feb 14-15, 2022 (PR #97, #118)* @@ -126,10 +126,10 @@ Version 0.3.0 *Feb 11, 2022 (#83, #95)* * (fix) Fixed bug in :meth:`FiniteMDP.sample()`: terminal state was being checked with `self.state` instead of given `state` -* (feat) Option to use 'fork' or 'spawn' in :class:`~rlberry.manager.AgentManager` -* (feat) AgentManager output_dir now has a timestamp and a short ID by default. +* (feat) Option to use 'fork' or 'spawn' in :class:`~rlberry.manager.ExperimentManager` +* (feat) ExperimentManager output_dir now has a timestamp and a short ID by default. * (feat) Gridworld can be constructed from string layout -* (feat) `max_workers` argument for :class:`~rlberry.manager.AgentManager` to control the maximum number of processes/threads created by the :meth:`fit` method. +* (feat) `max_workers` argument for :class:`~rlberry.manager.ExperimentManager` to control the maximum number of processes/threads created by the :meth:`fit` method. *Feb 04, 2022* @@ -145,12 +145,12 @@ Version 0.2.1 ------------- -* :class:`~rlberry.agents.Agent` and :class:`~rlberry.manager.AgentManager` both have a unique_id attribute (useful for creating unique output files/directories). +* :class:`~rlberry.agents.Agent` and :class:`~rlberry.manager.ExperimentManager` both have a unique_id attribute (useful for creating unique output files/directories). * `DefaultWriter` is now initialized in base class `Agent` and (optionally) wraps a tensorboard `SummaryWriter`. -* :class:`~rlberry.manager.AgentManager` has an option enable_tensorboard that activates tensorboard logging in each of its Agents (with their writer attribute). The log_dirs of tensorboard are automatically assigned by :class:`~rlberry.manager.AgentManager`. -* `RemoteAgentManager` receives tensorboard data created in the server, when the method `get_writer_data()` is called. This is done by a zip file transfer with :class:`~rlberry.network`. +* :class:`~rlberry.manager.ExperimentManager` has an option enable_tensorboard that activates tensorboard logging in each of its Agents (with their writer attribute). 
The log_dirs of tensorboard are automatically assigned by :class:`~rlberry.manager.ExperimentManager`. +* `RemoteExperimentManager` receives tensorboard data created in the server, when the method `get_writer_data()` is called. This is done by a zip file transfer with :class:`~rlberry.network`. * `BaseWrapper` and `gym_make` now have an option `wrap_spaces`. If set to `True`, this option converts `gym.spaces` to `rlberry.spaces`, which provides classes with better seeding (using numpy's default_rng instead of `RandomState`) -* :class:`~rlberry.manager.AgentManager`: new method `get_agent_instances()` that returns trained instances +* :class:`~rlberry.manager.ExperimentManager`: new method `get_agent_instances()` that returns trained instances * `plot_writer_data`: possibility to set `xtag` (tag used for x-axis) * Fixed agent initialization bug in `AgentHandler` (`eval_env` missing in `kwargs` for agent_class). @@ -158,11 +158,11 @@ Version 0.2.1 Version 0.2 ----------- -* `AgentStats` renamed to :class:`~rlberry.manager.AgentManager`. -* :class:`~rlberry.manager.AgentManager` can handle agents that cannot be pickled. +* `AgentStats` renamed to :class:`~rlberry.manager.ExperimentManager`. +* :class:`~rlberry.manager.ExperimentManager` can handle agents that cannot be pickled. * Agent interface requires `eval()` method instead of `policy()` to handle more general agents (e.g. reward-free, POMDPs etc). * Multi-processing and multi-threading are now done with `ProcessPoolExecutor` and `ThreadPoolExecutor` (allowing nested processes for example). Processes are created with spawn (jax does not work with fork, see #51). * JAX implementation of DQN and replay buffer using reverb (experimental). * :class:`~rlberry.network`: server and client interfaces to exchange messages via sockets (experimental). -* `RemoteAgentManager` to train agents in a remote server and gather the results locally (experimental). +* `RemoteExperimentManager` to train agents in a remote server and gather the results locally (experimental). * Fix rendering bug with OpenGL diff --git a/docs/other/using_stable_baselines.rst b/docs/other/using_stable_baselines.rst index c1bf69b1b..21d33f5ef 100644 --- a/docs/other/using_stable_baselines.rst +++ b/docs/other/using_stable_baselines.rst @@ -38,7 +38,7 @@ There are two important implementation details to note: model. The `Stable Baselines`_ algorithm class is a **required** parameter of the -agent. In order to use it with AgentManagers, it must be included in the +agent. In order to use it with ExperimentManagers, it must be included in the `init_kwargs` parameter. For example, below we use rlberry_ to train several instances of the A2C implementation of `Stable Baselines`_ and evaluate two hyperparameter configurations. 
@@ -69,10 +69,10 @@ implementation of `Stable Baselines`_ and evaluate two hyperparameter configurat # Training several agents and comparing different hyperparams - from rlberry.manager import AgentManager, MultipleManagers, evaluate_agents + from rlberry.manager import ExperimentManager, MultipleManagers, evaluate_agents # Pass the wrapper directly with init_kwargs - stats = AgentManager( + stats = ExperimentManager( StableBaselinesAgent, (env_ctor, env_kwargs), agent_name="A2C baseline", @@ -87,7 +87,7 @@ implementation of `Stable Baselines`_ and evaluate two hyperparameter configurat ) # Pass a subclass for hyperparameter optimization - stats_alternative = AgentManager( + stats_alternative = ExperimentManager( A2CAgent, (env_ctor, env_kwargs), agent_name="A2C optimized", diff --git a/examples/demo_agents/demo_SAC.py b/examples/demo_agents/demo_SAC.py index f1a55fa94..bb3ed8244 100644 --- a/examples/demo_agents/demo_SAC.py +++ b/examples/demo_agents/demo_SAC.py @@ -13,7 +13,7 @@ from rlberry.envs.basewrapper import Wrapper # from rlberry.envs import gym_make -from rlberry.manager import plot_writer_data, AgentManager +from rlberry.manager import plot_writer_data, ExperimentManager from rlberry.envs.benchmarks.ball_exploration import PBall2D from rlberry.agents.experimental.torch import SACAgent import gymnasium as gym @@ -27,7 +27,7 @@ def env_ctor(env, wrap_spaces=True): env = PBall2D() env = gym.wrappers.TimeLimit(env, max_episode_steps=100) env_kwargs = dict(env=env) -agent = AgentManager( +agent = ExperimentManager( SACAgent, (env_ctor, env_kwargs), fit_budget=500, @@ -37,11 +37,11 @@ def env_ctor(env, wrap_spaces=True): # basic version # env_kwargs = dict(id = "CartPole-v1") -# agent = AgentManager(SACAgent, (gym_make, env_kwargs), fit_budget=200, n_fit=1) +# agent = ExperimentManager(SACAgent, (gym_make, env_kwargs), fit_budget=200, n_fit=1) # # timothe's # env = gym_make("CartPole-v1") -# agent = AgentManager( +# agent = ExperimentManager( # SACAgent, (env.__class__, dict()), fit_budget=200, n_fit=1, enable_tensorboard=True, # ) @@ -50,7 +50,7 @@ def env_ctor(env, wrap_spaces=True): # from copy import deepcopy # def env_constructor(): # return deepcopy(env) -# agent = AgentManager( +# agent = ExperimentManager( # SACAgent, (env_constructor, dict()), fit_budget=200, n_fit=1, enable_tensorboard=True, # ) diff --git a/examples/demo_bandits/plot_TS_bandit.py b/examples/demo_bandits/plot_TS_bandit.py index baf549229..599033dbc 100644 --- a/examples/demo_bandits/plot_TS_bandit.py +++ b/examples/demo_bandits/plot_TS_bandit.py @@ -20,7 +20,7 @@ makeBetaPrior, makeGaussianPrior, ) -from rlberry.manager import AgentManager, plot_writer_data +from rlberry.manager import ExperimentManager, plot_writer_data from rlberry.wrappers import WriterWrapper @@ -63,7 +63,7 @@ def __init__(self, env, **kwargs): env_kwargs = {"p": means} agents = [ - AgentManager( + ExperimentManager( Agent, (env_ctor, env_kwargs), fit_budget=T, @@ -129,7 +129,7 @@ def __init__(self, env, sigma=1.0, **kwargs): env_kwargs = {"means": means, "stds": sigma * np.ones(A)} agents = [ - AgentManager( + ExperimentManager( Agent, (env_ctor, env_kwargs), fit_budget=T, diff --git a/examples/demo_bandits/plot_compare_index_bandits.py b/examples/demo_bandits/plot_compare_index_bandits.py index 7dca3e404..f089c5ac3 100644 --- a/examples/demo_bandits/plot_compare_index_bandits.py +++ b/examples/demo_bandits/plot_compare_index_bandits.py @@ -9,7 +9,7 @@ import numpy as np import matplotlib.pyplot as plt from rlberry.envs.bandits import 
BernoulliBandit -from rlberry.manager import AgentManager, plot_writer_data +from rlberry.manager import ExperimentManager, plot_writer_data from rlberry.wrappers import WriterWrapper from rlberry.agents.bandits import ( IndexAgent, @@ -129,7 +129,7 @@ def __init__(self, env, **kwargs): ] agents = [ - AgentManager( + ExperimentManager( Agent, (env_ctor, env_kwargs), fit_budget=T, diff --git a/examples/demo_bandits/plot_exp3_bandit.py b/examples/demo_bandits/plot_exp3_bandit.py index ff3b25d00..f4716a219 100644 --- a/examples/demo_bandits/plot_exp3_bandit.py +++ b/examples/demo_bandits/plot_exp3_bandit.py @@ -15,7 +15,7 @@ makeEXP3Index, makeBetaPrior, ) -from rlberry.manager import AgentManager, plot_writer_data +from rlberry.manager import ExperimentManager, plot_writer_data from rlberry.wrappers import WriterWrapper @@ -81,7 +81,7 @@ def switching_rewards(T, gap=0.1, rate=1.6): Agents_class = [EXP3Agent, BernoulliTSAgent] agents = [ - AgentManager( + ExperimentManager( Agent, (env_ctor, env_kwargs), init_kwargs={}, diff --git a/examples/demo_bandits/plot_mirror_bandit.py b/examples/demo_bandits/plot_mirror_bandit.py index 8480e6832..4e9b9757d 100644 --- a/examples/demo_bandits/plot_mirror_bandit.py +++ b/examples/demo_bandits/plot_mirror_bandit.py @@ -14,7 +14,7 @@ """ import numpy as np -from rlberry.manager import AgentManager, read_writer_data +from rlberry.manager import ExperimentManager, read_writer_data from rlberry.envs.interface import Model from rlberry.agents.bandits import BanditWithSimplePolicy from rlberry.wrappers import WriterWrapper @@ -158,7 +158,7 @@ def fit(self, budget=None, **kwargs): # Experiment -agent = AgentManager( +agent = ExperimentManager( SeqHalvAgent, (env_ctor, env_kwargs), fit_budget=100, # we use only 100 iterations for faster example run in doc. diff --git a/examples/demo_bandits/plot_ucb_bandit.py b/examples/demo_bandits/plot_ucb_bandit.py index c154333e5..92b9d8ae2 100644 --- a/examples/demo_bandits/plot_ucb_bandit.py +++ b/examples/demo_bandits/plot_ucb_bandit.py @@ -9,7 +9,7 @@ import numpy as np from rlberry.envs.bandits import NormalBandit from rlberry.agents.bandits import IndexAgent, makeSubgaussianUCBIndex -from rlberry.manager import AgentManager, plot_writer_data +from rlberry.manager import ExperimentManager, plot_writer_data import matplotlib.pyplot as plt from rlberry.wrappers import WriterWrapper @@ -38,7 +38,7 @@ def __init__(self, env, sigma=1, **kwargs): env_ctor = NormalBandit env_kwargs = {"means": means, "stds": 2 * np.ones(len(means))} -agent = AgentManager( +agent = ExperimentManager( UCBAgent, (env_ctor, env_kwargs), fit_budget=T, diff --git a/examples/demo_env/example_atari_atlantis_vectorized_ppo.py b/examples/demo_env/example_atari_atlantis_vectorized_ppo.py index 05c5ce8b8..6fc1c187b 100644 --- a/examples/demo_env/example_atari_atlantis_vectorized_ppo.py +++ b/examples/demo_env/example_atari_atlantis_vectorized_ppo.py @@ -14,7 +14,7 @@ # sphinx_gallery_thumbnail_path = 'thumbnails/example_plot_atari_atlantis_vectorized_ppo.jpg' -from rlberry.manager.agent_manager import AgentManager +from rlberry.manager import ExperimentManager from datetime import datetime from rlberry.agents.torch import PPOAgent from gymnasium.wrappers.record_video import RecordVideo @@ -67,7 +67,7 @@ } -tuned_agent = AgentManager( +tuned_agent = ExperimentManager( PPOAgent, # The Agent class. 
( atari_make, diff --git a/examples/demo_env/example_atari_breakout_vectorized_ppo.py b/examples/demo_env/example_atari_breakout_vectorized_ppo.py index 712136588..d96221dc3 100644 --- a/examples/demo_env/example_atari_breakout_vectorized_ppo.py +++ b/examples/demo_env/example_atari_breakout_vectorized_ppo.py @@ -14,7 +14,7 @@ # sphinx_gallery_thumbnail_path = 'thumbnails/example_plot_atari_breakout_vectorized_ppo.jpg' -from rlberry.manager.agent_manager import AgentManager +from rlberry.manager import ExperimentManager from datetime import datetime from rlberry.agents.torch import PPOAgent from gymnasium.wrappers.record_video import RecordVideo @@ -67,7 +67,7 @@ } -tuned_agent = AgentManager( +tuned_agent = ExperimentManager( PPOAgent, # The Agent class. ( atari_make, diff --git a/examples/demo_env/video_plot_atari_freeway.py b/examples/demo_env/video_plot_atari_freeway.py index 264f4524a..f8e22f2f9 100644 --- a/examples/demo_env/video_plot_atari_freeway.py +++ b/examples/demo_env/video_plot_atari_freeway.py @@ -14,7 +14,7 @@ # sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_atari_freeway.jpg' -from rlberry.manager.agent_manager import AgentManager +from rlberry.manager import ExperimentManager from datetime import datetime from rlberry.agents.torch.dqn.dqn import DQNAgent from gymnasium.wrappers.record_video import RecordVideo @@ -45,7 +45,7 @@ "is_policy": False, # The network should output a distribution } -tuned_agent = AgentManager( +tuned_agent = ExperimentManager( DQNAgent, # The Agent class. ( atari_make, diff --git a/examples/demo_experiment/run.py b/examples/demo_experiment/run.py index d25534ef1..87741dd66 100644 --- a/examples/demo_experiment/run.py +++ b/examples/demo_experiment/run.py @@ -19,8 +19,8 @@ if __name__ == "__main__": multimanagers = MultipleManagers(parallelization="thread") - for agent_manager in experiment_generator(): - multimanagers.append(agent_manager) + for experiment_manager in experiment_generator(): + multimanagers.append(experiment_manager) multimanagers.run() multimanagers.save() diff --git a/examples/demo_network/run_client.py b/examples/demo_network/run_client.py index 0b3361fa5..5098f5311 100644 --- a/examples/demo_network/run_client.py +++ b/examples/demo_network/run_client.py @@ -11,7 +11,7 @@ port = int(input("Select server port: ")) client = BerryClient(port=port) -# Send params for AgentManager +# Send params for ExperimentManager client.send( Message.create( command=interface.Command.AGENT_MANAGER_CREATE_INSTANCE, diff --git a/examples/demo_network/run_remote_manager.py b/examples/demo_network/run_remote_manager.py index e002dbc21..83df52486 100644 --- a/examples/demo_network/run_remote_manager.py +++ b/examples/demo_network/run_remote_manager.py @@ -9,9 +9,7 @@ from rlberry.agents.torch import REINFORCEAgent -from rlberry.manager.agent_manager import AgentManager -from rlberry.manager.multiple_managers import MultipleManagers -from rlberry.manager.remote_agent_manager import RemoteAgentManager +from rlberry.manager import ExperimentManager, MultipleManagers, RemoteExperimentManager from rlberry.manager.evaluation import evaluate_agents, plot_writer_data @@ -21,7 +19,7 @@ FIT_BUDGET = 500 - local_manager = AgentManager( + local_manager = ExperimentManager( agent_class=REINFORCEAgent, train_env=(gym_make, dict(id="CartPole-v1")), fit_budget=FIT_BUDGET, @@ -33,7 +31,7 @@ parallelization="process", ) - remote_manager = RemoteAgentManager( + remote_manager = RemoteExperimentManager( client, 
agent_class=ResourceRequest(name="REINFORCEAgent"), train_env=ResourceRequest(name="gym_make", kwargs=dict(id="CartPole-v1")), @@ -62,7 +60,7 @@ # Test save/load fname1 = remote_manager.save() del remote_manager - remote_manager = RemoteAgentManager.load(fname1) + remote_manager = RemoteExperimentManager.load(fname1) # Fit everything in parallel mmanagers = MultipleManagers(parallelization="thread") diff --git a/examples/plot_agent_manager.py b/examples/plot_agent_manager.py index ab160ec99..338ee417d 100644 --- a/examples/plot_agent_manager.py +++ b/examples/plot_agent_manager.py @@ -98,13 +98,13 @@ def policy(self, observation): return self.env.action_space.sample() -from rlberry.manager import AgentManager, evaluate_agents +from rlberry.manager import ExperimentManager, evaluate_agents # Define parameters vi_params = {"gamma": 0.1, "epsilon": 1e-3} -# Create AgentManager to fit 4 agents using 1 job -vi_stats = AgentManager( +# Create ExperimentManager to fit 4 agents using 1 job +vi_stats = ExperimentManager( ValueIterationAgent, (env_ctor, env_kwargs), fit_budget=0, @@ -114,8 +114,8 @@ def policy(self, observation): ) vi_stats.fit() -# Create AgentManager for baseline -baseline_stats = AgentManager( +# Create ExperimentManager for baseline +baseline_stats = ExperimentManager( RandomAgent, (env_ctor, env_kwargs), fit_budget=0, diff --git a/examples/plot_checkpointing.py b/examples/plot_checkpointing.py index a7c5182df..3de689cb6 100644 --- a/examples/plot_checkpointing.py +++ b/examples/plot_checkpointing.py @@ -9,7 +9,7 @@ your agents, and how to restore from a previous checkpoint. """ from rlberry.agents import Agent -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager from rlberry.manager import plot_writer_data @@ -55,19 +55,19 @@ def eval(self, **kwargs): if __name__ == "__main__": - manager = AgentManager( + manager = ExperimentManager( MyAgent, fit_budget=-1, n_fit=2, seed=123, ) # Save manager **before** fit for several timesteps! So that we can see why checkpoints are useful: - # even if AgentManager is interrupted, it can be loaded and continue training + # even if ExperimentManager is interrupted, it can be loaded and continue training # from last checkpoint. - # But first, we need an initial call to AgentManager.fit() (for zero or a small number of timesteps), - # so that AgentManager can instantiate MyAgent and it can start checkpointing itself. + # But first, we need an initial call to ExperimentManager.fit() (for zero or a small number of timesteps), + # so that ExperimentManager can instantiate MyAgent and it can start checkpointing itself. # This is because the __init__ method of MyAgent is only executed - # after the first call to AgentManager.fit(). + # after the first call to ExperimentManager.fit(). 
manager.fit(0) manager_file = manager.save() print(f"\n Saved manager at {manager_file}.\n") @@ -82,7 +82,7 @@ def eval(self, **kwargs): # Load manager and continue training from last checkpoint print(f"\n Loading manager from {manager_file}.\n") - loaded_manager = AgentManager.load(manager_file) + loaded_manager = ExperimentManager.load(manager_file) # Fit for 500 more timesteps loaded_manager.fit(500) diff --git a/examples/plot_writer_wrapper.py b/examples/plot_writer_wrapper.py index 3c2a06066..069d4de00 100644 --- a/examples/plot_writer_wrapper.py +++ b/examples/plot_writer_wrapper.py @@ -24,7 +24,7 @@ from rlberry.wrappers import WriterWrapper from rlberry.envs import GridWorld -from rlberry.manager import plot_writer_data, AgentManager +from rlberry.manager import plot_writer_data, ExperimentManager from rlberry.agents import UCBVIAgent import matplotlib.pyplot as plt @@ -53,7 +53,7 @@ def __init__(self, env, **kwargs): ) env = env_ctor(**env_kwargs) -agent = AgentManager(VIAgent, (env_ctor, env_kwargs), fit_budget=10, n_fit=3) +agent = ExperimentManager(VIAgent, (env_ctor, env_kwargs), fit_budget=10, n_fit=3) agent.fit(budget=10) # comment the line above if you only want to load data from rlberry_data. diff --git a/long_tests/rl_agent/ltest_mbqvi_applegold.py b/long_tests/rl_agent/ltest_mbqvi_applegold.py index 083562bde..5aa85aa25 100644 --- a/long_tests/rl_agent/ltest_mbqvi_applegold.py +++ b/long_tests/rl_agent/ltest_mbqvi_applegold.py @@ -1,6 +1,6 @@ from rlberry.envs.benchmarks.grid_exploration.apple_gold import AppleGold from rlberry.agents.mbqvi import MBQVIAgent -from rlberry.manager import AgentManager, evaluate_agents +from rlberry.manager import ExperimentManager, evaluate_agents import numpy as np params = {} @@ -11,7 +11,7 @@ # hyperparameters from https://github.com/DLR-RM/rl-baselines3-zoo def test_mbqvi_applegold(): - rbagent = AgentManager( + rbagent = ExperimentManager( MBQVIAgent, (AppleGold, None), init_kwargs=params, diff --git a/long_tests/torch_agent/ltest_a2c_cartpole.py b/long_tests/torch_agent/ltest_a2c_cartpole.py index c353855aa..ce22dc0f3 100644 --- a/long_tests/torch_agent/ltest_a2c_cartpole.py +++ b/long_tests/torch_agent/ltest_a2c_cartpole.py @@ -1,6 +1,6 @@ from rlberry.envs import gym_make from rlberry.agents.torch import A2CAgent -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager from rlberry.agents.torch.utils.training import model_factory_from_env import numpy as np @@ -24,7 +24,7 @@ def test_a2c_cartpole(): env_ctor = gym_make env_kwargs = dict(id="CartPole-v1") - rbagent = AgentManager( + rbagent = ExperimentManager( A2CAgent, (env_ctor, env_kwargs), agent_name="A2CAgent", diff --git a/long_tests/torch_agent/ltest_ctn_ppo_a2c_pendulum.py b/long_tests/torch_agent/ltest_ctn_ppo_a2c_pendulum.py index 89ccfc6f5..01b10b29f 100644 --- a/long_tests/torch_agent/ltest_ctn_ppo_a2c_pendulum.py +++ b/long_tests/torch_agent/ltest_ctn_ppo_a2c_pendulum.py @@ -1,6 +1,6 @@ from rlberry.envs import gym_make from rlberry.agents.torch import A2CAgent, PPOAgent -from rlberry.manager import AgentManager, plot_writer_data, evaluate_agents +from rlberry.manager import ExperimentManager, plot_writer_data, evaluate_agents import seaborn as sns import matplotlib.pyplot as plt @@ -15,7 +15,7 @@ def test_a2c_vs_ppo_pendul(): env_ctor = gym_make env_kwargs = dict(id="Pendulum-v1") - a2cagent = AgentManager( + a2cagent = ExperimentManager( A2CAgent, (env_ctor, env_kwargs), agent_name="A2CAgent", @@ -31,7 +31,7 @@ def 
test_a2c_vs_ppo_pendul(): learning_rate=0.001, k_epochs=10, ) - ppoagent = AgentManager( + ppoagent = ExperimentManager( PPOAgent, (env_ctor, env_kwargs), init_kwargs=ppo_init_kwargs, diff --git a/long_tests/torch_agent/ltest_dqn_montaincar.py b/long_tests/torch_agent/ltest_dqn_montaincar.py index f36f662e9..553fc9580 100644 --- a/long_tests/torch_agent/ltest_dqn_montaincar.py +++ b/long_tests/torch_agent/ltest_dqn_montaincar.py @@ -1,6 +1,6 @@ from rlberry.envs import gym_make from rlberry.agents.torch import DQNAgent -from rlberry.manager import AgentManager, evaluate_agents +from rlberry.manager import ExperimentManager, evaluate_agents import numpy as np model_configs = { @@ -15,7 +15,7 @@ def test_dqn_montaincar(): env_ctor = gym_make env_kwargs = dict(id="MountainCar-v0") - rbagent = AgentManager( + rbagent = ExperimentManager( DQNAgent, (env_ctor, env_kwargs), init_kwargs=dict( diff --git a/long_tests/torch_agent/ltest_dqn_vs_mdqn_acrobot.py b/long_tests/torch_agent/ltest_dqn_vs_mdqn_acrobot.py index da8418dca..9d17ea936 100644 --- a/long_tests/torch_agent/ltest_dqn_vs_mdqn_acrobot.py +++ b/long_tests/torch_agent/ltest_dqn_vs_mdqn_acrobot.py @@ -1,7 +1,7 @@ from rlberry.envs import gym_make from rlberry.agents.torch import DQNAgent from rlberry.agents.torch import MunchausenDQNAgent as MDQNAgent -from rlberry.manager import AgentManager, evaluate_agents, plot_writer_data +from rlberry.manager import ExperimentManager, evaluate_agents, plot_writer_data import matplotlib.pyplot as plt import seaborn as sns @@ -49,7 +49,7 @@ def test_dqn_vs_mdqn_acro(): learning_starts=5_000, ) - dqnagent = AgentManager( + dqnagent = ExperimentManager( DQNAgent, (env_ctor, env_kwargs), init_kwargs=dqn_init_kwargs, @@ -60,7 +60,7 @@ def test_dqn_vs_mdqn_acro(): # mp_context="fork", ) - mdqnagent = AgentManager( + mdqnagent = ExperimentManager( MDQNAgent, (env_ctor, env_kwargs), init_kwargs=mdqn_init_kwargs, diff --git a/rlberry/agents/agent.py b/rlberry/agents/agent.py index 104df207d..a7c81e5fb 100644 --- a/rlberry/agents/agent.py +++ b/rlberry/agents/agent.py @@ -43,12 +43,12 @@ class Agent(ABC): Directory that the agent can use to store data. _execution_metadata : ExecutionMetadata, optional Extra information about agent execution (e.g. about which is the process id where the agent is running). - Used by :class:`~rlberry.manager.AgentManager`. + Used by :class:`~rlberry.manager.ExperimentManager`. _default_writer_kwargs : dict, optional Parameters to initialize :class:`~rlberry.utils.writers.DefaultWriter` (attribute self.writer). - Used by :class:`~rlberry.manager.AgentManager`. + Used by :class:`~rlberry.manager.ExperimentManager`. _thread_shared_data : dict, optional - Used by :class:`~rlberry.manager.AgentManager` to share data across Agent + Used by :class:`~rlberry.manager.ExperimentManager` to share data across Agent instances created in different threads. **kwargs : dict Classes that implement this interface must send ``**kwargs`` @@ -435,10 +435,10 @@ class AgentWithSimplePolicy(Agent): Directory that the agent can use to store data. _execution_metadata : ExecutionMetadata, optional Extra information about agent execution (e.g. about which is the process id where the agent is running). - Used by :class:`~rlberry.manager.AgentManager`. + Used by :class:`~rlberry.manager.ExperimentManager`. _default_writer_kwargs : dict, optional Parameters to initialize :class:`~rlberry.utils.writers.DefaultWriter` (attribute self.writer). - Used by :class:`~rlberry.manager.AgentManager`. 
+ Used by :class:`~rlberry.manager.ExperimentManager`. **kwargs : dict Classes that implement this interface must send ``**kwargs`` to :code:`AgentWithSimplePolicy.__init__()`. @@ -547,12 +547,12 @@ class AgentTorch(Agent): Directory that the agent can use to store data. _execution_metadata : ExecutionMetadata, optional Extra information about agent execution (e.g. about which is the process id where the agent is running). - Used by :class:`~rlberry.manager.AgentManager`. + Used by :class:`~rlberry.manager.ExperimentManager`. _default_writer_kwargs : dict, optional Parameters to initialize :class:`~rlberry.utils.writers.DefaultWriter` (attribute self.writer). - Used by :class:`~rlberry.manager.AgentManager`. + Used by :class:`~rlberry.manager.ExperimentManager`. _thread_shared_data : dict, optional - Used by :class:`~rlberry.manager.AgentManager` to share data across Agent + Used by :class:`~rlberry.manager.ExperimentManager` to share data across Agent instances created in different threads. **kwargs : dict Classes that implement this interface must send ``**kwargs`` diff --git a/rlberry/agents/stable_baselines/stable_baselines.py b/rlberry/agents/stable_baselines/stable_baselines.py index 871ba0fb2..ffc43d099 100644 --- a/rlberry/agents/stable_baselines/stable_baselines.py +++ b/rlberry/agents/stable_baselines/stable_baselines.py @@ -96,10 +96,10 @@ class StableBaselinesAgent(AgentWithSimplePolicy): Directory that the agent can use to store data. _execution_metadata : ExecutionMetadata, optional Extra information about agent execution (e.g. about which is the process id where the agent is running). - Used by :class:`~rlberry.manager.AgentManager`. + Used by :class:`~rlberry.manager.ExperimentManager`. _default_writer_kwargs : dict, optional Parameters to initialize :class:`~rlberry.utils.writers.DefaultWriter` (attribute self.writer). - Used by :class:`~rlberry.manager.AgentManager`. + Used by :class:`~rlberry.manager.ExperimentManager`. 
Examples -------- diff --git a/rlberry/agents/torch/tests/test_a2c.py b/rlberry/agents/torch/tests/test_a2c.py index 085bd6e97..057649705 100644 --- a/rlberry/agents/torch/tests/test_a2c.py +++ b/rlberry/agents/torch/tests/test_a2c.py @@ -1,6 +1,6 @@ from rlberry.envs import Wrapper from rlberry.agents.torch import A2CAgent -from rlberry.manager import AgentManager, evaluate_agents +from rlberry.manager import ExperimentManager, evaluate_agents from rlberry.envs.benchmarks.ball_exploration import PBall2D from gymnasium import make @@ -11,7 +11,7 @@ def test_a2c(): env_ctor = Wrapper env_kwargs = dict(env=mdp) - a2crlberry_stats = AgentManager( + a2crlberry_stats = ExperimentManager( A2CAgent, (env_ctor, env_kwargs), fit_budget=int(100), @@ -30,7 +30,7 @@ def test_a2c(): env_ctor = Wrapper env_kwargs = dict(env=mdp) - a2crlberry_stats = AgentManager( + a2crlberry_stats = ExperimentManager( A2CAgent, (env_ctor, env_kwargs), fit_budget=int(100), @@ -50,7 +50,7 @@ def test_a2c(): env_ctor = Wrapper env_kwargs = dict(env=mdp) - a2crlberry_stats = AgentManager( + a2crlberry_stats = ExperimentManager( A2CAgent, (env_ctor, env_kwargs), fit_budget=int(100), @@ -68,7 +68,7 @@ def test_a2c(): env_ctor = PBall2D env_kwargs = dict() - a2crlberry_stats = AgentManager( + a2crlberry_stats = ExperimentManager( A2CAgent, (env_ctor, env_kwargs), fit_budget=int(100), @@ -89,7 +89,7 @@ def test_a2c(): env_ctor = Wrapper env_kwargs = dict(env=mdp) - a2crlberry_stats = AgentManager( + a2crlberry_stats = ExperimentManager( A2CAgent, (env_ctor, env_kwargs), fit_budget=int(100), diff --git a/rlberry/agents/torch/tests/test_dqn.py b/rlberry/agents/torch/tests/test_dqn.py index c3d7d87b2..5b6848fb7 100644 --- a/rlberry/agents/torch/tests/test_dqn.py +++ b/rlberry/agents/torch/tests/test_dqn.py @@ -2,7 +2,7 @@ from rlberry.envs import gym_make from rlberry.agents.torch.dqn import DQNAgent from rlberry.agents.torch.utils.training import model_factory -from rlberry.manager.agent_manager import AgentManager +from rlberry.manager import ExperimentManager import os import pathlib @@ -83,13 +83,13 @@ def test_dqn_classic_env(): observation = next_observation -def test_dqn_agent_manager_classic_env(): +def test_dqn_experiment_manager_classic_env(): # saving_path = "rlberry/agents/torch/tests/agentmanager_test_dqn_classic_env" with tempfile.TemporaryDirectory() as tmpdirname: saving_path = tmpdirname + "/agentmanager_test_dqn_classic_env" - test_agent_manager = AgentManager( + test_experiment_manager = ExperimentManager( DQNAgent, # The Agent class. 
( gym_make, @@ -115,22 +115,22 @@ def test_dqn_agent_manager_classic_env(): output_dir=saving_path, ) - test_agent_manager.fit(budget=200) + test_experiment_manager.fit(budget=200) # test the save function - test_agent_manager.save() + test_experiment_manager.save() assert os.path.exists(saving_path) # test the loading function test_load_env = gym_make("CartPole-v1") path_to_load = next(pathlib.Path(saving_path).glob("**/*.pickle")) - loaded_agent_manager = AgentManager.load(path_to_load) - assert loaded_agent_manager + loaded_experiment_manager = ExperimentManager.load(path_to_load) + assert loaded_experiment_manager # test the agent state, info = test_load_env.reset() for tt in range(50): - action = loaded_agent_manager.get_agent_instances()[0].policy(state) + action = loaded_experiment_manager.get_agent_instances()[0].policy(state) next_s, _, terminated, truncated, test = test_load_env.step(action) done = terminated or truncated if done: diff --git a/rlberry/agents/torch/tests/test_ppo.py b/rlberry/agents/torch/tests/test_ppo.py index ce9a50ae4..ed31465cb 100644 --- a/rlberry/agents/torch/tests/test_ppo.py +++ b/rlberry/agents/torch/tests/test_ppo.py @@ -10,7 +10,7 @@ import pytest from rlberry.envs import Wrapper from rlberry.agents.torch import PPOAgent -from rlberry.manager import AgentManager, evaluate_agents +from rlberry.manager import ExperimentManager, evaluate_agents from rlberry.envs.benchmarks.ball_exploration import PBall2D from gymnasium import make from rlberry.agents.torch.utils.training import model_factory_from_env @@ -25,7 +25,7 @@ def test_ppo(): env_ctor = Wrapper env_kwargs = dict(env=mdp) - pporlberry_stats = AgentManager( + pporlberry_stats = ExperimentManager( PPOAgent, (env_ctor, env_kwargs), fit_budget=int(132), @@ -45,7 +45,7 @@ def test_ppo(): env_ctor = Wrapper env_kwargs = dict(env=mdp) - pporlberry_stats = AgentManager( + pporlberry_stats = ExperimentManager( PPOAgent, (env_ctor, env_kwargs), fit_budget=int(132), @@ -65,7 +65,7 @@ def test_ppo(): env_ctor = Wrapper env_kwargs = dict(env=mdp) - pporlberry_stats = AgentManager( + pporlberry_stats = ExperimentManager( PPOAgent, (env_ctor, env_kwargs), fit_budget=int(132), @@ -82,7 +82,7 @@ def test_ppo(): env_ctor = PBall2D env_kwargs = dict() - pporlberry_stats = AgentManager( + pporlberry_stats = ExperimentManager( PPOAgent, (env_ctor, env_kwargs), fit_budget=int(132), @@ -103,7 +103,7 @@ def test_ppo(): env_ctor = Wrapper env_kwargs = dict(env=mdp) - pporlberry_stats = AgentManager( + pporlberry_stats = ExperimentManager( PPOAgent, (env_ctor, env_kwargs), fit_budget=int(132), @@ -133,7 +133,7 @@ def test_ppo(): ) pporlberry_stats.fit() - pporlberry_stats = AgentManager( + pporlberry_stats = ExperimentManager( PPOAgent, (env_ctor, env_kwargs), fit_budget=int(132), @@ -168,7 +168,7 @@ def test_ppo(): env_ctor = PBall2D env_kwargs = dict() - pporlberry_stats = AgentManager( + pporlberry_stats = ExperimentManager( PPOAgent, (env_ctor, env_kwargs), fit_budget=int(132), @@ -183,7 +183,7 @@ def test_ppo(): output = evaluate_agents([pporlberry_stats], n_simulations=2, plot=False) pporlberry_stats.clear_output_dir() - pporlberry_stats = AgentManager( + pporlberry_stats = ExperimentManager( PPOAgent, (env_ctor, env_kwargs), fit_budget=int(132), diff --git a/rlberry/agents/torch/tests/test_torch_atari.py b/rlberry/agents/torch/tests/test_torch_atari.py index 694fcfaf3..bb7629c78 100644 --- a/rlberry/agents/torch/tests/test_torch_atari.py +++ b/rlberry/agents/torch/tests/test_torch_atari.py @@ -1,8 +1,7 @@ -from 
rlberry.manager.agent_manager import AgentManager +from rlberry.manager import ExperimentManager from rlberry.agents.torch.dqn.dqn import DQNAgent from rlberry.envs.gym_make import atari_make -from rlberry.manager.agent_manager import AgentManager from rlberry.agents.torch import PPOAgent from rlberry.agents.torch.utils.training import model_factory_from_env import pathlib @@ -33,7 +32,7 @@ def test_forward_dqn(): "is_policy": False, # The network should output a distribution } - tuned_agent = AgentManager( + tuned_agent = ExperimentManager( DQNAgent, # The Agent class. ( atari_make, @@ -80,7 +79,7 @@ def test_forward_empty_input_dim(): "is_policy": False, # The network should output a distribution } - tuned_agent = AgentManager( + tuned_agent = ExperimentManager( DQNAgent, # The Agent class. ( atari_make, @@ -197,7 +196,7 @@ def test_ppo_vectorized_atari_env(num_envs): @pytest.mark.parametrize("num_envs", [1, 3]) -def test_ppo_agent_manager_vectorized_atari_env(num_envs): +def test_ppo_experiment_manager_vectorized_atari_env(num_envs): with tempfile.TemporaryDirectory() as tmpdirname: saving_path = tmpdirname + "/agentmanager_test_ppo_vectorized_env" @@ -239,7 +238,7 @@ def test_ppo_agent_manager_vectorized_atari_env(num_envs): "out_size": 1, } - test_agent_manager = AgentManager( + test_experiment_manager = ExperimentManager( PPOAgent, # The Agent class. ( atari_make, @@ -265,23 +264,23 @@ def test_ppo_agent_manager_vectorized_atari_env(num_envs): output_dir=saving_path, # eval_env = (atari_make,dict(id="ALE/Atlantis-v5",n_envs=1)) ) - test_agent_manager.fit(budget=500) + test_experiment_manager.fit(budget=500) # test the save function - test_agent_manager.save() + test_experiment_manager.save() assert os.path.exists(saving_path) # test the loading function test_load_env = atari_make("ALE/Atlantis-v5") test_load_env.reset() path_to_load = next(pathlib.Path(saving_path).glob("**/*.pickle")) - loaded_agent_manager = AgentManager.load(path_to_load) - assert loaded_agent_manager + loaded_experiment_manager = ExperimentManager.load(path_to_load) + assert loaded_experiment_manager # test the agent obs, infos = test_load_env.reset() for tt in range(50): - actions = loaded_agent_manager.get_agent_instances()[0].policy(obs) + actions = loaded_experiment_manager.get_agent_instances()[0].policy(obs) obs, reward, terminated, truncated, info = test_load_env.step(actions) done = np.logical_or(terminated, truncated) if done: diff --git a/rlberry/envs/tests/test_gym_make.py b/rlberry/envs/tests/test_gym_make.py index 73a9a615b..9ad80d2ee 100644 --- a/rlberry/envs/tests/test_gym_make.py +++ b/rlberry/envs/tests/test_gym_make.py @@ -22,7 +22,7 @@ def test_atari_make(): def test_rendering_with_atari_make(): - from rlberry.manager.agent_manager import AgentManager + from rlberry.manager import ExperimentManager from rlberry.agents.torch import PPOAgent from gymnasium.wrappers.record_video import RecordVideo import os @@ -69,7 +69,7 @@ def test_rendering_with_atari_make(): "out_size": 1, } - tuned_agent = AgentManager( + tuned_agent = ExperimentManager( PPOAgent, # The Agent class. ( atari_make, diff --git a/rlberry/experiment/generator.py b/rlberry/experiment/generator.py index 5adb9ed9d..1dbd6f1f8 100644 --- a/rlberry/experiment/generator.py +++ b/rlberry/experiment/generator.py @@ -6,17 +6,17 @@ Options: -h --help Show this screen. - --enable_tensorboard Enable tensorboard writer in AgentManager. + --enable_tensorboard Enable tensorboard writer in ExperimentManager. 
--n_fit= Number of times each agent is fit [default: 4]. --output_dir= Directory to save the results [default: results]. --parallelization= Either 'thread' or 'process' [default: process]. - --max_workers= Number of workers used by AgentManager.fit. Set to -1 for the maximum value. [default: -1] + --max_workers= Number of workers used by ExperimentManager.fit. Set to -1 for the maximum value. [default: -1] """ from docopt import docopt from pathlib import Path from rlberry.experiment.yaml_utils import parse_experiment_config -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager from rlberry import check_packages import rlberry @@ -26,13 +26,13 @@ def experiment_generator(): """ - Parse command line arguments and yields AgentManager instances. + Parse command line arguments and yields ExperimentManager instances. """ args = docopt(__doc__) max_workers = int(args["--max_workers"]) if max_workers == -1: max_workers = None - for _, agent_manager_kwargs in parse_experiment_config( + for _, experiment_manager_kwargs in parse_experiment_config( Path(args[""]), n_fit=int(args["--n_fit"]), max_workers=max_workers, @@ -41,10 +41,10 @@ def experiment_generator(): ): if args["--enable_tensorboard"]: if check_packages.TENSORBOARD_INSTALLED: - agent_manager_kwargs.update(dict(enable_tensorboard=True)) + experiment_manager_kwargs.update(dict(enable_tensorboard=True)) else: logger.warning( "Option --enable_tensorboard is not available: tensorboard is not installed." ) - yield AgentManager(**agent_manager_kwargs) + yield ExperimentManager(**experiment_manager_kwargs) diff --git a/rlberry/experiment/load_results.py b/rlberry/experiment/load_results.py index 58d74eea6..f66819f5d 100644 --- a/rlberry/experiment/load_results.py +++ b/rlberry/experiment/load_results.py @@ -1,5 +1,5 @@ from pathlib import Path -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager import pandas as pd @@ -42,7 +42,7 @@ def load_experiment_results(output_dir, experiment_name): output_data['experiment_dirs'] = list of paths to experiment directory (output_dir/experiment_name) output_data['agent_list'] = list containing the names of the agents in the experiment - output_data['manager'][agent_name] = fitted AgentManager for agent_name + output_data['manager'][agent_name] = fitted ExperimentManager for agent_name output_data['dataframes'][agent_name] = dict of pandas data frames from the last run of the experiment output_data['data_dir'][agent_name] = directory from which the results were loaded """ @@ -91,13 +91,15 @@ def load_experiment_results(output_dir, experiment_name): # store data_dir output_data["data_dir"][agent_name] = data_dirs[agent_name] - # store AgentManager + # store ExperimentManager output_data["manager"][agent_name] = None fname = data_dirs[agent_name] / "manager_obj.pickle" try: - output_data["manager"][agent_name] = AgentManager.load(fname) + output_data["manager"][agent_name] = ExperimentManager.load(fname) except Exception: - logger.warning(f"Could not load AgentManager instance for {agent_name}.") + logger.warning( + f"Could not load ExperimentManager instance for {agent_name}." + ) logger.info("... 
loaded " + str(fname)) # store data frames diff --git a/rlberry/experiment/tests/test_experiment_generator.py b/rlberry/experiment/tests/test_experiment_generator.py index 3b7147292..2c5297198 100644 --- a/rlberry/experiment/tests/test_experiment_generator.py +++ b/rlberry/experiment/tests/test_experiment_generator.py @@ -10,34 +10,34 @@ def test_mock_args(monkeypatch): ) random_numbers = [] - for agent_manager in experiment_generator(): - rng = agent_manager.seeder.rng + for experiment_manager in experiment_generator(): + rng = experiment_manager.seeder.rng random_numbers.append(rng.uniform(size=10)) - assert agent_manager.agent_class is RSUCBVIAgent - assert agent_manager._base_init_kwargs["horizon"] == 2 - assert agent_manager.fit_budget == 3 - assert agent_manager.eval_kwargs["eval_horizon"] == 4 + assert experiment_manager.agent_class is RSUCBVIAgent + assert experiment_manager._base_init_kwargs["horizon"] == 2 + assert experiment_manager.fit_budget == 3 + assert experiment_manager.eval_kwargs["eval_horizon"] == 4 - assert agent_manager._base_init_kwargs["lp_metric"] == 2 - assert agent_manager._base_init_kwargs["min_dist"] == 0.0 - assert agent_manager._base_init_kwargs["max_repr"] == 800 - assert agent_manager._base_init_kwargs["bonus_scale_factor"] == 1.0 - assert agent_manager._base_init_kwargs["reward_free"] is True + assert experiment_manager._base_init_kwargs["lp_metric"] == 2 + assert experiment_manager._base_init_kwargs["min_dist"] == 0.0 + assert experiment_manager._base_init_kwargs["max_repr"] == 800 + assert experiment_manager._base_init_kwargs["bonus_scale_factor"] == 1.0 + assert experiment_manager._base_init_kwargs["reward_free"] is True - train_env = agent_manager.train_env[0](**agent_manager.train_env[1]) + train_env = experiment_manager.train_env[0](**experiment_manager.train_env[1]) assert train_env.reward_free is False assert train_env.array_observation is True - if agent_manager.agent_name == "rsucbvi": - assert agent_manager._base_init_kwargs["gamma"] == 1.0 + if experiment_manager.agent_name == "rsucbvi": + assert experiment_manager._base_init_kwargs["gamma"] == 1.0 - elif agent_manager.agent_name == "rsucbvi_alternative": - assert agent_manager._base_init_kwargs["gamma"] == 0.9 + elif experiment_manager.agent_name == "rsucbvi_alternative": + assert experiment_manager._base_init_kwargs["gamma"] == 0.9 else: raise ValueError() - # check that seeding is the same for each AgentManager instance + # check that seeding is the same for each ExperimentManager instance for ii in range(1, len(random_numbers)): assert np.array_equal(random_numbers[ii - 1], random_numbers[ii]) diff --git a/rlberry/experiment/tests/test_load_results.py b/rlberry/experiment/tests/test_load_results.py index 4876fd985..aca97bf25 100644 --- a/rlberry/experiment/tests/test_load_results.py +++ b/rlberry/experiment/tests/test_load_results.py @@ -15,9 +15,9 @@ def test_save_and_load(): sys.argv.append("--parallelization=thread") sys.argv.append("--output_dir=" + tmpdirname) print(sys.argv) - for agent_manager in experiment_generator(): - agent_manager.fit() - agent_manager.save() + for experiment_manager in experiment_generator(): + experiment_manager.fit() + experiment_manager.save() data = load_experiment_results(tmpdirname, "params_experiment") assert len(data) > 0 diff --git a/rlberry/experiment/yaml_utils.py b/rlberry/experiment/yaml_utils.py index ed8734d41..581b254ec 100644 --- a/rlberry/experiment/yaml_utils.py +++ b/rlberry/experiment/yaml_utils.py @@ -105,7 +105,7 @@ def 
parse_experiment_config( parallelization: str = "process", ) -> Generator[Tuple[int, dict], None, None]: """ - Read .yaml files. set global seed and convert to AgentManager instances. + Read .yaml files. set global seed and convert to ExperimentManager instances. Exemple of experiment config: @@ -128,18 +128,18 @@ def parse_experiment_config( n_fit : int Number of instances of each agent to fit max_workers : int or None - Maximum number of workers created in the fit() method of an instance of AgentManager. + Maximum number of workers created in the fit() method of an instance of ExperimentManager. output_base_dir : str - Directory where to save AgentManager results. + Directory where to save ExperimentManager results. parallelization : 'thread' or 'process', default : 'process' - Choose whether processes or threads are used in AgentManager parallelization. + Choose whether processes or threads are used in ExperimentManager parallelization. Returns ------- seed: int global seed - agent_manager_kwargs: - parameters to create an AgentManager instance. + experiment_manager_kwargs: + parameters to create an ExperimentManager instance. """ with path.open() as file: config = yaml.safe_load(file) @@ -148,7 +148,7 @@ def parse_experiment_config( n_fit = n_fit for agent_path in config["agents"]: - # set seed before creating AgentManager + # set seed before creating ExperimentManager seed = config["seed"] agent_name = Path(agent_path).stem @@ -210,5 +210,5 @@ def parse_experiment_config( if __name__ == "__main__": filename = "examples/demo_experiment/params_experiment.yaml" - for seed, agent_manager in parse_experiment_config(Path(filename)): + for seed, experiment_manager in parse_experiment_config(Path(filename)): print(seed) diff --git a/rlberry/manager/__init__.py b/rlberry/manager/__init__.py index be85be64e..e106d36ea 100644 --- a/rlberry/manager/__init__.py +++ b/rlberry/manager/__init__.py @@ -1,3 +1,8 @@ -from .agent_manager import AgentManager, preset_manager +from .experiment_manager import ExperimentManager, preset_manager from .multiple_managers import MultipleManagers +from .remote_experiment_manager import RemoteExperimentManager from .evaluation import evaluate_agents, plot_writer_data, read_writer_data + +# (Remote)AgentManager alias for the (Remote)ExperimentManager class, for backward compatibility +AgentManager = ExperimentManager +RemoteAgentManager = RemoteExperimentManager diff --git a/rlberry/manager/evaluation.py b/rlberry/manager/evaluation.py index 2fe028b76..1e2e04bb9 100644 --- a/rlberry/manager/evaluation.py +++ b/rlberry/manager/evaluation.py @@ -11,14 +11,14 @@ import dill from distutils.version import LooseVersion -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager import rlberry logger = rlberry.logger def evaluate_agents( - agent_manager_list, + experiment_manager_list, n_simulations=5, choose_random_agents=True, fignum=None, @@ -27,16 +27,16 @@ def evaluate_agents( sns_kwargs=None, ): """ - Evaluate and compare each of the agents in agent_manager_list. + Evaluate and compare each of the agents in experiment_manager_list. Parameters ---------- - agent_manager_list : list of AgentManager objects. + experiment_manager_list : list of ExperimentManager objects. n_simulations: int - Number of calls to the eval() method of each AgentManager instance. + Number of calls to the eval() method of each ExperimentManager instance. 
choose_random_agents: bool - If true and n_fit>1, use a random fitted agent from each AgentManager at each evaluation. - Otherwise, each fitted agent of each AgentManager is evaluated n_simulations times. + If true and n_fit>1, use a random fitted agent from each ExperimentManager at each evaluation. + Otherwise, each fitted agent of each ExperimentManager is evaluated n_simulations times. fignum: string or int Identifier of plot figure. show: bool @@ -53,11 +53,11 @@ def evaluate_agents( Examples -------- >>> from rlberry.agents.torch import A2CAgent, DQNAgent - >>> from rlberry.manager import AgentManager, evaluate_agents + >>> from rlberry.manager import ExperimentManager, evaluate_agents >>> from rlberry.envs import gym_make >>> >>> if __name__=="__main__": - >>> managers = [ AgentManager( + >>> managers = [ ExperimentManager( >>> agent_class, >>> (gym_make, dict(id="CartPole-v1")), >>> fit_budget=1e4, @@ -79,21 +79,23 @@ def evaluate_agents( # eval_outputs = [] - for agent_manager in agent_manager_list: - logger.info(f"Evaluating {agent_manager.agent_name}...") + for experiment_manager in experiment_manager_list: + logger.info(f"Evaluating {experiment_manager.agent_name}...") if choose_random_agents: - outputs = agent_manager.eval_agents(n_simulations) + outputs = experiment_manager.eval_agents(n_simulations) else: outputs = [] - for idx in range(len(agent_manager.agent_handlers)): - outputs += list(agent_manager.eval_agents(n_simulations, agent_id=idx)) + for idx in range(len(experiment_manager.agent_handlers)): + outputs += list( + experiment_manager.eval_agents(n_simulations, agent_id=idx) + ) if len(outputs) > 0: eval_outputs.append(outputs) if len(eval_outputs) == 0: logger.error( - "[evaluate_agents]: No evaluation data. Make sure AgentManager.fit() has been called." + "[evaluate_agents]: No evaluation data. Make sure ExperimentManager.fit() has been called." ) return @@ -104,8 +106,8 @@ def evaluate_agents( # build unique agent IDs (in case there are two agents with the same ID) unique_ids = [] id_count = {} - for agent_manager in agent_manager_list: - name = agent_manager.agent_name + for experiment_manager in experiment_manager_list: + name = experiment_manager.agent_name if name not in id_count: id_count[name] = 1 else: @@ -134,19 +136,19 @@ def evaluate_agents( def read_writer_data(data_source, tag=None, preprocess_func=None, id_agent=None): """ - Given a list of AgentManager or a folder, read data (corresponding to info) obtained in each episode. + Given a list of ExperimentManager or a folder, read data (corresponding to info) obtained in each episode. The dictionary returned by agents' .fit() method must contain a key equal to `info`. Parameters ---------- - data_source : :class:`~rlberry.manager.AgentManager`, or list of :class:`~rlberry.manager.AgentManager` or str or list of str - - If AgentManager or list of AgentManager, load data from it (the agents must be fitted). + data_source : :class:`~rlberry.manager.ExperimentManager`, or list of :class:`~rlberry.manager.ExperimentManager` or str or list of str + - If ExperimentManager or list of ExperimentManager, load data from it (the agents must be fitted). - If str, the string must be the string path of a directory, each subdirectory of this directory must contain pickle files. Load the data from the directory of the latest experiment in date. This str should be equal to the value of the `output_dir` parameter in - :class:`~rlberry.manager.AgentManager`. + :class:`~rlberry.manager.ExperimentManager`. 
- If list of str, each string must be a directory containing pickle files. Load the data from these pickle files. @@ -175,11 +177,11 @@ def read_writer_data(data_source, tag=None, preprocess_func=None, id_agent=None) Examples -------- >>> from rlberry.agents.torch import A2CAgent, DQNAgent - >>> from rlberry.manager import AgentManager, read_writer_data + >>> from rlberry.manager import ExperimentManager, read_writer_data >>> from rlberry.envs import gym_make >>> >>> if __name__=="__main__": - >>> managers = [ AgentManager( + >>> managers = [ ExperimentManager( >>> agent_class, >>> (gym_make, dict(id="CartPole-v1")), >>> fit_budget=1e4, @@ -196,12 +198,12 @@ def read_writer_data(data_source, tag=None, preprocess_func=None, id_agent=None) input_dir = None if not isinstance(data_source, list): - if isinstance(data_source, AgentManager): + if isinstance(data_source, ExperimentManager): data_source = [data_source] else: take_last_date = True else: - if not isinstance(data_source[0], AgentManager): + if not isinstance(data_source[0], ExperimentManager): take_last_date = False for dir in data_source: files = list(Path(dir).iterdir()) @@ -210,8 +212,8 @@ def read_writer_data(data_source, tag=None, preprocess_func=None, id_agent=None) "One of the files in data_source does not contain pickle files" ) - if isinstance(data_source[0], AgentManager): - agent_manager_list = data_source + if isinstance(data_source[0], ExperimentManager): + experiment_manager_list = data_source else: input_dir = data_source @@ -241,13 +243,13 @@ def read_writer_data(data_source, tag=None, preprocess_func=None, id_agent=None) for id_f, filename in enumerate(input_dir): writer_datas.append(_load_data(filename, agent_dirs[id_f], id_agent)) else: - for manager in agent_manager_list: - # Important: since manager can be a RemoteAgentManager, + for manager in experiment_manager_list: + # Important: since manager can be a RemoteExperimentManager, # it is important to avoid repeated accesses to its methods and properties. # That is why writer_data is taken from the manager instance only in # the line below. writer_datas.append(manager.get_writer_data()) - agent_name_list = [manager.agent_name for manager in agent_manager_list] + agent_name_list = [manager.agent_name for manager in experiment_manager_list] # preprocess agent stats data_list = [] @@ -264,7 +266,7 @@ def read_writer_data(data_source, tag=None, preprocess_func=None, id_agent=None) processed_df["value"] = preprocess_funcs[id_tag]( processed_df["value"].values ) - # update name according to AgentManager name and + # update name according to ExperimentManager name and # n_simulation processed_df["name"] = agent_name processed_df["n_simu"] = idx @@ -296,7 +298,7 @@ def _get_last_xp(input_dir, name): if len(datetimes) == 0: raise ValueError( "input dir not found, verify that the agent are trained " - 'and that AgentManager.outdir_id_style="timestamp"' + 'and that ExperimentManager.outdir_id_style="timestamp"' ) # get the date of last experiment @@ -366,7 +368,7 @@ def plot_writer_data( plot_raw_curves=True, ): """ - Given a list of AgentManager or a folder, plot data (corresponding to info) obtained in each episode. + Given a list of ExperimentManager or a folder, plot data (corresponding to info) obtained in each episode. The dictionary returned by agents' .fit() method must contain a key equal to `info`. 
If there are several simulations, a confidence interval is plotted ( 90% percentile interval if seaborn version >= 0.12.0 @@ -375,14 +377,14 @@ def plot_writer_data( Parameters ---------- - data_source : :class:`~rlberry.manager.AgentManager`, or list of :class:`~rlberry.manager.AgentManager` or str or list of str - - If AgentManager or list of AgentManager, load data from it (the agents must be fitted). + data_source : :class:`~rlberry.manager.ExperimentManager`, or list of :class:`~rlberry.manager.ExperimentManager` or str or list of str + - If ExperimentManager or list of ExperimentManager, load data from it (the agents must be fitted). - If str, the string must be the string path of a directory, each subdirectory of this directory must contain pickle files. load the data from the directory of the latest experiment in date. This str should be equal to the value of the `output_dir` parameter in - :class:`~rlberry.manager.AgentManager`. + :class:`~rlberry.manager.ExperimentManager`. - If list of str, each string must be a directory containing pickle files load the data from these pickle files. @@ -425,11 +427,11 @@ def plot_writer_data( Examples -------- >>> from rlberry.agents.torch import A2CAgent, DQNAgent - >>> from rlberry.manager import AgentManager, plot_writer_data + >>> from rlberry.manager import ExperimentManager, plot_writer_data >>> from rlberry.envs import gym_make >>> >>> if __name__=="__main__": - >>> managers = [ AgentManager( + >>> managers = [ ExperimentManager( >>> agent_class, >>> (gym_make, dict(id="CartPole-v1")), >>> fit_budget=4e4, diff --git a/rlberry/manager/agent_manager.py b/rlberry/manager/experiment_manager.py similarity index 94% rename from rlberry/manager/agent_manager.py rename to rlberry/manager/experiment_manager.py index a09f9852c..963fe7f2a 100644 --- a/rlberry/manager/agent_manager.py +++ b/rlberry/manager/experiment_manager.py @@ -47,7 +47,7 @@ class AgentHandler: """ Wraps an Agent so that it can be either loaded in memory or represented by a file storing the Agent data. - It is used by `class`:~rlberry.manager.AgentManager` to handle the fact that + It is used by `class`:~rlberry.manager.ExperimentManager` to handle the fact that not all agents can be pickled, when returning from the processes that train the agents. @@ -157,7 +157,7 @@ def __getattr__(self, attr): # -class AgentManager: +class ExperimentManager: """ Class to train, optimize hyperparameters, evaluate and gather statistics about an agent. @@ -238,7 +238,7 @@ class AgentManager: Directory where the manager saves data. rlberry_version: str - Current version of rlberry. This is saved when calling agent_manager.save() + Current version of rlberry. This is saved when calling experiment_manager.save() and it is then used in load() to warn if the version of the agent is not a match with current rlberry version. @@ -246,8 +246,8 @@ class AgentManager: -------- >>> from rlberry.agents.torch import A2CAgent >>> from rlberry.envs import gym_make - >>> from rlberry.manager import AgentManager - >>> manager = AgentManager( + >>> from rlberry.manager import ExperimentManager + >>> manager = ExperimentManager( >>> A2CAgent, >>> (env_ctor, env_kwargs), >>> fit_budget=100, @@ -283,7 +283,7 @@ def __init__( thread_shared_data=None, ): # agent_class should only be None when the constructor is called - # by the class method AgentManager.load(), since the agent class + # by the class method ExperimentManager.load(), since the agent class # will be loaded. 
if agent_class is None: @@ -299,11 +299,11 @@ def __init__( # Check train_env and eval_env assert isinstance( train_env, Tuple - ), "[AgentManager]train_env must be Tuple (constructor, kwargs)" + ), "[ExperimentManager]train_env must be Tuple (constructor, kwargs)" if eval_env is not None: assert isinstance( eval_env, Tuple - ), "[AgentManager]train_env must be Tuple (constructor, kwargs)" + ), "[ExperimentManager]train_env must be Tuple (constructor, kwargs)" # check options assert outdir_id_style in [None, "unique", "timestamp"] @@ -329,7 +329,7 @@ def __init__( if parallelization != "thread" and thread_shared_data is not None: logger.warning( f"Using thread_shared_data and parallelization = {parallelization}" - " in AgentManager does *not* share data among Agent instances!" + " in ExperimentManager does *not* share data among Agent instances!" " Each process will have its copy of thread_shared_data." ) @@ -357,7 +357,9 @@ def __init__( try: self.fit_budget = self.fit_kwargs.pop("fit_budget") except KeyError: - raise ValueError("[AgentManager] fit_budget missing in __init__().") + raise ValueError( + "[ExperimentManager] fit_budget missing in __init__()." + ) # extra params per instance if init_kwargs_per_instance is not None: assert len(init_kwargs_per_instance) == n_fit @@ -407,7 +409,7 @@ def __init__( # if default_writer_kwargs: # logger.warning( # "(Re)defining the following DefaultWriter" - # f" parameters in AgentManager: {list(default_writer_kwargs.keys())}" + # f" parameters in ExperimentManager: {list(default_writer_kwargs.keys())}" # ) for ii in range(n_fit): self.agent_default_writer_kwargs[ii].update(default_writer_kwargs) @@ -567,7 +569,7 @@ def eval_agents( agent = self.agent_handlers[agent_idx] if agent.is_empty(): logger.error( - "Calling eval() in an AgentManager instance contaning an empty AgentHandler." + "Calling eval() in an ExperimentManager instance contaning an empty AgentHandler." " Returning []." ) return [] @@ -604,7 +606,7 @@ def set_writer(self, idx, writer_fn, writer_kwargs=None): Note ----- - Must be called right after creating an instance of AgentManager. + Must be called right after creating an instance of ExperimentManager. Parameters ---------- @@ -617,12 +619,12 @@ def set_writer(self, idx, writer_fn, writer_kwargs=None): kwargs for writer_fn idx : int Index of the agent to set the writer (0 <= idx < `n_fit`). - AgentManager fits `n_fit` agents, the writer of each one of them + ExperimentManager fits `n_fit` agents, the writer of each one of them needs to be set separetely. 
""" assert ( idx >= 0 and idx < self.n_fit - ), "Invalid index sent to AgentManager.set_writer()" + ), "Invalid index sent to ExperimentManager.set_writer()" writer_kwargs = writer_kwargs or {} self.writers[idx] = (writer_fn, writer_kwargs) @@ -676,12 +678,12 @@ def fit(self, budget=None, **kwargs): _check_not_importing_main() except RuntimeError as exc: raise RuntimeError( - """Warning: in AgentManager, if mp_context='spawn' and + """Warning: in ExperimentManager, if mp_context='spawn' and parallelization="process" then the script must be run outside a notebook and protected by a if __name__ == '__main__': For example: if __name__ == '__main__': - agent = AgentManager(UCBVIAgent,(Chain, {}), + agent = ExperimentManager(UCBVIAgent,(Chain, {}), mp_context="spawn", parallelization="process") @@ -690,7 +692,7 @@ def fit(self, budget=None, **kwargs): ) from exc logger.info( - f"Running AgentManager fit() for {self.agent_name}" + f"Running ExperimentManager fit() for {self.agent_name}" f" with n_fit = {self.n_fit} and max_workers = {self.max_workers}." ) seeders = self.seeder.spawn(self.n_fit) @@ -763,14 +765,14 @@ def _gather_default_writer_data(self): self.default_writer_data[ii] = agent.writer.data def save(self): - """Save AgentManager data to :attr:`~rlberry.manager.agent_manager.AgentManager.output_dir`. + """Save ExperimentManager data to :attr:`~rlberry.manager.experiment_manager.ExperimentManager.output_dir`. - Saves object so that the data can be later loaded to recreate an AgentManager instance. + Saves object so that the data can be later loaded to recreate an ExperimentManager instance. Returns ------- :class:`pathlib.Path` - Filename where the AgentManager object was saved. + Filename where the ExperimentManager object was saved. """ # use self.output_dir output_dir = self.output_dir_ @@ -799,7 +801,7 @@ def save(self): logger.warning("Could not save default_writer_data.") # - # Pickle AgentManager instance + # Pickle ExperimentManager instance # # clear agent handlers @@ -813,22 +815,26 @@ def save(self): try: with filename.open("wb") as ff: pickle.dump(self.__dict__, ff) - logger.info("Saved AgentManager({}) using pickle.".format(self.agent_name)) + logger.info( + "Saved ExperimentManager({}) using pickle.".format(self.agent_name) + ) except Exception: try: with filename.open("wb") as ff: dill.dump(self.__dict__, ff) logger.info( - "Saved AgentManager({}) using dill.".format(self.agent_name) + "Saved ExperimentManager({}) using dill.".format(self.agent_name) ) except Exception as ex: - logger.warning("[AgentManager] Instance cannot be pickled: " + str(ex)) + logger.warning( + "[ExperimentManager] Instance cannot be pickled: " + str(ex) + ) return filename @classmethod def load(cls, filename): - """Loads an AgentManager instance from a file. + """Loads an ExperimentManager instance from a file. Parameters ---------- @@ -836,14 +842,14 @@ def load(cls, filename): Returns ------- - :class:`rlberry.manager.AgentManager` - Loaded instance of AgentManager. + :class:`rlberry.manager.ExperimentManager` + Loaded instance of ExperimentManager. 
""" filename = Path(filename).with_suffix(".pickle") if filename.name != "manager_obj.pickle": raise ValueError( - "The agent_manager objects should be save in file named 'manager_obj.pickle'" + "The experiment_manager objects should be save in file named 'manager_obj.pickle'" ) obj = cls(None, None, None) @@ -1244,8 +1250,8 @@ def _optuna_objective( # # fit and evaluate agents # - # Create AgentManager with hyperparams - params_stats = AgentManager( + # Create ExperimentManager with hyperparams + params_stats = ExperimentManager( agent_class, train_env, fit_budget, @@ -1346,14 +1352,14 @@ def preset_manager(*args, **kwds): >>> seed=42, >>> max_workers=6 >>> ) - >>> ppo = manager_maker(PPOAgent, fit_budget = 100) # of type AgentManager + >>> ppo = manager_maker(PPOAgent, fit_budget = 100) # of type ExperimentManager >>> dqn = manager_maker(DQNAgent, fit_budget = 200) >>> >>> ppo.fit() >>> dqn.fit() """ - class Manager(AgentManager): - __init__ = functools.partialmethod(AgentManager.__init__, *args, **kwds) + class Manager(ExperimentManager): + __init__ = functools.partialmethod(ExperimentManager.__init__, *args, **kwds) return Manager diff --git a/rlberry/manager/multiple_managers.py b/rlberry/manager/multiple_managers.py index 3748b45ad..92d39e5d6 100644 --- a/rlberry/manager/multiple_managers.py +++ b/rlberry/manager/multiple_managers.py @@ -13,12 +13,12 @@ def fit_stats(stats, save): class MultipleManagers: """ - Class to fit multiple AgentManager instances in parallel with multiple threads. + Class to fit multiple ExperimentManager instances in parallel with multiple threads. Parameters ---------- max_workers: int, default=None - max number of workers (AgentManager instances) fitted at the same time. + max number of workers (ExperimentManager instances) fitted at the same time. parallelization: {'thread', 'process'}, default: 'process' Whether to parallelize agent training using threads or processes. mp_context: {'spawn', 'fork', 'forkserver'}, default: 'spawn'. @@ -39,25 +39,25 @@ def __init__( self.parallelization = parallelization self.mp_context = mp_context - def append(self, agent_manager): + def append(self, experiment_manager): """ - Append new AgentManager instance. + Append new ExperimentManager instance. Parameters ---------- - agent_manager : AgentManager + experiment_manager : ExperimentManager """ - self.instances.append(agent_manager) + self.instances.append(experiment_manager) def run(self, save=True): """ - Fit AgentManager instances in parallel. + Fit ExperimentManager instances in parallel. Parameters ---------- save: bool, default: True - If true, save AgentManager intances immediately after fitting. - AgentManager.save() is called. + If true, save ExperimentManager intances immediately after fitting. + ExperimentManager.save() is called. """ if self.parallelization == "thread": executor_class = concurrent.futures.ThreadPoolExecutor @@ -84,8 +84,8 @@ def run(self, save=True): def save(self): """ - Pickle AgentManager instances and saves fit statistics in .csv files. - The output folder is defined in each of the AgentManager instances. + Pickle ExperimentManager instances and saves fit statistics in .csv files. + The output folder is defined in each of the ExperimentManager instances. 
""" for stats in self.instances: stats.save() diff --git a/rlberry/manager/remote_agent_manager.py b/rlberry/manager/remote_experiment_manager.py similarity index 84% rename from rlberry/manager/remote_agent_manager.py rename to rlberry/manager/remote_experiment_manager.py index 2ef8aef45..38335e2f2 100644 --- a/rlberry/manager/remote_agent_manager.py +++ b/rlberry/manager/remote_experiment_manager.py @@ -16,16 +16,16 @@ logger = rlberry.logger -class RemoteAgentManager: +class RemoteExperimentManager: """ - Class to define a client that handles an AgentManager instance in a remote BerryServer. + Class to define a client that handles an ExperimentManager instance in a remote BerryServer. Parameters ---------- client: BerryClient Client instance, to communicate with a BerryServer. **kwargs: - Parameters for AgentManager instance. + Parameters for ExperimentManager instance. Some parameters (as agent_class, train_env, eval_env) can be defined using a ResourceRequest. """ @@ -37,7 +37,7 @@ def __init__( if client: self._client = client - # Create a remote AgentManager object and keep reference to the filename + # Create a remote ExperimentManager object and keep reference to the filename # in the server where the object was saved. msg = self._client.send( interface.Message.create( @@ -49,7 +49,9 @@ def __init__( if msg.command == interface.Command.RAISE_EXCEPTION: raise Exception(msg.message) - self._remote_agent_manager_filename = pathlib.Path(msg.info["filename"]) + self._remote_experiment_manager_filename = pathlib.Path( + msg.info["filename"] + ) # get useful attributes self.agent_name = msg.info["agent_name"] @@ -60,12 +62,12 @@ def set_client(self, client: BerryClient): @property def remote_file(self): - return str(self._remote_agent_manager_filename) + return str(self._remote_experiment_manager_filename) def get_writer_data(self): """ - * Calls get_writer_data() in the remote AgentManager and returns the result locally. - * If tensorboard data is available in the remote AgentManager, the data is zipped, + * Calls get_writer_data() in the remote ExperimentManager and returns the result locally. + * If tensorboard data is available in the remote ExperimentManager, the data is zipped, received locally and unzipped. """ msg = self._client.send( @@ -174,11 +176,11 @@ def optimize_hyperparams(self, **kwargs): def save(self): """ - Save RemoteAgentManager data to self.output_dir. + Save RemoteExperimentManager data to self.output_dir. Returns ------- - filename where the AgentManager object was saved. + filename where the ExperimentManager object was saved. 
""" # use self.output_dir output_dir = self.output_dir @@ -194,18 +196,22 @@ def save(self): with filename.open("wb") as ff: pickle.dump(self.__dict__, ff) logger.info( - "Saved RemoteAgentManager({}) using pickle.".format(self.agent_name) + "Saved RemoteExperimentManager({}) using pickle.".format( + self.agent_name + ) ) except Exception: try: with filename.open("wb") as ff: dill.dump(self.__dict__, ff) logger.info( - "Saved RemoteAgentManager({}) using dill.".format(self.agent_name) + "Saved RemoteExperimentManager({}) using dill.".format( + self.agent_name + ) ) except Exception as ex: logger.warning( - "[RemoteAgentManager] Instance cannot be pickled: " + str(ex) + "[RemoteExperimentManager] Instance cannot be pickled: " + str(ex) ) return filename @@ -218,11 +224,11 @@ def load(cls, filename): try: with filename.open("rb") as ff: tmp_dict = pickle.load(ff) - logger.info("Loaded RemoteAgentManager using pickle.") + logger.info("Loaded RemoteExperimentManager using pickle.") except Exception: with filename.open("rb") as ff: tmp_dict = dill.load(ff) - logger.info("Loaded RemoteAgentManager using dill.") + logger.info("Loaded RemoteExperimentManager using dill.") obj.__dict__.clear() obj.__dict__.update(tmp_dict) diff --git a/rlberry/manager/tests/test_agent_manager.py b/rlberry/manager/tests/test_experiment_manager.py similarity index 81% rename from rlberry/manager/tests/test_agent_manager.py rename to rlberry/manager/tests/test_experiment_manager.py index cd8cbfc74..63a489623 100644 --- a/rlberry/manager/tests/test_agent_manager.py +++ b/rlberry/manager/tests/test_experiment_manager.py @@ -5,7 +5,7 @@ from rlberry.envs import GridWorld from rlberry.agents import AgentWithSimplePolicy from rlberry.manager import ( - AgentManager, + ExperimentManager, plot_writer_data, evaluate_agents, preset_manager, @@ -43,7 +43,7 @@ def sample_parameters(cls, trial): @pytest.mark.xfail(sys.platform == "win32", reason="bug with windows???") -def test_agent_manager_1(): +def test_experiment_manager_1(): # Define train and evaluation envs train_env = (GridWorld, {}) @@ -56,9 +56,9 @@ def test_agent_manager_1(): agent.fit(10) agent.policy(None) - # Run AgentManager + # Run ExperimentManager params_per_instance = [dict(hyperparameter2=ii) for ii in range(2)] - stats_agent1 = AgentManager( + stats_agent1 = ExperimentManager( DummyAgent, train_env, fit_budget=5, @@ -68,7 +68,7 @@ def test_agent_manager_1(): seed=123, init_kwargs_per_instance=params_per_instance, ) - stats_agent2 = AgentManager( + stats_agent2 = ExperimentManager( DummyAgent, train_env, fit_budget=5, @@ -77,8 +77,8 @@ def test_agent_manager_1(): n_fit=2, seed=123, ) - agent_manager_list = [stats_agent1, stats_agent2] - for st in agent_manager_list: + experiment_manager_list = [stats_agent1, stats_agent2] + for st in experiment_manager_list: st.fit() for ii, instance in enumerate(stats_agent1.agent_handlers): @@ -90,32 +90,32 @@ def test_agent_manager_1(): assert instance.hyperparameter2 == 100 # learning curves - plot_writer_data(agent_manager_list, tag="episode_rewards", show=False) + plot_writer_data(experiment_manager_list, tag="episode_rewards", show=False) # compare final policies - evaluate_agents(agent_manager_list, show=False) + evaluate_agents(experiment_manager_list, show=False) # check if fitted - for agent_manager in agent_manager_list: - assert len(agent_manager.agent_handlers) == 2 - for agent in agent_manager.agent_handlers: + for experiment_manager in experiment_manager_list: + assert len(experiment_manager.agent_handlers) == 
2 + for agent in experiment_manager.agent_handlers: assert agent.fitted # test saving/loading fname = stats_agent1.save() - loaded_stats = AgentManager.load(fname) + loaded_stats = ExperimentManager.load(fname) assert stats_agent1.unique_id == loaded_stats.unique_id # test hyperparameter optimization call loaded_stats.optimize_hyperparams(n_trials=3) loaded_stats.optimize_hyperparams(n_trials=3, continue_previous=True) - for st in agent_manager_list: + for st in experiment_manager_list: st.clear_output_dir() @pytest.mark.xfail(sys.platform == "win32", reason="bug with windows???") -def test_agent_manager_2(): +def test_experiment_manager_2(): # Define train and evaluation envs train_env = (GridWorld, {}) eval_env = (GridWorld, {}) @@ -124,8 +124,8 @@ def test_agent_manager_2(): params = {} eval_kwargs = dict(eval_horizon=10) - # Run AgentManager - stats_agent1 = AgentManager( + # Run ExperimentManager + stats_agent1 = ExperimentManager( DummyAgent, train_env, eval_env=eval_env, @@ -135,7 +135,7 @@ def test_agent_manager_2(): n_fit=4, seed=123, ) - stats_agent2 = AgentManager( + stats_agent2 = ExperimentManager( DummyAgent, train_env, eval_env=eval_env, @@ -145,29 +145,29 @@ def test_agent_manager_2(): n_fit=4, seed=123, ) - agent_manager_list = [stats_agent1, stats_agent2] - for st in agent_manager_list: + experiment_manager_list = [stats_agent1, stats_agent2] + for st in experiment_manager_list: st.fit() # compare final policies - outputs = evaluate_agents(agent_manager_list, n_simulations=5, show=False) + outputs = evaluate_agents(experiment_manager_list, n_simulations=5, show=False) assert len(outputs) == 5 outputs = evaluate_agents( - agent_manager_list, n_simulations=5, show=False, choose_random_agents=False + experiment_manager_list, n_simulations=5, show=False, choose_random_agents=False ) assert len(outputs) == 4 * 5 # learning curves - plot_writer_data(agent_manager_list, tag="episode_rewards", show=False) + plot_writer_data(experiment_manager_list, tag="episode_rewards", show=False) # check if fitted - for agent_manager in agent_manager_list: - assert len(agent_manager.agent_handlers) == 4 - for agent in agent_manager.agent_handlers: + for experiment_manager in experiment_manager_list: + assert len(experiment_manager.agent_handlers) == 4 + for agent in experiment_manager.agent_handlers: assert agent.fitted # test saving/loading fname = stats_agent1.save() - loaded_stats = AgentManager.load(fname) + loaded_stats = ExperimentManager.load(fname) assert stats_agent1.unique_id == loaded_stats.unique_id # test hyperparemeter optimization @@ -182,19 +182,19 @@ def test_agent_manager_2(): @pytest.mark.parametrize("train_env", [(GridWorld, None), (None, None)]) -def test_agent_manager_partial_fit_and_tuple_env(train_env): +def test_experiment_manager_partial_fit_and_tuple_env(train_env): # Define train and evaluation envs train_env = ( GridWorld, None, - ) # tuple (constructor, kwargs) must also work in AgentManager + ) # tuple (constructor, kwargs) must also work in ExperimentManager # Parameters params = {} eval_kwargs = dict(eval_horizon=10) - # Run AgentManager - stats = AgentManager( + # Run ExperimentManager + stats = ExperimentManager( DummyAgent, train_env, init_kwargs=params, @@ -203,7 +203,7 @@ def test_agent_manager_partial_fit_and_tuple_env(train_env): eval_kwargs=eval_kwargs, seed=123, ) - stats2 = AgentManager( + stats2 = ExperimentManager( DummyAgent, train_env, init_kwargs=params, @@ -246,9 +246,9 @@ def test_equality(): params = dict(hyperparameter1=-1, 
hyperparameter2=100) eval_kwargs = dict(eval_horizon=10) - # Run AgentManager + # Run ExperimentManager params_per_instance = [dict(hyperparameter2=ii) for ii in range(4)] - stats_agent1 = AgentManager( + stats_agent1 = ExperimentManager( DummyAgent, train_env, fit_budget=5, @@ -259,7 +259,7 @@ def test_equality(): init_kwargs_per_instance=params_per_instance, ) - stats_agent2 = AgentManager( + stats_agent2 = ExperimentManager( DummyAgent, train_env, fit_budget=5, @@ -270,7 +270,7 @@ def test_equality(): init_kwargs_per_instance=params_per_instance, ) - stats_agent3 = AgentManager( + stats_agent3 = ExperimentManager( DummyAgent, train_env, fit_budget=42, @@ -293,9 +293,9 @@ def test_version(): params = dict(hyperparameter1=-1, hyperparameter2=100) eval_kwargs = dict(eval_horizon=10) - # Run AgentManager + # Run ExperimentManager params_per_instance = [dict(hyperparameter2=ii) for ii in range(4)] - stats_agent1 = AgentManager( + stats_agent1 = ExperimentManager( DummyAgent, train_env, fit_budget=5, @@ -317,9 +317,9 @@ def test_profile(): params = dict(hyperparameter1=-1, hyperparameter2=100) eval_kwargs = dict(eval_horizon=10) - # Run AgentManager + # Run ExperimentManager params_per_instance = [dict(hyperparameter2=ii) for ii in range(4)] - stats_agent1 = AgentManager( + stats_agent1 = ExperimentManager( DummyAgent, train_env, fit_budget=5, @@ -341,7 +341,7 @@ def test_preset(): params = dict(hyperparameter1=-1, hyperparameter2=100) eval_kwargs = dict(eval_horizon=10) - # Run AgentManager + # Run ExperimentManager params_per_instance = [dict(hyperparameter2=ii) for ii in range(4)] manager_maker = preset_manager( @@ -367,8 +367,8 @@ def test_compress(): ) eval_kwargs = dict(eval_horizon=10) - # Run AgentManager - stats = AgentManager( + # Run ExperimentManager + stats = ExperimentManager( DummyAgent, train_env, fit_budget=5, diff --git a/rlberry/manager/tests/test_agent_manager_seeding.py b/rlberry/manager/tests/test_experiment_manager_seeding.py similarity index 84% rename from rlberry/manager/tests/test_agent_manager_seeding.py rename to rlberry/manager/tests/test_experiment_manager_seeding.py index fc0704166..d0e5a317c 100644 --- a/rlberry/manager/tests/test_agent_manager_seeding.py +++ b/rlberry/manager/tests/test_experiment_manager_seeding.py @@ -1,7 +1,7 @@ from rlberry.envs.tests.test_env_seeding import get_env_trajectory, compare_trajectories from rlberry.envs import gym_make from rlberry.envs.classic_control import MountainCar -from rlberry.manager import AgentManager, MultipleManagers +from rlberry.manager import ExperimentManager, MultipleManagers from rlberry.agents.torch import A2CAgent import gymnasium as gym import pytest @@ -18,22 +18,22 @@ ), ], ) -def test_agent_manager_and_multiple_managers_seeding(env, agent_class): - agent_manager = AgentManager( +def test_experiment_manager_and_multiple_managers_seeding(env, agent_class): + experiment_manager = ExperimentManager( agent_class, env, fit_budget=2, init_kwargs={}, n_fit=6, seed=3456 ) - agent_manager_test = AgentManager( + experiment_manager_test = ExperimentManager( agent_class, env, fit_budget=2, init_kwargs={}, n_fit=6, seed=3456 ) multimanagers = MultipleManagers() - multimanagers.append(agent_manager) - multimanagers.append(agent_manager_test) + multimanagers.append(experiment_manager) + multimanagers.append(experiment_manager_test) multimanagers.run() stats1, stats2 = multimanagers.managers - for ii in range(2, agent_manager.n_fit): + for ii in range(2, experiment_manager.n_fit): traj1 = 
get_env_trajectory(stats1.agent_handlers[ii - 2].env, horizon=10) traj2 = get_env_trajectory(stats1.agent_handlers[ii - 1].env, horizon=10) traj3 = get_env_trajectory(stats1.agent_handlers[ii].env, horizon=10) @@ -49,7 +49,7 @@ def test_agent_manager_and_multiple_managers_seeding(env, agent_class): assert compare_trajectories(traj2, traj2_test) assert compare_trajectories(traj3, traj3_test) - for ii in range(2, agent_manager.n_fit): + for ii in range(2, experiment_manager.n_fit): rand1 = stats1.agent_handlers[ii - 2].seeder.rng.integers(2**32) rand2 = stats1.agent_handlers[ii - 1].seeder.rng.integers(2**32) rand3 = stats1.agent_handlers[ii].seeder.rng.integers(2**32) diff --git a/rlberry/manager/tests/test_hyperparam_optim.py b/rlberry/manager/tests/test_hyperparam_optim.py index bb5c27c84..2803adbcd 100644 --- a/rlberry/manager/tests/test_hyperparam_optim.py +++ b/rlberry/manager/tests/test_hyperparam_optim.py @@ -1,7 +1,7 @@ from rlberry.envs import GridWorld from rlberry.agents import AgentWithSimplePolicy from rlberry.agents.dynprog.value_iteration import ValueIterationAgent -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager from optuna.samplers import TPESampler import numpy as np import pytest @@ -49,8 +49,8 @@ def test_hyperparam_optim_tpe(): # Define trainenv train_env = (GridWorld, {}) with tempfile.TemporaryDirectory() as tmpdirname: - # Run AgentManager - stats_agent = AgentManager( + # Run ExperimentManager + stats_agent = ExperimentManager( DummyAgent, train_env, fit_budget=1, @@ -81,8 +81,8 @@ def test_hyperparam_optim_random(parallelization, custom_eval_function, fit_frac # Define train env train_env = (GridWorld, {}) with tempfile.TemporaryDirectory() as tmpdirname: - # Run AgentManager - stats_agent = AgentManager( + # Run ExperimentManager + stats_agent = ExperimentManager( DummyAgent, train_env, init_kwargs={}, @@ -108,8 +108,8 @@ def test_hyperparam_optim_grid(): # Define train env train_env = (GridWorld, {}) with tempfile.TemporaryDirectory() as tmpdirname: - # Run AgentManager - stats_agent = AgentManager( + # Run ExperimentManager + stats_agent = ExperimentManager( DummyAgent, train_env, init_kwargs={}, @@ -132,8 +132,8 @@ def test_hyperparam_optim_cmaes(): # Define train env train_env = (GridWorld, {}) with tempfile.TemporaryDirectory() as tmpdirname: - # Run AgentManager - stats_agent = AgentManager( + # Run ExperimentManager + stats_agent = ExperimentManager( DummyAgent, train_env, init_kwargs={}, @@ -172,7 +172,7 @@ def sample_parameters(cls, trial): vi_params = {"gamma": 0.1, "epsilon": 1e-3} - vi_stats = AgentManager( + vi_stats = ExperimentManager( ValueIterationAgentToOptimize, env, fit_budget=0, diff --git a/rlberry/manager/tests/test_plot.py b/rlberry/manager/tests/test_plot.py index 9bf35b37b..70533ac60 100644 --- a/rlberry/manager/tests/test_plot.py +++ b/rlberry/manager/tests/test_plot.py @@ -7,7 +7,7 @@ from rlberry.wrappers import WriterWrapper from rlberry.envs import GridWorld -from rlberry.manager import plot_writer_data, AgentManager, read_writer_data +from rlberry.manager import plot_writer_data, ExperimentManager, read_writer_data from rlberry.agents import UCBVIAgent @@ -19,11 +19,11 @@ def __init__(self, env, **kwargs): self.env = WriterWrapper(self.env, self.writer, write_scalar="reward") -def _create_and_fit_agent_manager(output_dir, outdir_id_style): +def _create_and_fit_experiment_manager(output_dir, outdir_id_style): env_ctor = GridWorld env_kwargs = dict(nrows=2, ncols=2, reward_at={(1, 1): 0.1, (2, 
2): 1.0}) - manager = AgentManager( + manager = ExperimentManager( VIAgent, (env_ctor, env_kwargs), fit_budget=10, @@ -44,7 +44,7 @@ def _compute_reward(rewards): def test_plot_writer_data_with_manager_input(outdir_id_style): with tempfile.TemporaryDirectory() as tmpdirname: output_dir = tmpdirname + "/rlberry_data" - manager = _create_and_fit_agent_manager(output_dir, outdir_id_style) + manager = _create_and_fit_experiment_manager(output_dir, outdir_id_style) os.system("ls " + tmpdirname + "/rlberry_data/manager_data") # Plot of the cumulative reward @@ -67,7 +67,7 @@ def test_plot_writer_data_with_manager_input(outdir_id_style): def test_ci(): with tempfile.TemporaryDirectory() as tmpdirname: output_dir = tmpdirname + "/rlberry_data" - manager = _create_and_fit_agent_manager(output_dir, None) + manager = _create_and_fit_experiment_manager(output_dir, None) os.system("ls " + tmpdirname + "/rlberry_data/manager_data") # Plot of the cumulative reward @@ -92,7 +92,7 @@ def test_ci(): def test_plot_writer_data_with_directory_input(outdir_id_style): with tempfile.TemporaryDirectory() as tmpdirname: output_dir = tmpdirname + "/rlberry_data" - manager = _create_and_fit_agent_manager(output_dir, outdir_id_style) + manager = _create_and_fit_experiment_manager(output_dir, outdir_id_style) del manager os.system("ls " + tmpdirname + "/rlberry_data/manager_data") diff --git a/rlberry/manager/tests/test_shared_data.py b/rlberry/manager/tests/test_shared_data.py index 799b9d368..dc44c7501 100644 --- a/rlberry/manager/tests/test_shared_data.py +++ b/rlberry/manager/tests/test_shared_data.py @@ -1,7 +1,7 @@ import pytest import numpy as np from rlberry.agents import Agent -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager class DummyAgent(Agent): @@ -21,7 +21,7 @@ def eval(self, **kwargs): @pytest.mark.parametrize("paralellization", ["thread", "process"]) def test_data_sharing(paralellization): shared_data = dict(X=np.arange(10)) - manager = AgentManager( + manager = ExperimentManager( agent_class=DummyAgent, fit_budget=-1, n_fit=4, diff --git a/rlberry/network/server_utils.py b/rlberry/network/server_utils.py index 4a63ee0fd..75922a83f 100644 --- a/rlberry/network/server_utils.py +++ b/rlberry/network/server_utils.py @@ -1,6 +1,6 @@ import pathlib from rlberry.network import interface -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager from rlberry import metadata_utils import rlberry.utils.io import base64 @@ -24,84 +24,84 @@ def execute_message( params["output_dir"] = base_dir / "server_data" / params["output_dir"] else: params["output_dir"] = base_dir / "server_data/" - agent_manager = AgentManager(**params) - filename = str(agent_manager.save()) + experiment_manager = ExperimentManager(**params) + filename = str(experiment_manager.save()) response = interface.Message.create( info=dict( filename=filename, - agent_name=agent_manager.agent_name, - output_dir=str(agent_manager.output_dir).replace( + agent_name=experiment_manager.agent_name, + output_dir=str(experiment_manager.output_dir).replace( "server_data/", "client_data/" ), ) ) - del agent_manager + del experiment_manager # AGENT_MANAGER_FIT elif message.command == interface.Command.AGENT_MANAGER_FIT: filename = message.params["filename"] budget = message.params["budget"] extra_params = message.params["extra_params"] - agent_manager = AgentManager.load(filename) - agent_manager.fit(budget, **extra_params) - agent_manager.save() + experiment_manager = 
ExperimentManager.load(filename) + experiment_manager.fit(budget, **extra_params) + experiment_manager.save() response = interface.Message.create(command=interface.Command.ECHO) - del agent_manager + del experiment_manager # AGENT_MANAGER_EVAL elif message.command == interface.Command.AGENT_MANAGER_EVAL: filename = message.params["filename"] - agent_manager = AgentManager.load(filename) - eval_output = agent_manager.eval_agents(message.params["n_simulations"]) + experiment_manager = ExperimentManager.load(filename) + eval_output = experiment_manager.eval_agents(message.params["n_simulations"]) response = interface.Message.create(data=dict(output=eval_output)) - del agent_manager + del experiment_manager # AGENT_MANAGER_CLEAR_OUTPUT_DIR elif message.command == interface.Command.AGENT_MANAGER_CLEAR_OUTPUT_DIR: filename = message.params["filename"] - agent_manager = AgentManager.load(filename) - agent_manager.clear_output_dir() + experiment_manager = ExperimentManager.load(filename) + experiment_manager.clear_output_dir() response = interface.Message.create( - message=f"Cleared output dir: {agent_manager.output_dir}" + message=f"Cleared output dir: {experiment_manager.output_dir}" ) - del agent_manager + del experiment_manager # AGENT_MANAGER_CLEAR_HANDLERS elif message.command == interface.Command.AGENT_MANAGER_CLEAR_HANDLERS: filename = message.params["filename"] - agent_manager = AgentManager.load(filename) - agent_manager.clear_handlers() - agent_manager.save() + experiment_manager = ExperimentManager.load(filename) + experiment_manager.clear_handlers() + experiment_manager.save() response = interface.Message.create(message=f"Cleared handlers: {filename}") - del agent_manager + del experiment_manager # AGENT_MANAGER_SET_WRITER elif message.command == interface.Command.AGENT_MANAGER_SET_WRITER: filename = message.params["filename"] - agent_manager = AgentManager.load(filename) - agent_manager.set_writer(**message.params["kwargs"]) - agent_manager.save() - del agent_manager + experiment_manager = ExperimentManager.load(filename) + experiment_manager.set_writer(**message.params["kwargs"]) + experiment_manager.save() + del experiment_manager # AGENT_MANAGER_OPTIMIZE_HYPERPARAMS elif message.command == interface.Command.AGENT_MANAGER_OPTIMIZE_HYPERPARAMS: filename = message.params["filename"] - agent_manager = AgentManager.load(filename) - best_params_dict = agent_manager.optimize_hyperparams( + experiment_manager = ExperimentManager.load(filename) + best_params_dict = experiment_manager.optimize_hyperparams( **message.params["kwargs"] ) - agent_manager.save() - del agent_manager + experiment_manager.save() + del experiment_manager response = interface.Message.create(data=best_params_dict) # AGENT_MANAGER_GET_WRITER_DATA elif message.command == interface.Command.AGENT_MANAGER_GET_WRITER_DATA: # writer scalar data filename = message.params["filename"] - agent_manager = AgentManager.load(filename) - writer_data = agent_manager.get_writer_data() + experiment_manager = ExperimentManager.load(filename) + writer_data = experiment_manager.get_writer_data() writer_data = writer_data or dict() for idx in writer_data: writer_data[idx] = writer_data[idx].to_csv(index=False) # tensoboard data tensorboard_bin_data = None - if agent_manager.tensorboard_dir is not None: + if experiment_manager.tensorboard_dir is not None: tensorboard_zip_file = rlberry.utils.io.zipdir( - agent_manager.tensorboard_dir, - agent_manager.output_dir / "tensorboard_data.zip", + experiment_manager.tensorboard_dir, + 
experiment_manager.output_dir / "tensorboard_data.zip", ) if tensorboard_zip_file is not None: tensorboard_bin_data = open(tensorboard_zip_file, "rb").read() @@ -113,6 +113,6 @@ def execute_message( writer_data=writer_data, tensorboard_bin_data=tensorboard_bin_data ) ) - del agent_manager + del experiment_manager # end return response diff --git a/rlberry/network/tests/test_server.py b/rlberry/network/tests/test_server.py index 1f2f6164d..8f3bf3ca1 100644 --- a/rlberry/network/tests/test_server.py +++ b/rlberry/network/tests/test_server.py @@ -8,7 +8,7 @@ from rlberry.network.client import BerryClient from rlberry.network import interface from rlberry.network.interface import Message, ResourceRequest -from rlberry.manager.remote_agent_manager import RemoteAgentManager +from rlberry.manager import RemoteExperimentManager from rlberry.manager.evaluation import evaluate_agents server_name = "berry" @@ -31,7 +31,7 @@ class Starter(ProcessStarter): def test_client(): port = 4242 client = BerryClient(port=port) - # Send params for AgentManager + # Send params for ExperimentManager client.send( Message.create( command=interface.Command.AGENT_MANAGER_CREATE_INSTANCE, @@ -64,7 +64,7 @@ def test_client(): def test_remote_manager(): port = 4242 client = BerryClient(port=port) - remote_manager = RemoteAgentManager( + remote_manager = RemoteExperimentManager( client, agent_class=ResourceRequest(name="REINFORCEAgent"), train_env=ResourceRequest(name="gym_make", kwargs=dict(id="CartPole-v1")), @@ -86,6 +86,6 @@ def test_remote_manager(): fname1 = remote_manager.save() del remote_manager - remote_manager = RemoteAgentManager.load(fname1) + remote_manager = RemoteExperimentManager.load(fname1) remote_manager.fit(3) evaluate_agents([remote_manager], n_simulations=2, show=False) diff --git a/rlberry/utils/__init__.py b/rlberry/utils/__init__.py index 4c3b87f59..f70c962c1 100644 --- a/rlberry/utils/__init__.py +++ b/rlberry/utils/__init__.py @@ -4,6 +4,6 @@ check_save_load, check_fit_additive, check_seeding_agent, - check_agent_manager, + check_experiment_manager, ) from .check_env import check_env diff --git a/rlberry/utils/check_agent.py b/rlberry/utils/check_agent.py index b3a6741d5..7e5679556 100644 --- a/rlberry/utils/check_agent.py +++ b/rlberry/utils/check_agent.py @@ -1,6 +1,6 @@ from rlberry.envs import Chain from rlberry.envs.benchmarks.ball_exploration import PBall2D -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager import numpy as np from rlberry.seeding import set_external_seed import tempfile @@ -38,9 +38,9 @@ def _make_tuple_env(env): return env_ctor, env_kwargs -def _fit_agent_manager(agent, env="continuous_state", init_kwargs=None): +def _fit_experiment_manager(agent, env="continuous_state", init_kwargs=None): """ - Check that the agent is compatible with :class:`~rlberry.manager.AgentManager`. + Check that the agent is compatible with :class:`~rlberry.manager.ExperimentManager`. 
Parameters ---------- @@ -57,7 +57,7 @@ def _fit_agent_manager(agent, env="continuous_state", init_kwargs=None): train_env = _make_tuple_env(env) try: - agent = AgentManager( + agent = ExperimentManager( agent, train_env, fit_budget=5, n_fit=1, seed=SEED, init_kwargs=init_kwargs ) agent.fit() @@ -96,9 +96,9 @@ def _fit_agent(agent, env="continuous_state", init_kwargs=None): return my_agent -def check_agent_manager(agent, env="continuous_state", init_kwargs=None): +def check_experiment_manager(agent, env="continuous_state", init_kwargs=None): """ - Check that the agent is compatible with :class:`~rlberry.manager.AgentManager`. + Check that the agent is compatible with :class:`~rlberry.manager.ExperimentManager`. Parameters ---------- @@ -110,13 +110,13 @@ def check_agent_manager(agent, env="continuous_state", init_kwargs=None): init_kwargs : dict Arguments required by the agent's constructor. """ - manager = _fit_agent_manager(agent, env, init_kwargs=init_kwargs) + manager = _fit_experiment_manager(agent, env, init_kwargs=init_kwargs) assert manager is not None def check_agent_base(agent, env="continuous_state", init_kwargs=None): """ - Check that the agent is compatible with :class:`~rlberry.manager.AgentManager`. + Check that the agent is compatible with :class:`~rlberry.manager.ExperimentManager`. Parameters ---------- @@ -239,7 +239,7 @@ def _check_save_load_with_manager(agent, env="continuous_state", init_kwargs=Non train_env_tuple = _make_tuple_env(env) with tempfile.TemporaryDirectory() as tmpdirname: - manager = AgentManager( + manager = ExperimentManager( agent, train_env_tuple, fit_budget=5, @@ -276,14 +276,16 @@ def _check_save_load_with_manager(agent, env="continuous_state", init_kwargs=Non assert os.path.exists(tmpdirname) path_to_load = next(pathlib.Path(tmpdirname).glob("**/manager_obj.pickle")) - loaded_agent_manager = AgentManager.load(path_to_load) - assert loaded_agent_manager + loaded_experiment_manager = ExperimentManager.load(path_to_load) + assert loaded_experiment_manager # test with first agent of the manager observation, info = test_env.reset() for tt in range(50): - action = loaded_agent_manager.get_agent_instances()[0].policy(observation) + action = loaded_experiment_manager.get_agent_instances()[0].policy( + observation + ) next_observation, reward, terminated, truncated, info = test_env.step( action ) @@ -360,8 +362,8 @@ def check_seeding_agent(agent, env=None, continuous_state=False, init_kwargs=Non init_kwargs : dict Arguments required by the agent's constructor. """ - agent1 = _fit_agent_manager(agent, env, init_kwargs=init_kwargs) - agent2 = _fit_agent_manager(agent, env, init_kwargs=init_kwargs) + agent1 = _fit_experiment_manager(agent, env, init_kwargs=init_kwargs) + agent2 = _fit_experiment_manager(agent, env, init_kwargs=init_kwargs) result = check_agents_almost_equal( agent1.agent_handlers[0], agent2.agent_handlers[0] @@ -487,7 +489,7 @@ def check_rl_agent(agent, env="continuous_state", init_kwargs=None): >>> from rlberry.utils import check_rl_agent >>> check_rl_agent(UCBVIAgent) # which does not return an error. """ - check_agent_manager( + check_experiment_manager( agent, env, init_kwargs=init_kwargs ) # check manager compatible. 
check_agent_base(agent, env, init_kwargs=init_kwargs) # check without manager @@ -519,7 +521,9 @@ def check_rlberry_agent(agent, env="continuous_state", init_kwargs=None): >>> from rlberry.utils import check_rl_agent >>> check_rl_agent(UCBVIAgent) # """ - manager = _fit_agent_manager(agent, env, init_kwargs=init_kwargs).agent_handlers[0] + manager = _fit_experiment_manager( + agent, env, init_kwargs=init_kwargs + ).agent_handlers[0] try: params = manager.get_params() except Exception: @@ -562,8 +566,8 @@ def _test_hyperparam_optim_tpe(agent, env="continuous_state", init_kwargs=None): init_kwargs["seeder"] = SEED train_env = _make_tuple_env(env) - # Run AgentManager - stats_agent = AgentManager( + # Run ExperimentManager + stats_agent = ExperimentManager( agent, train_env, fit_budget=1, @@ -586,8 +590,8 @@ def _test_hyperparam_optim_grid(agent, env="continuous_state", init_kwargs=None) init_kwargs["seeder"] = SEED train_env = _make_tuple_env(env) - # Run AgentManager - stats_agent = AgentManager( + # Run ExperimentManager + stats_agent = ExperimentManager( agent, train_env, init_kwargs={}, @@ -612,8 +616,8 @@ def _test_hyperparam_optim_cmaes(agent, env="continuous_state", init_kwargs=None init_kwargs["seeder"] = SEED train_env = _make_tuple_env(env) - # Run AgentManager - stats_agent = AgentManager( + # Run ExperimentManager + stats_agent = ExperimentManager( agent, train_env, init_kwargs={}, @@ -636,7 +640,7 @@ def _test_discount_optimization(agent, env="continuous_state", init_kwargs=None) vi_params = {"gamma": 0.1, "epsilon": 1e-3} - vi_stats = AgentManager( + vi_stats = ExperimentManager( agent, train_env, fit_budget=0, @@ -668,8 +672,8 @@ def _test_hyperparam_optim_random( init_kwargs["seeder"] = SEED train_env = _make_tuple_env(env) - # Run AgentManager - stats_agent = AgentManager( + # Run ExperimentManager + stats_agent = ExperimentManager( agent, train_env, init_kwargs={}, diff --git a/rlberry/utils/check_bandit_agent.py b/rlberry/utils/check_bandit_agent.py index e3566b1ab..89389b77f 100644 --- a/rlberry/utils/check_bandit_agent.py +++ b/rlberry/utils/check_bandit_agent.py @@ -1,5 +1,5 @@ from rlberry.envs.bandits import BernoulliBandit -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager def check_bandit_agent(Agent, environment=BernoulliBandit, seed=42): @@ -40,10 +40,10 @@ def check_bandit_agent(Agent, environment=BernoulliBandit, seed=42): env_ctor = environment env_kwargs = {} - agent1 = AgentManager( + agent1 = ExperimentManager( Agent, (env_ctor, env_kwargs), fit_budget=10, n_fit=1, seed=seed ) - agent2 = AgentManager( + agent2 = ExperimentManager( Agent, (env_ctor, env_kwargs), fit_budget=10, n_fit=1, seed=seed ) diff --git a/rlberry/utils/tests/test_check.py b/rlberry/utils/tests/test_check.py index a3be38e89..070b580d0 100644 --- a/rlberry/utils/tests/test_check.py +++ b/rlberry/utils/tests/test_check.py @@ -4,7 +4,7 @@ from rlberry.utils.check_env import check_env from rlberry.utils.check_agent import ( check_rl_agent, - _fit_agent_manager, + _fit_experiment_manager, check_agents_almost_equal, ) from rlberry.spaces import Box, Dict, Discrete @@ -72,12 +72,16 @@ def test_check_agent(): check_rl_agent(ValueIterationAgent, (Chain, {})) -def test_check_agent_manager_almost_equal(): +def test_check_experiment_manager_almost_equal(): env = GridWorld env_kwargs = {} - agent1 = _fit_agent_manager(ReferenceAgent, (env, env_kwargs)).agent_handlers[0] - agent2 = _fit_agent_manager(ReferenceAgent, (env, env_kwargs)).agent_handlers[0] - agent3 = 
_fit_agent_manager(UCBVIAgent, (env, env_kwargs)).agent_handlers[0] + agent1 = _fit_experiment_manager(ReferenceAgent, (env, env_kwargs)).agent_handlers[ + 0 + ] + agent2 = _fit_experiment_manager(ReferenceAgent, (env, env_kwargs)).agent_handlers[ + 0 + ] + agent3 = _fit_experiment_manager(UCBVIAgent, (env, env_kwargs)).agent_handlers[0] assert check_agents_almost_equal(agent1, agent2, compare_using="eval") assert not check_agents_almost_equal(agent1, agent3) diff --git a/rlberry/utils/tests/test_writer.py b/rlberry/utils/tests/test_writer.py index 4fd4ddfa9..1345649d2 100644 --- a/rlberry/utils/tests/test_writer.py +++ b/rlberry/utils/tests/test_writer.py @@ -1,7 +1,7 @@ import time from rlberry.envs import GridWorld from rlberry.agents import AgentWithSimplePolicy -from rlberry.manager import AgentManager +from rlberry.manager import ExperimentManager class DummyAgent(AgentWithSimplePolicy): @@ -33,7 +33,7 @@ def test_myoutput(capsys): # or use "capfd" for fd-level env_kwargs = dict() env = env_ctor(**env_kwargs) - agent = AgentManager( + agent = ExperimentManager( DummyAgent, (env_ctor, env_kwargs), fit_budget=3, diff --git a/rlberry/utils/writers.py b/rlberry/utils/writers.py index 2bb2eaa1b..2b3504df2 100644 --- a/rlberry/utils/writers.py +++ b/rlberry/utils/writers.py @@ -24,7 +24,7 @@ class DefaultWriter: Default writer to be used by the agents, optionally wraps an instance of tensorboard.SummaryWriter. Can be used in the fit() method of the agents, so - that training data can be handled by AgentManager and RemoteAgentManager. + that training data can be handled by ExperimentManager and RemoteExperimentManager. Parameters ----------
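
For reference, an editor's sketch (not part of the patch) of how downstream code is expected to look after this rename. It only uses names visible in the hunks above; UCBVIAgent and GridWorld are stand-ins for any agent/environment pair, and the budget/seed values are arbitrary.

    from rlberry.agents import UCBVIAgent                # stand-in agent
    from rlberry.envs import GridWorld                   # stand-in environment
    from rlberry.manager import ExperimentManager        # was: AgentManager
    from rlberry.utils import check_experiment_manager   # was: check_agent_manager

    # ExperimentManager takes a (constructor, kwargs) pair rather than an
    # environment instance, so each of the n_fit workers can build and seed
    # its own copy of the environment.
    manager = ExperimentManager(
        UCBVIAgent,
        (GridWorld, {}),
        fit_budget=10,
        n_fit=1,
        seed=42,
    )
    manager.fit()

    # The renamed compatibility check accepts the same (constructor, kwargs) tuple.
    check_experiment_manager(UCBVIAgent, (GridWorld, {}))

The (constructor, kwargs) convention mirrors the tests touched by this patch (check_bandit_agent, test_check.py) and is what the renamed helpers pass through _make_tuple_env internally.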