Refactorization of the repository (rlberry-py#379)

* Move some classes to rlberry-scool and rlberry-research * Update and remove some files in agents and envs * Update the .gitignore * Update import paths * add tests to improve coverage (env with action space in Box) * add tests to improve coverage (observation_space as Dict) * add tests to improve coverage (check_gym_env_warnings) * increase writer coverage * add tests to improve coverage (check_gym_env_warnings) * add tests to improve coverage (writer) * remove old doc * update rlberry-research -> update poetry.lock * update display on API doc * add YannBerthelot to contributors * update tests on writers --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: JulienT01 <[email protected]> Co-authored-by: TimotheeMathieu <[email protected]>
BorisHamadej · Nov 21, 2023 · 98a089d · 98a089d
1 parent d92845b
commit 98a089d
Show file tree

Hide file tree

Showing 224 changed files with 417 additions and 18,516 deletions.
diff --git a/.gitignore b/.gitignore
@@ -167,4 +167,4 @@ dmypy.json
 .pydevproject
 
 
-profile.prof
+*.prof
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -153,6 +153,12 @@ jobs:
       set -xe
       pip install .
     displayName: 'Install rlberry'
+
+  - script: |
+      pip install git+https://github.com/rlberry-py/rlberry-scool.git
+      pip install git+https://github.com/rlberry-py/rlberry-research.git
+    displayName: 'Install rlberry-scool and rlberry-research'
+
   #ignore les tests qui viennent des extras : torch, experimental, stablebaselines, optuna
   - script: |
       pip install pytest==7.0.1 pytest-azurepipelines pytest-xvfb
@@ -186,6 +192,11 @@ jobs:
       pip install .
     displayName: 'Install rlberry'
 
+  - script: |
+      pip install git+https://github.com/rlberry-py/rlberry-scool.git
+      pip install git+https://github.com/rlberry-py/rlberry-research.git
+    displayName: 'Install rlberry-scool and rlberry-research'
+
   - script: |
       pip install pytest==7.0.1 pytest-azurepipelines pytest-xvfb
       pytest rlberry/tests/test_agents_base.py rlberry/tests/test_envs.py
@@ -215,6 +226,11 @@ jobs:
       pip install .
     displayName: 'Install rlberry'
 
+  - script: |
+      pip install git+https://github.com/rlberry-py/rlberry-scool.git
+      pip install git+https://github.com/rlberry-py/rlberry-research.git
+    displayName: 'Install rlberry-scool and rlberry-research'
+
   - script: |
       pip install pytest==7.0.1 pytest-azurepipelines pytest-xvfb
       pytest rlberry/tests/test_agents_base.py rlberry/tests/test_envs.py

diff --git a/codecov.yml b/codecov.yml
@@ -25,5 +25,3 @@ ignore:
   - "./rlberry/wrappers/tests/old_env/*.py"
   - "./rlberry/rendering/pygame_render2d.py"
   - "./rlberry/colab_utils/display_setup.py"
-  - "./rlberry/agents/experimental/jax/**/*.py"
-  - "./rlberry/network/**/*.py"
diff --git a/docs/api.rst b/docs/api.rst
@@ -44,25 +44,6 @@ Base classes
     agents.Agent
     agents.AgentWithSimplePolicy
 
-Basic Agents
---------------------
-
-.. autosummary::
-   :toctree: generated/
-   :template: class.rst
-
-   agents.QLAgent
-   agents.SARSAAgent
-   agents.ValueIterationAgent
-   agents.MBQVIAgent
-   agents.UCBVIAgent
-   agents.RSUCBVIAgent
-   agents.RSKernelUCBVIAgent
-   agents.OptQLAgent
-   agents.LSVIUCBAgent
-   agents.RLSVIAgent
-   agents.PSRLAgent
-
 
 Agent importation tools
 -----------------------
@@ -74,22 +55,6 @@ Agent importation tools
    agents.stable_baselines.StableBaselinesAgent
 
 
-Torch Agents
----------------------------
-
-
-.. autosummary::
-   :toctree: generated/
-   :template: class.rst
-
-   agents.torch.SACAgent
-   agents.torch.A2CAgent
-   agents.torch.PPOAgent
-   agents.torch.DQNAgent
-   agents.torch.MunchausenDQNAgent
-   agents.torch.REINFORCEAgent
-
-
 Environments
 ============
 
@@ -116,23 +81,6 @@ Spaces
     spaces.MultiBinary
     spaces.Dict
 
-Benchmark Environments
-----------------------
-
-.. autosummary::
-   :toctree: generated/
-   :template: class.rst
-
-    envs.Acrobot
-    envs.benchmarks.ball_exploration.PBall2D
-    envs.benchmarks.generalization.twinrooms.TwinRooms
-    envs.benchmarks.grid_exploration.apple_gold.AppleGold
-    envs.benchmarks.grid_exploration.nroom.NRoom
-    envs.classic_control.MountainCar
-    envs.SpringCartPole
-    envs.finite.Chain
-    envs.finite.GridWorld
-
 
 Environment tools
 -----------------
@@ -171,6 +119,7 @@ Manager Utilitis
 .. autosummary::
    :toctree: generated/
    :template: function.rst
+
    manager.preset_manager
 
 
@@ -208,16 +157,6 @@ Logging Utilities
    utils.logging.set_level
 
 
-Typing
-------
-
-.. autosummary::
-  :toctree: generated/
-  :template: class.rst
-
-   types.Env
-
-
 Environment Wrappers
 ====================
 
@@ -230,82 +169,3 @@ Environment Wrappers
   wrappers.RescaleRewardWrapper
   wrappers.vis2d.Vis2dWrapper
   wrappers.WriterWrapper
-
-
-Neural Networks
-===============
-
-
-Torch
-------
-
-.. autosummary::
-  :toctree: generated/
-  :template: function.rst
-
-  agents.torch.utils.training.model_factory
-  utils.torch.choose_device
-
-
-.. autosummary::
-  :toctree: generated/
-  :template: class.rst
-
-  agents.torch.utils.models.MultiLayerPerceptron
-  agents.torch.utils.models.ConvolutionalNetwork
-  agents.torch.utils.models.DuelingNetwork
-  agents.torch.utils.models.Table
-
-
-Bandits
-=======
-
-Bandit environments
--------------------
-
-.. autosummary::
-   :toctree: generated/
-   :template: class.rst
-
-   envs.bandits.AdversarialBandit
-   envs.bandits.Bandit
-   envs.bandits.BernoulliBandit
-   envs.bandits.NormalBandit
-   envs.bandits.CorruptedNormalBandit
-
-Bandit algorithms
------------------
-The bandits algorithms use mainly the following tracker tool:
-
-.. autosummary::
-   :toctree: generated/
-   :template: class.rst
-
-   agents.bandits.tools.BanditTracker
-
-Some general class of bandit algorithms are provided.
-
-.. autosummary::
-   :toctree: generated/
-   :template: class.rst
-
-   agents.bandits.BanditWithSimplePolicy
-   agents.bandits.IndexAgent
-   agents.bandits.RandomizedAgent
-   agents.bandits.TSAgent
-
-A number of indices are provided to use in bandits algorithms:
-
-.. autosummary::
-  :toctree: generated/
-  :template: function.rst
-
-  agents.bandits.makeBoundedIMEDIndex
-  agents.bandits.makeBoundedMOSSIndex
-  agents.bandits.makeBoundedNPTSIndex
-  agents.bandits.makeBoundedUCBIndex
-  agents.bandits.makeBoundedUCBVIndex
-  agents.bandits.makeETCIndex
-  agents.bandits.makeEXP3Index
-  agents.bandits.makeSubgaussianMOSSIndex
-  agents.bandits.makeSubgaussianUCBIndex
diff --git a/docs/basics/experiment_setup.rst b/docs/basics/experiment_setup.rst
@@ -36,7 +36,7 @@ This can be done very succinctly as in the example below:
 
 .. code-block:: yaml
 
-    constructor: 'rlberry.envs.benchmarks.grid_exploration.nroom.NRoom'
+    constructor: 'rlberry_research.envs.benchmarks.grid_exploration.nroom.NRoom'
     params:
         reward_free: false
         array_observation: true
@@ -46,7 +46,7 @@ This can be done very succinctly as in the example below:
 
 .. code-block:: yaml
 
-    agent_class: 'rlberry.agents.kernel_based.rs_ucbvi.RSUCBVIAgent'
+    agent_class: 'rlberry_research.agents.kernel_based.rs_ucbvi.RSUCBVIAgent'
     init_kwargs:
         gamma: 1.0
         lp_metric: 2

diff --git a/docs/basics/multiprocess.rst b/docs/basics/multiprocess.rst
@@ -29,9 +29,9 @@ The advised method of parallelization is spawn (parameter :code:`mp_context="spa
 
 .. code:: python
 
-    from rlberry.agents.torch import A2CAgent
+    from rlberry_research.agents.torch import A2CAgent
     from rlberry.manager import ExperimentManager
-    from rlberry.envs.benchmarks.ball_exploration import PBall2D
+    from rlberry_research.envs.benchmarks.ball_exploration import PBall2D
 
     n_steps = 1e5
     batch_size = 256

diff --git a/docs/basics/rlberry how to.rst b/docs/basics/rlberry how to.rst
@@ -6,7 +6,7 @@ Libraries
     import numpy as np
     import pandas as pd
     from rlberry.agents import ValueIterationAgent, AgentWithSimplePolicy
-    from rlberry.envs import GridWorld
+    from rlberry_research.envs import GridWorld
     from rlberry.manager import ExperimentManager, evaluate_agents
 
 

diff --git a/docs/contributors.rst b/docs/contributors.rst
@@ -65,4 +65,8 @@
     <a href='https://github.com/riccardodv'><img src='https://avatars.githubusercontent.com/u/18311484?v=4' class='avatar' /></a> <br />
     <p>Riccardo Della Vecchia</p>
     </div>
+    <div>
+    <a href='https://github.com/YannBerthelot'><img src='https://avatars.githubusercontent.com/u/49097534?v=4' class='avatar' /></a> <br />
+    <p>YannBerthelot</p>
+    </div>
     </div>
diff --git a/examples/comparison_agents.py b/examples/comparison_agents.py
@@ -15,9 +15,9 @@
 
 from rlberry.manager.comparison import compare_agents
 from rlberry.manager import AgentManager
-from rlberry.envs.bandits import BernoulliBandit
+from rlberry_research.envs.bandits import BernoulliBandit
 from rlberry.wrappers import WriterWrapper
-from rlberry.agents.bandits import (
+from rlberry_research.agents.bandits import (
     IndexAgent,
     makeBoundedMOSSIndex,
     makeBoundedNPTSIndex,

diff --git a/examples/demo_agents/video_plot_a2c.py b/examples/demo_agents/video_plot_a2c.py
@@ -11,8 +11,8 @@
 """
 # sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_a2c.jpg'
 
-from rlberry.agents.torch import A2CAgent
-from rlberry.envs.benchmarks.ball_exploration import PBall2D
+from rlberry_research.agents.torch import A2CAgent
+from rlberry_research.envs.benchmarks.ball_exploration import PBall2D
 from gymnasium.wrappers import TimeLimit
 
 

diff --git a/examples/demo_agents/video_plot_mbqvi.py b/examples/demo_agents/video_plot_mbqvi.py
@@ -10,8 +10,8 @@
 
 """
 # sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_mbqvi.jpg'
-from rlberry.agents.mbqvi import MBQVIAgent
-from rlberry.envs.finite import GridWorld
+from rlberry_scool.agents.mbqvi import MBQVIAgent
+from rlberry_research.envs.finite import GridWorld
 
 params = {}
 params["n_samples"] = 100  # samples per state-action pair

diff --git a/examples/demo_agents/video_plot_ppo.py b/examples/demo_agents/video_plot_ppo.py
@@ -11,8 +11,8 @@
 """
 # sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_a2c.jpg'
 
-from rlberry.agents.torch import PPOAgent
-from rlberry.envs.benchmarks.ball_exploration import PBall2D
+from rlberry_research.agents.torch import PPOAgent
+from rlberry_research.envs.benchmarks.ball_exploration import PBall2D
 
 
 env = PBall2D()

diff --git a/examples/demo_agents/video_plot_vi.py b/examples/demo_agents/video_plot_vi.py
@@ -11,8 +11,8 @@
 """
 # sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_vi.jpg'
 
-from rlberry.agents.dynprog import ValueIterationAgent
-from rlberry.envs.finite import Chain
+from rlberry_research.agents.dynprog import ValueIterationAgent
+from rlberry_research.envs.finite import Chain
 
 env = Chain()
 agent = ValueIterationAgent(env, gamma=0.95)

diff --git a/examples/demo_bandits/plot_TS_bandit.py b/examples/demo_bandits/plot_TS_bandit.py
@@ -11,8 +11,8 @@
 """
 
 import numpy as np
-from rlberry.envs.bandits import BernoulliBandit, NormalBandit
-from rlberry.agents.bandits import (
+from rlberry_research.envs.bandits import BernoulliBandit, NormalBandit
+from rlberry_research.agents.bandits import (
     IndexAgent,
     TSAgent,
     makeBoundedUCBIndex,

diff --git a/examples/demo_bandits/plot_compare_index_bandits.py b/examples/demo_bandits/plot_compare_index_bandits.py
@@ -8,10 +8,10 @@
 """
 import numpy as np
 import matplotlib.pyplot as plt
-from rlberry.envs.bandits import BernoulliBandit
+from rlberry_research.envs.bandits import BernoulliBandit
 from rlberry.manager import ExperimentManager, plot_writer_data
 from rlberry.wrappers import WriterWrapper
-from rlberry.agents.bandits import (
+from rlberry_research.agents.bandits import (
     IndexAgent,
     RandomizedAgent,
     makeBoundedIMEDIndex,

diff --git a/examples/demo_bandits/plot_exp3_bandit.py b/examples/demo_bandits/plot_exp3_bandit.py
@@ -8,8 +8,8 @@
 """
 
 import numpy as np
-from rlberry.envs.bandits import AdversarialBandit
-from rlberry.agents.bandits import (
+from rlberry_research.envs.bandits import AdversarialBandit
+from rlberry_research.agents.bandits import (
     RandomizedAgent,
     TSAgent,
     makeEXP3Index,

diff --git a/examples/demo_bandits/plot_mirror_bandit.py b/examples/demo_bandits/plot_mirror_bandit.py
@@ -16,7 +16,7 @@
 
 from rlberry.manager import ExperimentManager, read_writer_data
 from rlberry.envs.interface import Model
-from rlberry.agents.bandits import BanditWithSimplePolicy
+from rlberry_research.agents.bandits import BanditWithSimplePolicy
 from rlberry.wrappers import WriterWrapper
 import rlberry.spaces as spaces
 

diff --git a/examples/demo_bandits/plot_ucb_bandit.py b/examples/demo_bandits/plot_ucb_bandit.py
@@ -7,8 +7,8 @@
 """
 
 import numpy as np
-from rlberry.envs.bandits import NormalBandit
-from rlberry.agents.bandits import IndexAgent, makeSubgaussianUCBIndex
+from rlberry_research.envs.bandits import NormalBandit
+from rlberry_research.agents.bandits import IndexAgent, makeSubgaussianUCBIndex
 from rlberry.manager import ExperimentManager, plot_writer_data
 import matplotlib.pyplot as plt
 from rlberry.wrappers import WriterWrapper

diff --git a/examples/demo_env/video_plot_apple_gold.py b/examples/demo_env/video_plot_apple_gold.py
@@ -10,8 +10,8 @@
 
 """
 # sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_apple_gold.jpg'
-from rlberry.envs.benchmarks.grid_exploration.apple_gold import AppleGold
-from rlberry.agents.dynprog import ValueIterationAgent
+from rlberry_research.envs.benchmarks.grid_exploration.apple_gold import AppleGold
+from rlberry_research.agents.dynprog import ValueIterationAgent
 
 env = AppleGold(reward_free=False, array_observation=False)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -167,4 +167,4 @@ dmypy.json
		.pydevproject


		profile.prof
		*.prof