diff --git a/preview_pr/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip b/preview_pr/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
index f24e70378..ed060a155 100644
Binary files a/preview_pr/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip and b/preview_pr/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip differ
diff --git a/preview_pr/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip b/preview_pr/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
index bd5356975..053171d79 100644
Binary files a/preview_pr/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip and b/preview_pr/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip differ
diff --git a/preview_pr/_images/sphx_glr_plot_TS_bandit_001.png b/preview_pr/_images/sphx_glr_plot_TS_bandit_001.png
index 2662475ba..75d2f1949 100644
Binary files a/preview_pr/_images/sphx_glr_plot_TS_bandit_001.png and b/preview_pr/_images/sphx_glr_plot_TS_bandit_001.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_TS_bandit_002.png b/preview_pr/_images/sphx_glr_plot_TS_bandit_002.png
index 0033810a4..cd5f044cf 100644
Binary files a/preview_pr/_images/sphx_glr_plot_TS_bandit_002.png and b/preview_pr/_images/sphx_glr_plot_TS_bandit_002.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_TS_bandit_thumb.png b/preview_pr/_images/sphx_glr_plot_TS_bandit_thumb.png
index 7bd8a8c64..27318e3c0 100644
Binary files a/preview_pr/_images/sphx_glr_plot_TS_bandit_thumb.png and b/preview_pr/_images/sphx_glr_plot_TS_bandit_thumb.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_agent_manager_001.png b/preview_pr/_images/sphx_glr_plot_agent_manager_001.png
index 029a0feda..e43f70166 100644
Binary files a/preview_pr/_images/sphx_glr_plot_agent_manager_001.png and b/preview_pr/_images/sphx_glr_plot_agent_manager_001.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_agent_manager_thumb.png b/preview_pr/_images/sphx_glr_plot_agent_manager_thumb.png
index 255ac346e..51e9c14cb 100644
Binary files a/preview_pr/_images/sphx_glr_plot_agent_manager_thumb.png and b/preview_pr/_images/sphx_glr_plot_agent_manager_thumb.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_exp3_bandit_001.png b/preview_pr/_images/sphx_glr_plot_exp3_bandit_001.png
index 41b83bf73..d22fd9a0c 100644
Binary files a/preview_pr/_images/sphx_glr_plot_exp3_bandit_001.png and b/preview_pr/_images/sphx_glr_plot_exp3_bandit_001.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_exp3_bandit_thumb.png b/preview_pr/_images/sphx_glr_plot_exp3_bandit_thumb.png
index 532a2dea1..9686512f4 100644
Binary files a/preview_pr/_images/sphx_glr_plot_exp3_bandit_thumb.png and b/preview_pr/_images/sphx_glr_plot_exp3_bandit_thumb.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_mirror_bandit_001.png b/preview_pr/_images/sphx_glr_plot_mirror_bandit_001.png
index e7be1aa0d..efb7c8f44 100644
Binary files a/preview_pr/_images/sphx_glr_plot_mirror_bandit_001.png and b/preview_pr/_images/sphx_glr_plot_mirror_bandit_001.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_mirror_bandit_thumb.png b/preview_pr/_images/sphx_glr_plot_mirror_bandit_thumb.png
index e7fcce3ac..6bba3c904 100644
Binary files a/preview_pr/_images/sphx_glr_plot_mirror_bandit_thumb.png and b/preview_pr/_images/sphx_glr_plot_mirror_bandit_thumb.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_smooth_001.png b/preview_pr/_images/sphx_glr_plot_smooth_001.png
index 8d37935bd..7691eb761 100644
Binary files a/preview_pr/_images/sphx_glr_plot_smooth_001.png and b/preview_pr/_images/sphx_glr_plot_smooth_001.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_smooth_002.png b/preview_pr/_images/sphx_glr_plot_smooth_002.png
index ee1ef8f0a..5eba80116 100644
Binary files a/preview_pr/_images/sphx_glr_plot_smooth_002.png and b/preview_pr/_images/sphx_glr_plot_smooth_002.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_smooth_thumb.png b/preview_pr/_images/sphx_glr_plot_smooth_thumb.png
index a9d75591c..d726af1da 100644
Binary files a/preview_pr/_images/sphx_glr_plot_smooth_thumb.png and b/preview_pr/_images/sphx_glr_plot_smooth_thumb.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_ucb_bandit_001.png b/preview_pr/_images/sphx_glr_plot_ucb_bandit_001.png
index 5298c5773..73466d1c2 100644
Binary files a/preview_pr/_images/sphx_glr_plot_ucb_bandit_001.png and b/preview_pr/_images/sphx_glr_plot_ucb_bandit_001.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_ucb_bandit_thumb.png b/preview_pr/_images/sphx_glr_plot_ucb_bandit_thumb.png
index 26c01fddf..878c1389c 100644
Binary files a/preview_pr/_images/sphx_glr_plot_ucb_bandit_thumb.png and b/preview_pr/_images/sphx_glr_plot_ucb_bandit_thumb.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_writer_wrapper_001.png b/preview_pr/_images/sphx_glr_plot_writer_wrapper_001.png
index 9c3cdaf9b..aea8c4791 100644
Binary files a/preview_pr/_images/sphx_glr_plot_writer_wrapper_001.png and b/preview_pr/_images/sphx_glr_plot_writer_wrapper_001.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_writer_wrapper_002.png b/preview_pr/_images/sphx_glr_plot_writer_wrapper_002.png
index 29e10a4eb..81cec3960 100644
Binary files a/preview_pr/_images/sphx_glr_plot_writer_wrapper_002.png and b/preview_pr/_images/sphx_glr_plot_writer_wrapper_002.png differ
diff --git a/preview_pr/_images/sphx_glr_plot_writer_wrapper_thumb.png b/preview_pr/_images/sphx_glr_plot_writer_wrapper_thumb.png
index ba707a941..870c3b106 100644
Binary files a/preview_pr/_images/sphx_glr_plot_writer_wrapper_thumb.png and b/preview_pr/_images/sphx_glr_plot_writer_wrapper_thumb.png differ
diff --git a/preview_pr/_modules/rlberry/manager/utils.html b/preview_pr/_modules/rlberry/manager/utils.html
index 765fa1bcc..ed011f8c7 100644
--- a/preview_pr/_modules/rlberry/manager/utils.html
+++ b/preview_pr/_modules/rlberry/manager/utils.html
@@ -137,9 +137,26 @@ <h1>Source code for rlberry.manager.utils</h1><div class="highlight"><pre>
 
 <div class="viewcode-block" id="tensorboard_folder_to_dataframe"><a class="viewcode-back" href="../../../generated/rlberry.manager.tensorboard_folder_to_dataframe.html#rlberry.manager.tensorboard_folder_to_dataframe">[docs]</a><span class="k">def</span> <span class="nf">tensorboard_folder_to_dataframe</span><span class="p">(</span><span class="n">path_to_tensorboard_data</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
-<span class="sd">    path_to_tensorboard_data : path to the tensorboard data. It must be the parent folder of all the training, and the event have to be in this kind of path : &lt;path_to_tensorboard_data/algo_name/n_simu/events.out.tfevents.xxxxx&gt;</span>
-
-<span class="sd">    Return a dict of panda dataframe (key = tag, value = panda.dataframe)</span>
+<span class="sd">    Function to convert &#39;tensorboard log&#39; to &#39;pandas DataFrames&#39;</span>
+
+<span class="sd">    To convert the &#39;tensorboard log&#39;, the input must be the path to &quot;the parent folder of all the training logs&quot; (path_to_tensorboard_data), and the &#39;events.out.tfevents&#39; files have to be in this kind of path :</span>
+<span class="sd">      &lt; path_to_tensorboard_data/algo_name/n_simu/events.out.tfevents.xxxxx &gt;</span>
+
+<span class="sd">    The output format is a dictionary.</span>
+<span class="sd">    key = tag (type of data)</span>
+<span class="sd">    value = pandas DataFrame with the following structure (4 columns) :</span>
+<span class="sd">        &quot;name&quot; = algo_name</span>
+<span class="sd">        &quot;n_simu&quot; = n_simu (seed)</span>
+<span class="sd">        &quot;x&quot; = step number</span>
+<span class="sd">        &quot;y&quot; = value of the data</span>
+
+<span class="sd">    Parameters</span>
+<span class="sd">    ----------</span>
+<span class="sd">    path_to_tensorboard_data : path to the parent folder of the tensorboard&#39;s data.</span>
+
+<span class="sd">    Returns</span>
+<span class="sd">    -------</span>
+<span class="sd">    Dict : dict of pandas DataFrames (key = tag, value = pandas.DataFrame)</span>
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="kn">from</span> <span class="nn">tensorboard.backend.event_processing</span> <span class="kn">import</span> <span class="n">event_accumulator</span>
 
diff --git a/preview_pr/_sources/auto_examples/demo_bandits/plot_TS_bandit.rst.txt b/preview_pr/_sources/auto_examples/demo_bandits/plot_TS_bandit.rst.txt
index 5a9fe0de3..c4e6b3f58 100644
--- a/preview_pr/_sources/auto_examples/demo_bandits/plot_TS_bandit.rst.txt
+++ b/preview_pr/_sources/auto_examples/demo_bandits/plot_TS_bandit.rst.txt
@@ -54,21 +54,21 @@ For the Gaussian case, we use a Gaussian prior and compare it to a sub-Gaussian
 
  .. code-block:: none
 
-    [INFO] 10:02: ... trained! 
-    [INFO] 10:02: Saved ExperimentManager(Bounded UCB Agent) using pickle. 
-    [INFO] 10:02: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/Bounded UCB Agent_2024-10-16_10-02-15_bd80da50/manager_obj.pickle' 
-    [INFO] 10:02: Running ExperimentManager fit() for Bernoulli TS Agent with n_fit = 10 and max_workers = None. 
-    [INFO] 10:02: ... trained! 
-    [INFO] 10:02: Saved ExperimentManager(Bernoulli TS Agent) using pickle. 
-    [INFO] 10:02: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/Bernoulli TS Agent_2024-10-16_10-02-15_847030c5/manager_obj.pickle' 
-    [INFO] 10:02: Running ExperimentManager fit() for Gaussian UCB Agent with n_fit = 10 and max_workers = None. 
-    [INFO] 10:02: ... trained! 
-    [INFO] 10:02: Saved ExperimentManager(Gaussian UCB Agent) using pickle. 
-    [INFO] 10:02: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/Gaussian UCB Agent_2024-10-16_10-02-26_8ade7c97/manager_obj.pickle' 
-    [INFO] 10:02: Running ExperimentManager fit() for Gaussian TS Agent with n_fit = 10 and max_workers = None. 
-    [INFO] 10:02: ... trained! 
-    [INFO] 10:02: Saved ExperimentManager(Gaussian TS Agent) using pickle. 
-    [INFO] 10:02: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/Gaussian TS Agent_2024-10-16_10-02-26_ae7d0a45/manager_obj.pickle' 
+    [INFO] 12:45: ... trained! 
+    [INFO] 12:45: Saved ExperimentManager(Bounded UCB Agent) using pickle. 
+    [INFO] 12:45: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/Bounded UCB Agent_2024-10-16_12-45-04_319cb7e6/manager_obj.pickle' 
+    [INFO] 12:45: Running ExperimentManager fit() for Bernoulli TS Agent with n_fit = 10 and max_workers = None. 
+    [INFO] 12:45: ... trained! 
+    [INFO] 12:45: Saved ExperimentManager(Bernoulli TS Agent) using pickle. 
+    [INFO] 12:45: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/Bernoulli TS Agent_2024-10-16_12-45-04_279a356f/manager_obj.pickle' 
+    [INFO] 12:45: Running ExperimentManager fit() for Gaussian UCB Agent with n_fit = 10 and max_workers = None. 
+    [INFO] 12:45: ... trained! 
+    [INFO] 12:45: Saved ExperimentManager(Gaussian UCB Agent) using pickle. 
+    [INFO] 12:45: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/Gaussian UCB Agent_2024-10-16_12-45-15_f7d90f4f/manager_obj.pickle' 
+    [INFO] 12:45: Running ExperimentManager fit() for Gaussian TS Agent with n_fit = 10 and max_workers = None. 
+    [INFO] 12:45: ... trained! 
+    [INFO] 12:45: Saved ExperimentManager(Gaussian TS Agent) using pickle. 
+    [INFO] 12:45: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/Gaussian TS Agent_2024-10-16_12-45-15_9c0d4b7a/manager_obj.pickle' 
 
 
 
@@ -224,7 +224,7 @@ For the Gaussian case, we use a Gaussian prior and compare it to a sub-Gaussian
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** (0 minutes 20.469 seconds)
+   **Total running time of the script:** (0 minutes 20.350 seconds)
 
 
 .. _sphx_glr_download_auto_examples_demo_bandits_plot_TS_bandit.py:
diff --git a/preview_pr/_sources/auto_examples/demo_bandits/plot_compare_index_bandits.rst.txt b/preview_pr/_sources/auto_examples/demo_bandits/plot_compare_index_bandits.rst.txt
index b26f44423..8b7aff3a1 100644
--- a/preview_pr/_sources/auto_examples/demo_bandits/plot_compare_index_bandits.rst.txt
+++ b/preview_pr/_sources/auto_examples/demo_bandits/plot_compare_index_bandits.rst.txt
@@ -276,7 +276,7 @@ how to use subplots in with `plot_writer_data`
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** (0 minutes 41.147 seconds)
+   **Total running time of the script:** (0 minutes 41.099 seconds)
 
 
 .. _sphx_glr_download_auto_examples_demo_bandits_plot_compare_index_bandits.py:
diff --git a/preview_pr/_sources/auto_examples/demo_bandits/plot_exp3_bandit.rst.txt b/preview_pr/_sources/auto_examples/demo_bandits/plot_exp3_bandit.rst.txt
index 7b8250720..a371f0460 100644
--- a/preview_pr/_sources/auto_examples/demo_bandits/plot_exp3_bandit.rst.txt
+++ b/preview_pr/_sources/auto_examples/demo_bandits/plot_exp3_bandit.rst.txt
@@ -155,7 +155,7 @@ randomized algorithm.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** (0 minutes 7.648 seconds)
+   **Total running time of the script:** (0 minutes 7.662 seconds)
 
 
 .. _sphx_glr_download_auto_examples_demo_bandits_plot_exp3_bandit.py:
diff --git a/preview_pr/_sources/auto_examples/demo_bandits/plot_mirror_bandit.rst.txt b/preview_pr/_sources/auto_examples/demo_bandits/plot_mirror_bandit.rst.txt
index 3477d781e..d5ff6c204 100644
--- a/preview_pr/_sources/auto_examples/demo_bandits/plot_mirror_bandit.rst.txt
+++ b/preview_pr/_sources/auto_examples/demo_bandits/plot_mirror_bandit.rst.txt
@@ -45,32 +45,32 @@ and finally definition of the experiment.
 
  .. code-block:: none
 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.821    2           4 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.465    7           9 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.459    7           17 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.464    7           25 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.314    7           33 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.301    3           43 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.313    6           53 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.306    3           63 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.308    6           73 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.304    6           83 
-    [INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                             SH        0     -0.307    6           93 
-    [INFO] 10:03: ... trained! 
-    [INFO] 10:03: Saved ExperimentManager(SH) using pickle. 
-    [INFO] 10:03: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/SH_2024-10-16_10-03-17_feb7766d/manager_obj.pickle' 
-    The optimal action (fastest server) is server number  7
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.625    3           5 
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.458    1           11 
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.306    2           20 
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.61     1           27 
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.301    6           36 
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.309    4           46 
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.303    6           56 
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.306    7           66 
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.303    6           73 
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.302    6           83 
+    [INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                             SH        0     -0.301    6           93 
+    [INFO] 12:46: ... trained! 
+    [INFO] 12:46: Saved ExperimentManager(SH) using pickle. 
+    [INFO] 12:46: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/SH_2024-10-16_12-46-06_13e762a8/manager_obj.pickle' 
+    The optimal action (fastest server) is server number  8
 
 
 
@@ -251,7 +251,7 @@ and finally definition of the experiment.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** (0 minutes 37.576 seconds)
+   **Total running time of the script:** (0 minutes 36.308 seconds)
 
 
 .. _sphx_glr_download_auto_examples_demo_bandits_plot_mirror_bandit.py:
diff --git a/preview_pr/_sources/auto_examples/demo_bandits/plot_ucb_bandit.rst.txt b/preview_pr/_sources/auto_examples/demo_bandits/plot_ucb_bandit.rst.txt
index e6a64390b..f5865fdab 100644
--- a/preview_pr/_sources/auto_examples/demo_bandits/plot_ucb_bandit.rst.txt
+++ b/preview_pr/_sources/auto_examples/demo_bandits/plot_ucb_bandit.rst.txt
@@ -105,7 +105,7 @@ This script shows how to define a bandit environment and an UCB Index-based algo
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** (0 minutes 4.924 seconds)
+   **Total running time of the script:** (0 minutes 4.902 seconds)
 
 
 .. _sphx_glr_download_auto_examples_demo_bandits_plot_ucb_bandit.py:
diff --git a/preview_pr/_sources/auto_examples/demo_bandits/sg_execution_times.rst.txt b/preview_pr/_sources/auto_examples/demo_bandits/sg_execution_times.rst.txt
index d6cbd8e17..0752b60e9 100644
--- a/preview_pr/_sources/auto_examples/demo_bandits/sg_execution_times.rst.txt
+++ b/preview_pr/_sources/auto_examples/demo_bandits/sg_execution_times.rst.txt
@@ -6,16 +6,16 @@
 
 Computation times
 =================
-**01:51.763** total execution time for **auto_examples_demo_bandits** files:
+**01:50.320** total execution time for **auto_examples_demo_bandits** files:
 
 +--------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_auto_examples_demo_bandits_plot_compare_index_bandits.py` (``plot_compare_index_bandits.py``) | 00:41.147 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_demo_bandits_plot_compare_index_bandits.py` (``plot_compare_index_bandits.py``) | 00:41.099 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_auto_examples_demo_bandits_plot_mirror_bandit.py` (``plot_mirror_bandit.py``)                 | 00:37.576 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_demo_bandits_plot_mirror_bandit.py` (``plot_mirror_bandit.py``)                 | 00:36.308 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_auto_examples_demo_bandits_plot_TS_bandit.py` (``plot_TS_bandit.py``)                         | 00:20.469 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_demo_bandits_plot_TS_bandit.py` (``plot_TS_bandit.py``)                         | 00:20.350 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_auto_examples_demo_bandits_plot_exp3_bandit.py` (``plot_exp3_bandit.py``)                     | 00:07.648 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_demo_bandits_plot_exp3_bandit.py` (``plot_exp3_bandit.py``)                     | 00:07.662 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_auto_examples_demo_bandits_plot_ucb_bandit.py` (``plot_ucb_bandit.py``)                       | 00:04.924 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_demo_bandits_plot_ucb_bandit.py` (``plot_ucb_bandit.py``)                       | 00:04.902 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/preview_pr/_sources/auto_examples/plot_agent_manager.rst.txt b/preview_pr/_sources/auto_examples/plot_agent_manager.rst.txt
index ed3725e70..63fa51f16 100644
--- a/preview_pr/_sources/auto_examples/plot_agent_manager.rst.txt
+++ b/preview_pr/_sources/auto_examples/plot_agent_manager.rst.txt
@@ -49,18 +49,18 @@ Finally, we compare with a baseline provided by a random policy using the Experi
 
  .. code-block:: none
 
-    [INFO] 10:01: ... trained! 
-    [INFO] 10:01: Saved ExperimentManager(ValueIterationAgent) using pickle. 
-    [INFO] 10:01: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/ValueIterationAgent_2024-10-16_10-01-58_8d70b7e4/manager_obj.pickle' 
-    [INFO] 10:01: Running ExperimentManager fit() for RandomAgent with n_fit = 1 and max_workers = None. 
-    [INFO] 10:01: ... trained! 
-    [INFO] 10:01: Saved ExperimentManager(RandomAgent) using pickle. 
-    [INFO] 10:01: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/RandomAgent_2024-10-16_10-01-59_00aad013/manager_obj.pickle' 
-    [INFO] 10:01: Evaluating ValueIterationAgent... 
-    [INFO] 10:01: Computing 10 evaluations. 
+    [INFO] 12:44: ... trained! 
+    [INFO] 12:44: Saved ExperimentManager(ValueIterationAgent) using pickle. 
+    [INFO] 12:44: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/ValueIterationAgent_2024-10-16_12-44-47_80b8b7fe/manager_obj.pickle' 
+    [INFO] 12:44: Running ExperimentManager fit() for RandomAgent with n_fit = 1 and max_workers = None. 
+    [INFO] 12:44: ... trained! 
+    [INFO] 12:44: Saved ExperimentManager(RandomAgent) using pickle. 
+    [INFO] 12:44: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/RandomAgent_2024-10-16_12-44-48_7ba13ec9/manager_obj.pickle' 
+    [INFO] 12:44: Evaluating ValueIterationAgent... 
+    [INFO] 12:44: Computing 10 evaluations. 
     [INFO] Evaluation:..........  Evaluation finished 
-    [INFO] 10:01: Evaluating RandomAgent... 
-    [INFO] 10:01: Computing 10 evaluations. 
+    [INFO] 12:44: Evaluating RandomAgent... 
+    [INFO] 12:44: Computing 10 evaluations. 
     [INFO] Evaluation:..........  Evaluation finished 
 
 
@@ -186,7 +186,7 @@ Finally, we compare with a baseline provided by a random policy using the Experi
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** (0 minutes 1.233 seconds)
+   **Total running time of the script:** (0 minutes 1.153 seconds)
 
 
 .. _sphx_glr_download_auto_examples_plot_agent_manager.py:
diff --git a/preview_pr/_sources/auto_examples/plot_checkpointing.rst.txt b/preview_pr/_sources/auto_examples/plot_checkpointing.rst.txt
index 6024042d0..b890ce836 100644
--- a/preview_pr/_sources/auto_examples/plot_checkpointing.rst.txt
+++ b/preview_pr/_sources/auto_examples/plot_checkpointing.rst.txt
@@ -41,37 +41,37 @@ your agents, and how to restore from a previous checkpoint.
 
  .. code-block:: none
 
-    [INFO] 10:02: ... trained! 
-    [INFO] 10:02: Saved ExperimentManager(my-agent) using pickle. 
-    [INFO] 10:02: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle' 
-    [INFO] 10:02: Saved ExperimentManager(my-agent) using pickle. 
-    [INFO] 10:02: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle' 
-
-     Saved manager at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle.
-
-    [INFO] 10:02: Running ExperimentManager fit() for my-agent with n_fit = 2 and max_workers = None. 
-    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_1/checkpoint.pickle (timestep = 500)
-    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_0/checkpoint.pickle (timestep = 500)
-    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_1/checkpoint.pickle (timestep = 1000)
-    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_0/checkpoint.pickle (timestep = 1000)
-    [INFO] 10:02: ... trained! 
-    [INFO] 10:02: Saved ExperimentManager(my-agent) using pickle. 
-    [INFO] 10:02: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle' 
-
-     Loading manager from rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle.
-
-    [INFO] 10:02: Running ExperimentManager fit() for my-agent with n_fit = 2 and max_workers = None. 
+    [INFO] 12:44: ... trained! 
+    [INFO] 12:44: Saved ExperimentManager(my-agent) using pickle. 
+    [INFO] 12:44: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle' 
+    [INFO] 12:44: Saved ExperimentManager(my-agent) using pickle. 
+    [INFO] 12:44: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle' 
+
+     Saved manager at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle.
+
+    [INFO] 12:44: Running ExperimentManager fit() for my-agent with n_fit = 2 and max_workers = None. 
+    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_0/checkpoint.pickle (timestep = 500)
+    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_1/checkpoint.pickle (timestep = 500)
+    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_1/checkpoint.pickle (timestep = 1000)
+    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_0/checkpoint.pickle (timestep = 1000)
+    [INFO] 12:44: ... trained! 
+    [INFO] 12:44: Saved ExperimentManager(my-agent) using pickle. 
+    [INFO] 12:44: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle' 
+
+     Loading manager from rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle.
+
+    [INFO] 12:44: Running ExperimentManager fit() for my-agent with n_fit = 2 and max_workers = None. 
  
-     --> MyAgent loaded from checkpoint: rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_1/checkpoint.pickle 
+     --> MyAgent loaded from checkpoint: rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_0/checkpoint.pickle 
 
  
-     --> MyAgent loaded from checkpoint: rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_0/checkpoint.pickle 
+     --> MyAgent loaded from checkpoint: rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_1/checkpoint.pickle 
 
-    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_1/checkpoint.pickle (timestep = 1500)
-    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_0/checkpoint.pickle (timestep = 1500)
-    [INFO] 10:02: ... trained! 
-    [INFO] 10:02: Saved ExperimentManager(my-agent) using pickle. 
-    [INFO] 10:02: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle' 
+    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_0/checkpoint.pickle (timestep = 1500)
+    checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_1/checkpoint.pickle (timestep = 1500)
+    [INFO] 12:44: ... trained! 
+    [INFO] 12:44: Saved ExperimentManager(my-agent) using pickle. 
+    [INFO] 12:44: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle' 
 
 
 
@@ -166,7 +166,7 @@ your agents, and how to restore from a previous checkpoint.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** (0 minutes 1.589 seconds)
+   **Total running time of the script:** (0 minutes 1.500 seconds)
 
 
 .. _sphx_glr_download_auto_examples_plot_checkpointing.py:
diff --git a/preview_pr/_sources/auto_examples/plot_kernels.rst.txt b/preview_pr/_sources/auto_examples/plot_kernels.rst.txt
index 8e77c2657..4dd4f1b46 100644
--- a/preview_pr/_sources/auto_examples/plot_kernels.rst.txt
+++ b/preview_pr/_sources/auto_examples/plot_kernels.rst.txt
@@ -69,7 +69,7 @@ This script requires matplotlib
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** (0 minutes 10.659 seconds)
+   **Total running time of the script:** (0 minutes 10.663 seconds)
 
 
 .. _sphx_glr_download_auto_examples_plot_kernels.py:
diff --git a/preview_pr/_sources/auto_examples/plot_smooth.rst.txt b/preview_pr/_sources/auto_examples/plot_smooth.rst.txt
index 208b9d252..dcc73df53 100644
--- a/preview_pr/_sources/auto_examples/plot_smooth.rst.txt
+++ b/preview_pr/_sources/auto_examples/plot_smooth.rst.txt
@@ -194,7 +194,7 @@ This script shows how to define a bandit environment and an UCB Index-based algo
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** (0 minutes 22.894 seconds)
+   **Total running time of the script:** (0 minutes 22.489 seconds)
 
 
 .. _sphx_glr_download_auto_examples_plot_smooth.py:
diff --git a/preview_pr/_sources/auto_examples/plot_writer_wrapper.rst.txt b/preview_pr/_sources/auto_examples/plot_writer_wrapper.rst.txt
index c115792ad..cc82abbab 100644
--- a/preview_pr/_sources/auto_examples/plot_writer_wrapper.rst.txt
+++ b/preview_pr/_sources/auto_examples/plot_writer_wrapper.rst.txt
@@ -63,9 +63,9 @@ during the fit of the agent and then use the plot utils.
 
  .. code-block:: none
 
-    [INFO] 10:01: ... trained! 
-    [INFO] 10:01: Saved ExperimentManager(UCBVIAgent) using pickle. 
-    [INFO] 10:01: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/UCBVIAgent_2024-10-16_10-01-31_7758b26c/manager_obj.pickle' 
+    [INFO] 12:44: ... trained! 
+    [INFO] 12:44: Saved ExperimentManager(UCBVIAgent) using pickle. 
+    [INFO] 12:44: The ExperimentManager was saved in : 'rlberry_data/temp/manager_data/UCBVIAgent_2024-10-16_12-44-21_f34c3498/manager_obj.pickle' 
 
 
 
@@ -141,7 +141,7 @@ during the fit of the agent and then use the plot utils.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** (0 minutes 3.353 seconds)
+   **Total running time of the script:** (0 minutes 3.395 seconds)
 
 
 .. _sphx_glr_download_auto_examples_plot_writer_wrapper.py:
diff --git a/preview_pr/_sources/auto_examples/sg_execution_times.rst.txt b/preview_pr/_sources/auto_examples/sg_execution_times.rst.txt
index f5f30e6a9..e929db9b2 100644
--- a/preview_pr/_sources/auto_examples/sg_execution_times.rst.txt
+++ b/preview_pr/_sources/auto_examples/sg_execution_times.rst.txt
@@ -6,18 +6,18 @@
 
 Computation times
 =================
-**00:39.728** total execution time for **auto_examples** files:
+**00:39.200** total execution time for **auto_examples** files:
 
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_auto_examples_plot_smooth.py` (``plot_smooth.py``)                 | 00:22.894 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_plot_smooth.py` (``plot_smooth.py``)                 | 00:22.489 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_auto_examples_plot_kernels.py` (``plot_kernels.py``)               | 00:10.659 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_plot_kernels.py` (``plot_kernels.py``)               | 00:10.663 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_auto_examples_plot_writer_wrapper.py` (``plot_writer_wrapper.py``) | 00:03.353 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_plot_writer_wrapper.py` (``plot_writer_wrapper.py``) | 00:03.395 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_auto_examples_plot_checkpointing.py` (``plot_checkpointing.py``)   | 00:01.589 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_plot_checkpointing.py` (``plot_checkpointing.py``)   | 00:01.500 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_auto_examples_plot_agent_manager.py` (``plot_agent_manager.py``)   | 00:01.233 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_plot_agent_manager.py` (``plot_agent_manager.py``)   | 00:01.153 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_auto_examples_adastop_example.py` (``adastop_example.py``)         | 00:00.000 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
diff --git a/preview_pr/_sources/basics/userguide/export_training_data.md.txt b/preview_pr/_sources/basics/userguide/export_training_data.md.txt
index cfe54c358..25c7bf1e0 100644
--- a/preview_pr/_sources/basics/userguide/export_training_data.md.txt
+++ b/preview_pr/_sources/basics/userguide/export_training_data.md.txt
@@ -195,7 +195,7 @@ model2_seed2.learn(total_timesteps=5_000, tb_log_name="A2C")
 ```
 
 
-Then, if you need to these logs in a pandas dataframe, you can use the tool `tensorboard_folder_to_dataframe`.
+Then, if you need these logs in a pandas dataframe, you can use the tool [tensorboard_folder_to_dataframe](rlberry.manager.tensorboard_folder_to_dataframe).
 It will give you a `Dict` with all the scalar data from the tensorboad folder.
 
     - The `keys` will be the "tag" (the name of the measure)
diff --git a/preview_pr/api.html b/preview_pr/api.html
index 12b3a7dec..c85f8ad15 100644
--- a/preview_pr/api.html
+++ b/preview_pr/api.html
@@ -197,7 +197,7 @@ <h3>Evaluation and plot<a class="headerlink" href="#evaluation-and-plot" title="
 <td><p>Compare several trained agents using the mean over n_simulations evaluations for each agent.</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="generated/rlberry.manager.tensorboard_folder_to_dataframe.html#rlberry.manager.tensorboard_folder_to_dataframe" title="rlberry.manager.tensorboard_folder_to_dataframe"><code class="xref py py-obj docutils literal notranslate"><span class="pre">manager.tensorboard_folder_to_dataframe</span></code></a>(...)</p></td>
-<td><p>path_to_tensorboard_data : path to the tensorboard data.</p></td>
+<td><p>Function to convert 'tensorboard log' to 'pandas DataFrames'</p></td>
 </tr>
 </tbody>
 </table>
diff --git a/preview_pr/auto_examples/demo_bandits/plot_TS_bandit.html b/preview_pr/auto_examples/demo_bandits/plot_TS_bandit.html
index 8de7bdb00..812129217 100644
--- a/preview_pr/auto_examples/demo_bandits/plot_TS_bandit.html
+++ b/preview_pr/auto_examples/demo_bandits/plot_TS_bandit.html
@@ -133,21 +133,21 @@
 <li><img src="../../_images/sphx_glr_plot_TS_bandit_001.png" srcset="../../_images/sphx_glr_plot_TS_bandit_001.png" alt="Cumulative Pseudo-Regret" class = "sphx-glr-multi-img"/></li>
 <li><img src="../../_images/sphx_glr_plot_TS_bandit_002.png" srcset="../../_images/sphx_glr_plot_TS_bandit_002.png" alt="Cumulative Pseudo-Regret" class = "sphx-glr-multi-img"/></li>
 </ul>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>[INFO] 10:02: ... trained!
-[INFO] 10:02: Saved ExperimentManager(Bounded UCB Agent) using pickle.
-[INFO] 10:02: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/Bounded UCB Agent_2024-10-16_10-02-15_bd80da50/manager_obj.pickle&#39;
-[INFO] 10:02: Running ExperimentManager fit() for Bernoulli TS Agent with n_fit = 10 and max_workers = None.
-[INFO] 10:02: ... trained!
-[INFO] 10:02: Saved ExperimentManager(Bernoulli TS Agent) using pickle.
-[INFO] 10:02: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/Bernoulli TS Agent_2024-10-16_10-02-15_847030c5/manager_obj.pickle&#39;
-[INFO] 10:02: Running ExperimentManager fit() for Gaussian UCB Agent with n_fit = 10 and max_workers = None.
-[INFO] 10:02: ... trained!
-[INFO] 10:02: Saved ExperimentManager(Gaussian UCB Agent) using pickle.
-[INFO] 10:02: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/Gaussian UCB Agent_2024-10-16_10-02-26_8ade7c97/manager_obj.pickle&#39;
-[INFO] 10:02: Running ExperimentManager fit() for Gaussian TS Agent with n_fit = 10 and max_workers = None.
-[INFO] 10:02: ... trained!
-[INFO] 10:02: Saved ExperimentManager(Gaussian TS Agent) using pickle.
-[INFO] 10:02: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/Gaussian TS Agent_2024-10-16_10-02-26_ae7d0a45/manager_obj.pickle&#39;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>[INFO] 12:45: ... trained!
+[INFO] 12:45: Saved ExperimentManager(Bounded UCB Agent) using pickle.
+[INFO] 12:45: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/Bounded UCB Agent_2024-10-16_12-45-04_319cb7e6/manager_obj.pickle&#39;
+[INFO] 12:45: Running ExperimentManager fit() for Bernoulli TS Agent with n_fit = 10 and max_workers = None.
+[INFO] 12:45: ... trained!
+[INFO] 12:45: Saved ExperimentManager(Bernoulli TS Agent) using pickle.
+[INFO] 12:45: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/Bernoulli TS Agent_2024-10-16_12-45-04_279a356f/manager_obj.pickle&#39;
+[INFO] 12:45: Running ExperimentManager fit() for Gaussian UCB Agent with n_fit = 10 and max_workers = None.
+[INFO] 12:45: ... trained!
+[INFO] 12:45: Saved ExperimentManager(Gaussian UCB Agent) using pickle.
+[INFO] 12:45: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/Gaussian UCB Agent_2024-10-16_12-45-15_f7d90f4f/manager_obj.pickle&#39;
+[INFO] 12:45: Running ExperimentManager fit() for Gaussian TS Agent with n_fit = 10 and max_workers = None.
+[INFO] 12:45: ... trained!
+[INFO] 12:45: Saved ExperimentManager(Gaussian TS Agent) using pickle.
+[INFO] 12:45: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/Gaussian TS Agent_2024-10-16_12-45-15_9c0d4b7a/manager_obj.pickle&#39;
 </pre></div>
 </div>
 <div class="line-block">
@@ -295,7 +295,7 @@
 <span class="p">)</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 20.469 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 20.350 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-demo-bandits-plot-ts-bandit-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/32023c981124e8ab371ee1ea95c685b3/plot_TS_bandit.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_TS_bandit.py</span></code></a></p>
diff --git a/preview_pr/auto_examples/demo_bandits/plot_compare_index_bandits.html b/preview_pr/auto_examples/demo_bandits/plot_compare_index_bandits.html
index 0bbaefece..0a38b844b 100644
--- a/preview_pr/auto_examples/demo_bandits/plot_compare_index_bandits.html
+++ b/preview_pr/auto_examples/demo_bandits/plot_compare_index_bandits.html
@@ -343,7 +343,7 @@
 <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 41.147 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 41.099 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-demo-bandits-plot-compare-index-bandits-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/5f4eecea6a09a82a9d1cb5ca9b10d365/plot_compare_index_bandits.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_compare_index_bandits.py</span></code></a></p>
diff --git a/preview_pr/auto_examples/demo_bandits/plot_exp3_bandit.html b/preview_pr/auto_examples/demo_bandits/plot_exp3_bandit.html
index aa910d532..8ee786432 100644
--- a/preview_pr/auto_examples/demo_bandits/plot_exp3_bandit.html
+++ b/preview_pr/auto_examples/demo_bandits/plot_exp3_bandit.html
@@ -239,7 +239,7 @@
 <span class="p">)</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 7.648 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 7.662 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-demo-bandits-plot-exp3-bandit-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/31263fddb8f9f1c2667f6022c77bba23/plot_exp3_bandit.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_exp3_bandit.py</span></code></a></p>
diff --git a/preview_pr/auto_examples/demo_bandits/plot_mirror_bandit.html b/preview_pr/auto_examples/demo_bandits/plot_mirror_bandit.html
index f0b2a3e18..f8e1b193e 100644
--- a/preview_pr/auto_examples/demo_bandits/plot_mirror_bandit.html
+++ b/preview_pr/auto_examples/demo_bandits/plot_mirror_bandit.html
@@ -132,32 +132,32 @@
 halving’s objective.</p>
 <p>The code is in three parts: definition of environment, definition of agent,
 and finally definition of the experiment.</p>
-<img src="../../_images/sphx_glr_plot_mirror_bandit_001.png" srcset="../../_images/sphx_glr_plot_mirror_bandit_001.png" alt="plot mirror bandit" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.821    2           4
-[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.465    7           9
-[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.459    7           17
-[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.464    7           25
-[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.314    7           33
-[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.301    3           43
-[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.313    6           53
-[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.306    3           63
-[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.308    6           73
-[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.304    6           83
-[INFO] 10:03:        agent_name  worker  reward  action  max_global_step
-                         SH        0     -0.307    6           93
-[INFO] 10:03: ... trained!
-[INFO] 10:03: Saved ExperimentManager(SH) using pickle.
-[INFO] 10:03: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/SH_2024-10-16_10-03-17_feb7766d/manager_obj.pickle&#39;
-The optimal action (fastest server) is server number  7
+<img src="../../_images/sphx_glr_plot_mirror_bandit_001.png" srcset="../../_images/sphx_glr_plot_mirror_bandit_001.png" alt="plot mirror bandit" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.625    3           5
+[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.458    1           11
+[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.306    2           20
+[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.61     1           27
+[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.301    6           36
+[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.309    4           46
+[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.303    6           56
+[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.306    7           66
+[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.303    6           73
+[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.302    6           83
+[INFO] 12:46:        agent_name  worker  reward  action  max_global_step
+                         SH        0     -0.301    6           93
+[INFO] 12:46: ... trained!
+[INFO] 12:46: Saved ExperimentManager(SH) using pickle.
+[INFO] 12:46: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/SH_2024-10-16_12-46-06_13e762a8/manager_obj.pickle&#39;
+The optimal action (fastest server) is server number  8
 </pre></div>
 </div>
 <div class="line-block">
@@ -331,7 +331,7 @@
 <span class="p">)</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 37.576 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 36.308 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-demo-bandits-plot-mirror-bandit-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/10b71a51b8ae10571824280d3b92d89b/plot_mirror_bandit.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_mirror_bandit.py</span></code></a></p>
diff --git a/preview_pr/auto_examples/demo_bandits/plot_ucb_bandit.html b/preview_pr/auto_examples/demo_bandits/plot_ucb_bandit.html
index 0b935194e..aff66ecac 100644
--- a/preview_pr/auto_examples/demo_bandits/plot_ucb_bandit.html
+++ b/preview_pr/auto_examples/demo_bandits/plot_ucb_bandit.html
@@ -189,7 +189,7 @@
 <span class="p">)</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 4.924 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 4.902 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-demo-bandits-plot-ucb-bandit-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/3c50355e16583d3c58a37da5caf1b106/plot_ucb_bandit.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_ucb_bandit.py</span></code></a></p>
diff --git a/preview_pr/auto_examples/demo_bandits/sg_execution_times.html b/preview_pr/auto_examples/demo_bandits/sg_execution_times.html
index ab7de3420..5056fd3b5 100644
--- a/preview_pr/auto_examples/demo_bandits/sg_execution_times.html
+++ b/preview_pr/auto_examples/demo_bandits/sg_execution_times.html
@@ -120,27 +120,27 @@
         
   <section id="computation-times">
 <span id="sphx-glr-auto-examples-demo-bandits-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this heading">¶</a></h1>
-<p><strong>01:51.763</strong> total execution time for <strong>auto_examples_demo_bandits</strong> files:</p>
+<p><strong>01:50.320</strong> total execution time for <strong>auto_examples_demo_bandits</strong> files:</p>
 <table class="docutils align-default">
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="plot_compare_index_bandits.html#sphx-glr-auto-examples-demo-bandits-plot-compare-index-bandits-py"><span class="std std-ref">Comparison subplots of various index based bandits algorithms</span></a> (<code class="docutils literal notranslate"><span class="pre">plot_compare_index_bandits.py</span></code>)</p></td>
-<td><p>00:41.147</p></td>
+<td><p>00:41.099</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="plot_mirror_bandit.html#sphx-glr-auto-examples-demo-bandits-plot-mirror-bandit-py"><span class="std std-ref">A demo of Bandit BAI on a real dataset to select mirrors</span></a> (<code class="docutils literal notranslate"><span class="pre">plot_mirror_bandit.py</span></code>)</p></td>
-<td><p>00:37.576</p></td>
+<td><p>00:36.308</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="plot_TS_bandit.html#sphx-glr-auto-examples-demo-bandits-plot-ts-bandit-py"><span class="std std-ref">Comparison of Thompson sampling and UCB on Bernoulli and Gaussian bandits</span></a> (<code class="docutils literal notranslate"><span class="pre">plot_TS_bandit.py</span></code>)</p></td>
-<td><p>00:20.469</p></td>
+<td><p>00:20.350</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="plot_exp3_bandit.html#sphx-glr-auto-examples-demo-bandits-plot-exp3-bandit-py"><span class="std std-ref">EXP3 Bandit cumulative regret</span></a> (<code class="docutils literal notranslate"><span class="pre">plot_exp3_bandit.py</span></code>)</p></td>
-<td><p>00:07.648</p></td>
+<td><p>00:07.662</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="plot_ucb_bandit.html#sphx-glr-auto-examples-demo-bandits-plot-ucb-bandit-py"><span class="std std-ref">UCB Bandit cumulative regret</span></a> (<code class="docutils literal notranslate"><span class="pre">plot_ucb_bandit.py</span></code>)</p></td>
-<td><p>00:04.924</p></td>
+<td><p>00:04.902</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/preview_pr/auto_examples/plot_agent_manager.html b/preview_pr/auto_examples/plot_agent_manager.html
index fd05877db..da783f68b 100644
--- a/preview_pr/auto_examples/plot_agent_manager.html
+++ b/preview_pr/auto_examples/plot_agent_manager.html
@@ -133,18 +133,18 @@
 <div class="math notranslate nohighlight">
 \[Q(s, a) \leftarrow \sum_{s^{\prime}} p(s'|a, s)\left( R(s, a)+\gamma \max _{a^{\prime}} Q(s^{\prime}, a^{\prime}) \right).\]</div>
 <p>Finally, we compare with a baseline provided by a random policy using the ExperimentManager class which trains, evaluates and gathers statistics about the two agents.</p>
-<img src="../_images/sphx_glr_plot_agent_manager_001.png" srcset="../_images/sphx_glr_plot_agent_manager_001.png" alt="plot agent manager" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>[INFO] 10:01: ... trained!
-[INFO] 10:01: Saved ExperimentManager(ValueIterationAgent) using pickle.
-[INFO] 10:01: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/ValueIterationAgent_2024-10-16_10-01-58_8d70b7e4/manager_obj.pickle&#39;
-[INFO] 10:01: Running ExperimentManager fit() for RandomAgent with n_fit = 1 and max_workers = None.
-[INFO] 10:01: ... trained!
-[INFO] 10:01: Saved ExperimentManager(RandomAgent) using pickle.
-[INFO] 10:01: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/RandomAgent_2024-10-16_10-01-59_00aad013/manager_obj.pickle&#39;
-[INFO] 10:01: Evaluating ValueIterationAgent...
-[INFO] 10:01: Computing 10 evaluations.
+<img src="../_images/sphx_glr_plot_agent_manager_001.png" srcset="../_images/sphx_glr_plot_agent_manager_001.png" alt="plot agent manager" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>[INFO] 12:44: ... trained!
+[INFO] 12:44: Saved ExperimentManager(ValueIterationAgent) using pickle.
+[INFO] 12:44: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/ValueIterationAgent_2024-10-16_12-44-47_80b8b7fe/manager_obj.pickle&#39;
+[INFO] 12:44: Running ExperimentManager fit() for RandomAgent with n_fit = 1 and max_workers = None.
+[INFO] 12:44: ... trained!
+[INFO] 12:44: Saved ExperimentManager(RandomAgent) using pickle.
+[INFO] 12:44: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/RandomAgent_2024-10-16_12-44-48_7ba13ec9/manager_obj.pickle&#39;
+[INFO] 12:44: Evaluating ValueIterationAgent...
+[INFO] 12:44: Computing 10 evaluations.
 [INFO] Evaluation:..........  Evaluation finished
-[INFO] 10:01: Evaluating RandomAgent...
-[INFO] 10:01: Computing 10 evaluations.
+[INFO] 12:44: Evaluating RandomAgent...
+[INFO] 12:44: Computing 10 evaluations.
 [INFO] Evaluation:..........  Evaluation finished
 </pre></div>
 </div>
@@ -262,7 +262,7 @@
 <span class="n">output</span> <span class="o">=</span> <a href="../generated/rlberry.manager.evaluate_agents.html#rlberry.manager.evaluate_agents" title="rlberry.manager.evaluate_agents" class="sphx-glr-backref-module-rlberry-manager sphx-glr-backref-type-py-function"><span class="n">evaluate_agents</span></a><span class="p">([</span><a href="../generated/rlberry.manager.ExperimentManager.html#rlberry.manager.ExperimentManager" title="rlberry.manager.ExperimentManager" class="sphx-glr-backref-module-rlberry-manager sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">vi_stats</span></a><span class="p">,</span> <a href="../generated/rlberry.manager.ExperimentManager.html#rlberry.manager.ExperimentManager" title="rlberry.manager.ExperimentManager" class="sphx-glr-backref-module-rlberry-manager sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">baseline_stats</span></a><span class="p">],</span> <span class="n">n_simulations</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 1.233 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 1.153 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-plot-agent-manager-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../_downloads/5b2e10dcc28b048e84f30bd6901649ed/plot_agent_manager.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_agent_manager.py</span></code></a></p>
diff --git a/preview_pr/auto_examples/plot_checkpointing.html b/preview_pr/auto_examples/plot_checkpointing.html
index d110664df..c91e9e93c 100644
--- a/preview_pr/auto_examples/plot_checkpointing.html
+++ b/preview_pr/auto_examples/plot_checkpointing.html
@@ -127,37 +127,37 @@
 <span id="checkpointing-example"></span><span id="sphx-glr-auto-examples-plot-checkpointing-py"></span><h1>Checkpointing<a class="headerlink" href="#checkpointing" title="Permalink to this heading">¶</a></h1>
 <p>This is a minimal example of how to create checkpoints while training
 your agents, and how to restore from a previous checkpoint.</p>
-<img src="../_images/sphx_glr_plot_checkpointing_001.png" srcset="../_images/sphx_glr_plot_checkpointing_001.png" alt="y(t)" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>[INFO] 10:02: ... trained!
-[INFO] 10:02: Saved ExperimentManager(my-agent) using pickle.
-[INFO] 10:02: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle&#39;
-[INFO] 10:02: Saved ExperimentManager(my-agent) using pickle.
-[INFO] 10:02: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle&#39;
+<img src="../_images/sphx_glr_plot_checkpointing_001.png" srcset="../_images/sphx_glr_plot_checkpointing_001.png" alt="y(t)" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>[INFO] 12:44: ... trained!
+[INFO] 12:44: Saved ExperimentManager(my-agent) using pickle.
+[INFO] 12:44: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle&#39;
+[INFO] 12:44: Saved ExperimentManager(my-agent) using pickle.
+[INFO] 12:44: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle&#39;
 
- Saved manager at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle.
+ Saved manager at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle.
 
-[INFO] 10:02: Running ExperimentManager fit() for my-agent with n_fit = 2 and max_workers = None.
-checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_1/checkpoint.pickle (timestep = 500)
-checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_0/checkpoint.pickle (timestep = 500)
-checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_1/checkpoint.pickle (timestep = 1000)
-checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_0/checkpoint.pickle (timestep = 1000)
-[INFO] 10:02: ... trained!
-[INFO] 10:02: Saved ExperimentManager(my-agent) using pickle.
-[INFO] 10:02: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle&#39;
+[INFO] 12:44: Running ExperimentManager fit() for my-agent with n_fit = 2 and max_workers = None.
+checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_0/checkpoint.pickle (timestep = 500)
+checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_1/checkpoint.pickle (timestep = 500)
+checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_1/checkpoint.pickle (timestep = 1000)
+checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_0/checkpoint.pickle (timestep = 1000)
+[INFO] 12:44: ... trained!
+[INFO] 12:44: Saved ExperimentManager(my-agent) using pickle.
+[INFO] 12:44: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle&#39;
 
- Loading manager from rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle.
+ Loading manager from rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle.
 
-[INFO] 10:02: Running ExperimentManager fit() for my-agent with n_fit = 2 and max_workers = None.
+[INFO] 12:44: Running ExperimentManager fit() for my-agent with n_fit = 2 and max_workers = None.
 
- --&gt; MyAgent loaded from checkpoint: rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_1/checkpoint.pickle
+ --&gt; MyAgent loaded from checkpoint: rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_0/checkpoint.pickle
 
 
- --&gt; MyAgent loaded from checkpoint: rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_0/checkpoint.pickle
+ --&gt; MyAgent loaded from checkpoint: rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_1/checkpoint.pickle
 
-checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_1/checkpoint.pickle (timestep = 1500)
-checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/output_0/checkpoint.pickle (timestep = 1500)
-[INFO] 10:02: ... trained!
-[INFO] 10:02: Saved ExperimentManager(my-agent) using pickle.
-[INFO] 10:02: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/my-agent_2024-10-16_10-02-00_9aa9c456/manager_obj.pickle&#39;
+checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_0/checkpoint.pickle (timestep = 1500)
+checkpoint at rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/output_1/checkpoint.pickle (timestep = 1500)
+[INFO] 12:44: ... trained!
+[INFO] 12:44: Saved ExperimentManager(my-agent) using pickle.
+[INFO] 12:44: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/my-agent_2024-10-16_12-44-49_e91ad515/manager_obj.pickle&#39;
 </pre></div>
 </div>
 <div class="line-block">
@@ -245,7 +245,7 @@
     <a href="../generated/rlberry.manager.plot_writer_data.html#rlberry.manager.plot_writer_data" title="rlberry.manager.plot_writer_data" class="sphx-glr-backref-module-rlberry-manager sphx-glr-backref-type-py-function"><span class="n">plot_writer_data</span></a><span class="p">(</span><a href="../generated/rlberry.manager.ExperimentManager.html#rlberry.manager.ExperimentManager" title="rlberry.manager.ExperimentManager" class="sphx-glr-backref-module-rlberry-manager sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">loaded_manager</span></a><span class="p">,</span> <span class="n">tag</span><span class="o">=</span><span class="s2">&quot;y(t)&quot;</span><span class="p">,</span> <span class="n">show</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 1.589 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 1.500 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-plot-checkpointing-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../_downloads/ce48de95bcbfa35be204f76edd32bcbf/plot_checkpointing.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_checkpointing.py</span></code></a></p>
diff --git a/preview_pr/auto_examples/plot_kernels.html b/preview_pr/auto_examples/plot_kernels.html
index 9d98e2d0c..ed02d8161 100644
--- a/preview_pr/auto_examples/plot_kernels.html
+++ b/preview_pr/auto_examples/plot_kernels.html
@@ -153,7 +153,7 @@
 <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 10.659 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 10.663 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-plot-kernels-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../_downloads/48b6c6ae9c9d8d7cd03022520c71c0dd/plot_kernels.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_kernels.py</span></code></a></p>
diff --git a/preview_pr/auto_examples/plot_smooth.html b/preview_pr/auto_examples/plot_smooth.html
index fe47f5b07..f6aa6dc3d 100644
--- a/preview_pr/auto_examples/plot_smooth.html
+++ b/preview_pr/auto_examples/plot_smooth.html
@@ -264,7 +264,7 @@
 <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 22.894 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 22.489 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-plot-smooth-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../_downloads/a4c1107753a539ad344e4e53a1f4651d/plot_smooth.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_smooth.py</span></code></a></p>
diff --git a/preview_pr/auto_examples/plot_writer_wrapper.html b/preview_pr/auto_examples/plot_writer_wrapper.html
index 7203722db..2064921a6 100644
--- a/preview_pr/auto_examples/plot_writer_wrapper.html
+++ b/preview_pr/auto_examples/plot_writer_wrapper.html
@@ -142,9 +142,9 @@
 <li><img src="../_images/sphx_glr_plot_writer_wrapper_001.png" srcset="../_images/sphx_glr_plot_writer_wrapper_001.png" alt="Cumulative Reward" class = "sphx-glr-multi-img"/></li>
 <li><img src="../_images/sphx_glr_plot_writer_wrapper_002.png" srcset="../_images/sphx_glr_plot_writer_wrapper_002.png" alt="Cumulative Reward" class = "sphx-glr-multi-img"/></li>
 </ul>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>[INFO] 10:01: ... trained!
-[INFO] 10:01: Saved ExperimentManager(UCBVIAgent) using pickle.
-[INFO] 10:01: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/UCBVIAgent_2024-10-16_10-01-31_7758b26c/manager_obj.pickle&#39;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>[INFO] 12:44: ... trained!
+[INFO] 12:44: Saved ExperimentManager(UCBVIAgent) using pickle.
+[INFO] 12:44: The ExperimentManager was saved in : &#39;rlberry_data/temp/manager_data/UCBVIAgent_2024-10-16_12-44-21_f34c3498/manager_obj.pickle&#39;
 </pre></div>
 </div>
 <div class="line-block">
@@ -211,7 +211,7 @@
 <span class="n">ax</span><span class="o">.</span><span class="n">set_yscale</span><span class="p">(</span><span class="s2">&quot;log&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 3.353 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> (0 minutes 3.395 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-auto-examples-plot-writer-wrapper-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../_downloads/451b8c16c365b401531cc14d86dea3ef/plot_writer_wrapper.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">plot_writer_wrapper.py</span></code></a></p>
diff --git a/preview_pr/auto_examples/sg_execution_times.html b/preview_pr/auto_examples/sg_execution_times.html
index 87bfc6854..f30723469 100644
--- a/preview_pr/auto_examples/sg_execution_times.html
+++ b/preview_pr/auto_examples/sg_execution_times.html
@@ -120,27 +120,27 @@
         
   <section id="computation-times">
 <span id="sphx-glr-auto-examples-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this heading">¶</a></h1>
-<p><strong>00:39.728</strong> total execution time for <strong>auto_examples</strong> files:</p>
+<p><strong>00:39.200</strong> total execution time for <strong>auto_examples</strong> files:</p>
 <table class="docutils align-default">
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="plot_smooth.html#sphx-glr-auto-examples-plot-smooth-py"><span class="std std-ref">Illustration of plotting tools on Bandits</span></a> (<code class="docutils literal notranslate"><span class="pre">plot_smooth.py</span></code>)</p></td>
-<td><p>00:22.894</p></td>
+<td><p>00:22.489</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="plot_kernels.html#sphx-glr-auto-examples-plot-kernels-py"><span class="std std-ref">Plot kernel functions</span></a> (<code class="docutils literal notranslate"><span class="pre">plot_kernels.py</span></code>)</p></td>
-<td><p>00:10.659</p></td>
+<td><p>00:10.663</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="plot_writer_wrapper.html#sphx-glr-auto-examples-plot-writer-wrapper-py"><span class="std std-ref">Record reward during training and then plot it</span></a> (<code class="docutils literal notranslate"><span class="pre">plot_writer_wrapper.py</span></code>)</p></td>
-<td><p>00:03.353</p></td>
+<td><p>00:03.395</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="plot_checkpointing.html#sphx-glr-auto-examples-plot-checkpointing-py"><span class="std std-ref">Checkpointing</span></a> (<code class="docutils literal notranslate"><span class="pre">plot_checkpointing.py</span></code>)</p></td>
-<td><p>00:01.589</p></td>
+<td><p>00:01.500</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="plot_agent_manager.html#sphx-glr-auto-examples-plot-agent-manager-py"><span class="std std-ref">A demo of Experiment Manager</span></a> (<code class="docutils literal notranslate"><span class="pre">plot_agent_manager.py</span></code>)</p></td>
-<td><p>00:01.233</p></td>
+<td><p>00:01.153</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="adastop_example.html#sphx-glr-auto-examples-adastop-example-py"><span class="std std-ref">Compare PPO and A2C on Acrobot with AdaStop</span></a> (<code class="docutils literal notranslate"><span class="pre">adastop_example.py</span></code>)</p></td>
diff --git a/preview_pr/basics/userguide/export_training_data.html b/preview_pr/basics/userguide/export_training_data.html
index 528534d98..3726b5063 100644
--- a/preview_pr/basics/userguide/export_training_data.html
+++ b/preview_pr/basics/userguide/export_training_data.html
@@ -301,7 +301,7 @@ <h2>How to import data from tensorboard?<a class="headerlink" href="#how-to-impo
 <span class="n">model2_seed2</span><span class="o">.</span><span class="n">learn</span><span class="p">(</span><span class="n">total_timesteps</span><span class="o">=</span><span class="mi">5_000</span><span class="p">,</span> <span class="n">tb_log_name</span><span class="o">=</span><span class="s2">&quot;A2C&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
-<p>Then, if you need to these logs in a pandas dataframe, you can use the tool <code class="docutils literal notranslate"><span class="pre">tensorboard_folder_to_dataframe</span></code>.
+<p>Then, if you need these logs in a pandas dataframe, you can use the tool <a class="reference internal" href="../../generated/rlberry.manager.tensorboard_folder_to_dataframe.html#rlberry.manager.tensorboard_folder_to_dataframe" title="rlberry.manager.tensorboard_folder_to_dataframe"><span class="xref myst py py-func">tensorboard_folder_to_dataframe</span></a>.
 It will give you a <code class="docutils literal notranslate"><span class="pre">Dict</span></code> with all the scalar data from the tensorboad folder.</p>
 <div class="highlight-none notranslate"><div class="highlight"><pre><span></span>- The `keys` will be the &quot;tag&quot; (the name of the measure)
 - the `values` will be the `dataframe` with 4 columns : [&quot;name&quot;, &quot;n_simu&quot;, &quot;x&quot;, &quot;y&quot;]
diff --git a/preview_pr/generated/rlberry.manager.tensorboard_folder_to_dataframe.html b/preview_pr/generated/rlberry.manager.tensorboard_folder_to_dataframe.html
index 430d0dce0..34b739c4f 100644
--- a/preview_pr/generated/rlberry.manager.tensorboard_folder_to_dataframe.html
+++ b/preview_pr/generated/rlberry.manager.tensorboard_folder_to_dataframe.html
@@ -126,8 +126,32 @@ <h1><code class="xref py py-mod docutils literal notranslate"><span class="pre">
 <dl class="py function">
 <dt class="sig sig-object py" id="rlberry.manager.tensorboard_folder_to_dataframe">
 <span class="sig-prename descclassname"><span class="pre">rlberry.manager.</span></span><span class="sig-name descname"><span class="pre">tensorboard_folder_to_dataframe</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path_to_tensorboard_data</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/rlberry/manager/utils.html#tensorboard_folder_to_dataframe"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#rlberry.manager.tensorboard_folder_to_dataframe" title="Permalink to this definition">¶</a></dt>
-<dd><p>path_to_tensorboard_data : path to the tensorboard data. It must be the parent folder of all the training, and the event have to be in this kind of path : &lt;path_to_tensorboard_data/algo_name/n_simu/events.out.tfevents.xxxxx&gt;</p>
-<p>Return a dict of panda dataframe (key = tag, value = panda.dataframe)</p>
+<dd><p>Function to convert ‘tensorboard log’ to ‘pandas DataFrames’</p>
+<dl class="simple">
+<dt>To convert the ‘tensorboard log’, the input must be the path to “the parent folder of all the training logs” (path_to_tensorboard_data), and the ‘events.out.tfevents’ files have to be in this kind of path :</dt><dd><p>&lt; path_to_tensorboard_data/algo_name/n_simu/events.out.tfevents.xxxxx &gt;</p>
+</dd>
+</dl>
+<p>The output format is a dictionary.
+key = tag (type of data)
+value = pandas DataFrame with the following structure (4 columns) :</p>
+<blockquote>
+<div><p>“name” = algo_name
+“n_simu” = n_simu (seed)
+“x” = step number
+“y” = value of the data</p>
+</div></blockquote>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters<span class="colon">:</span></dt>
+<dd class="field-odd"><dl class="simple">
+<dt><strong>path_to_tensorboard_data</strong><span class="classifier">path to the parent folder of the tensorboard’s data.</span></dt><dd></dd>
+</dl>
+</dd>
+<dt class="field-even">Returns<span class="colon">:</span></dt>
+<dd class="field-even"><dl class="simple">
+<dt><strong>Dict</strong><span class="classifier">dict of pandas DataFrames (key = tag, value = pandas.DataFrame)</span></dt><dd></dd>
+</dl>
+</dd>
+</dl>
 </dd></dl>
 
 <div class="clearer"></div></section>
diff --git a/preview_pr/searchindex.js b/preview_pr/searchindex.js
index db1d69eb9..ce128b45b 100644
--- a/preview_pr/searchindex.js
+++ b/preview_pr/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["about", "api", "auto_examples/adastop_example", "auto_examples/comparison_agents", "auto_examples/demo_agents/demo_SAC", "auto_examples/demo_agents/index", "auto_examples/demo_agents/video_plot_a2c", "auto_examples/demo_agents/video_plot_dqn", "auto_examples/demo_agents/video_plot_mbqvi", "auto_examples/demo_agents/video_plot_mdqn", "auto_examples/demo_agents/video_plot_ppo", "auto_examples/demo_agents/video_plot_rs_kernel_ucbvi", "auto_examples/demo_agents/video_plot_rsucbvi", "auto_examples/demo_agents/video_plot_vi", "auto_examples/demo_bandits/index", "auto_examples/demo_bandits/plot_TS_bandit", "auto_examples/demo_bandits/plot_compare_index_bandits", "auto_examples/demo_bandits/plot_exp3_bandit", "auto_examples/demo_bandits/plot_mirror_bandit", "auto_examples/demo_bandits/plot_ucb_bandit", "auto_examples/demo_bandits/sg_execution_times", "auto_examples/demo_env/example_atari_atlantis_vectorized_ppo", "auto_examples/demo_env/example_atari_breakout_vectorized_ppo", "auto_examples/demo_env/index", "auto_examples/demo_env/video_plot_acrobot", "auto_examples/demo_env/video_plot_apple_gold", "auto_examples/demo_env/video_plot_atari_freeway", "auto_examples/demo_env/video_plot_chain", "auto_examples/demo_env/video_plot_gridworld", "auto_examples/demo_env/video_plot_mountain_car", "auto_examples/demo_env/video_plot_old_gym_compatibility_wrapper_old_acrobot", "auto_examples/demo_env/video_plot_pball", "auto_examples/demo_env/video_plot_rooms", "auto_examples/demo_env/video_plot_springcartpole", "auto_examples/demo_env/video_plot_twinrooms", "auto_examples/example_venv", "auto_examples/index", "auto_examples/plot_agent_manager", "auto_examples/plot_checkpointing", "auto_examples/plot_kernels", "auto_examples/plot_smooth", "auto_examples/plot_writer_wrapper", "auto_examples/sg_execution_times", "basics/DeepRLTutorial/TutorialDeepRL", "basics/comparison", "basics/create_agent", "basics/evaluate_agent", "basics/experiment_setup", 
"basics/multiprocess", "basics/quick_start_rl/quickstart", "basics/rlberry how to", "basics/seeding", "basics/userguide/adastop", "basics/userguide/agent", "basics/userguide/environment", "basics/userguide/experimentManager", "basics/userguide/export_training_data", "basics/userguide/external_lib", "basics/userguide/logging", "basics/userguide/save_load", "basics/userguide/seeding", "basics/userguide/visualization", "beginner_dev_guide", "changelog", "contributing", "contributors", "generated/rlberry.agents.Agent", "generated/rlberry.agents.AgentWithSimplePolicy", "generated/rlberry.agents.stable_baselines.StableBaselinesAgent", "generated/rlberry.agents.utils.replay.ReplayBuffer", "generated/rlberry.envs.PipelineEnv", "generated/rlberry.envs.atari_make", "generated/rlberry.envs.basewrapper.Wrapper", "generated/rlberry.envs.gym_make", "generated/rlberry.envs.interface.Model", "generated/rlberry.manager.AdastopComparator", "generated/rlberry.manager.ExperimentManager", "generated/rlberry.manager.MultipleManagers", "generated/rlberry.manager.compare_agents", "generated/rlberry.manager.evaluate_agents", "generated/rlberry.manager.plot_smoothed_curves", "generated/rlberry.manager.plot_synchronized_curves", "generated/rlberry.manager.plot_writer_data", "generated/rlberry.manager.preset_manager", "generated/rlberry.manager.read_writer_data", "generated/rlberry.manager.run_venv_xp", "generated/rlberry.manager.tensorboard_folder_to_dataframe", "generated/rlberry.manager.with_venv", "generated/rlberry.seeding.safe_reseed", "generated/rlberry.seeding.seeder.Seeder", "generated/rlberry.seeding.set_external_seed", "generated/rlberry.spaces.Box", "generated/rlberry.spaces.Dict", "generated/rlberry.spaces.Discrete", "generated/rlberry.spaces.MultiBinary", "generated/rlberry.spaces.MultiDiscrete", "generated/rlberry.spaces.Tuple", "generated/rlberry.utils.check_env", "generated/rlberry.utils.check_experiment_manager", "generated/rlberry.utils.check_fit_additive", 
"generated/rlberry.utils.check_rl_agent", "generated/rlberry.utils.check_save_load", "generated/rlberry.utils.check_seeding_agent", "generated/rlberry.utils.logging.set_level", "generated/rlberry.utils.writers.DefaultWriter", "generated/rlberry.wrappers.RescaleRewardWrapper", "generated/rlberry.wrappers.discretize_state.DiscretizeStateWrapper", "generated/rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper", "index", "installation", "user_guide", "user_guide2", "versions"], "filenames": ["about.rst", "api.rst", "auto_examples/adastop_example.rst", "auto_examples/comparison_agents.rst", "auto_examples/demo_agents/demo_SAC.rst", "auto_examples/demo_agents/index.rst", "auto_examples/demo_agents/video_plot_a2c.rst", "auto_examples/demo_agents/video_plot_dqn.rst", "auto_examples/demo_agents/video_plot_mbqvi.rst", "auto_examples/demo_agents/video_plot_mdqn.rst", "auto_examples/demo_agents/video_plot_ppo.rst", "auto_examples/demo_agents/video_plot_rs_kernel_ucbvi.rst", "auto_examples/demo_agents/video_plot_rsucbvi.rst", "auto_examples/demo_agents/video_plot_vi.rst", "auto_examples/demo_bandits/index.rst", "auto_examples/demo_bandits/plot_TS_bandit.rst", "auto_examples/demo_bandits/plot_compare_index_bandits.rst", "auto_examples/demo_bandits/plot_exp3_bandit.rst", "auto_examples/demo_bandits/plot_mirror_bandit.rst", "auto_examples/demo_bandits/plot_ucb_bandit.rst", "auto_examples/demo_bandits/sg_execution_times.rst", "auto_examples/demo_env/example_atari_atlantis_vectorized_ppo.rst", "auto_examples/demo_env/example_atari_breakout_vectorized_ppo.rst", "auto_examples/demo_env/index.rst", "auto_examples/demo_env/video_plot_acrobot.rst", "auto_examples/demo_env/video_plot_apple_gold.rst", "auto_examples/demo_env/video_plot_atari_freeway.rst", "auto_examples/demo_env/video_plot_chain.rst", "auto_examples/demo_env/video_plot_gridworld.rst", "auto_examples/demo_env/video_plot_mountain_car.rst", "auto_examples/demo_env/video_plot_old_gym_compatibility_wrapper_old_acrobot.rst", 
"auto_examples/demo_env/video_plot_pball.rst", "auto_examples/demo_env/video_plot_rooms.rst", "auto_examples/demo_env/video_plot_springcartpole.rst", "auto_examples/demo_env/video_plot_twinrooms.rst", "auto_examples/example_venv.rst", "auto_examples/index.rst", "auto_examples/plot_agent_manager.rst", "auto_examples/plot_checkpointing.rst", "auto_examples/plot_kernels.rst", "auto_examples/plot_smooth.rst", "auto_examples/plot_writer_wrapper.rst", "auto_examples/sg_execution_times.rst", "basics/DeepRLTutorial/TutorialDeepRL.md", "basics/comparison.md", "basics/create_agent.rst", "basics/evaluate_agent.rst", "basics/experiment_setup.rst", "basics/multiprocess.rst", "basics/quick_start_rl/quickstart.md", "basics/rlberry how to.rst", "basics/seeding.rst", "basics/userguide/adastop.md", "basics/userguide/agent.md", "basics/userguide/environment.md", "basics/userguide/experimentManager.md", "basics/userguide/export_training_data.md", "basics/userguide/external_lib.md", "basics/userguide/logging.md", "basics/userguide/save_load.md", "basics/userguide/seeding.md", "basics/userguide/visualization.md", "beginner_dev_guide.md", "changelog.rst", "contributing.md", "contributors.rst", "generated/rlberry.agents.Agent.rst", "generated/rlberry.agents.AgentWithSimplePolicy.rst", "generated/rlberry.agents.stable_baselines.StableBaselinesAgent.rst", "generated/rlberry.agents.utils.replay.ReplayBuffer.rst", "generated/rlberry.envs.PipelineEnv.rst", "generated/rlberry.envs.atari_make.rst", "generated/rlberry.envs.basewrapper.Wrapper.rst", "generated/rlberry.envs.gym_make.rst", "generated/rlberry.envs.interface.Model.rst", "generated/rlberry.manager.AdastopComparator.rst", "generated/rlberry.manager.ExperimentManager.rst", "generated/rlberry.manager.MultipleManagers.rst", "generated/rlberry.manager.compare_agents.rst", "generated/rlberry.manager.evaluate_agents.rst", "generated/rlberry.manager.plot_smoothed_curves.rst", "generated/rlberry.manager.plot_synchronized_curves.rst", 
"generated/rlberry.manager.plot_writer_data.rst", "generated/rlberry.manager.preset_manager.rst", "generated/rlberry.manager.read_writer_data.rst", "generated/rlberry.manager.run_venv_xp.rst", "generated/rlberry.manager.tensorboard_folder_to_dataframe.rst", "generated/rlberry.manager.with_venv.rst", "generated/rlberry.seeding.safe_reseed.rst", "generated/rlberry.seeding.seeder.Seeder.rst", "generated/rlberry.seeding.set_external_seed.rst", "generated/rlberry.spaces.Box.rst", "generated/rlberry.spaces.Dict.rst", "generated/rlberry.spaces.Discrete.rst", "generated/rlberry.spaces.MultiBinary.rst", "generated/rlberry.spaces.MultiDiscrete.rst", "generated/rlberry.spaces.Tuple.rst", "generated/rlberry.utils.check_env.rst", "generated/rlberry.utils.check_experiment_manager.rst", "generated/rlberry.utils.check_fit_additive.rst", "generated/rlberry.utils.check_rl_agent.rst", "generated/rlberry.utils.check_save_load.rst", "generated/rlberry.utils.check_seeding_agent.rst", "generated/rlberry.utils.logging.set_level.rst", "generated/rlberry.utils.writers.DefaultWriter.rst", "generated/rlberry.wrappers.RescaleRewardWrapper.rst", "generated/rlberry.wrappers.discretize_state.DiscretizeStateWrapper.rst", "generated/rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.rst", "index.md", "installation.md", "user_guide.md", "user_guide2.rst", "versions.rst"], "titles": ["About us", "rlberry API", "Compare PPO and A2C on Acrobot with AdaStop", "Compare Bandit Algorithms", "SAC Soft Actor-Critic", "Illustration of rlberry agents", "A demo of A2C algorithm in PBall2D environment", "A demo of DQN algorithm in CartPole environment", "A demo of MBQVI algorithm in Gridworld environment", "A demo of M-DQN algorithm in CartPole environment", "A demo of PPO algorithm in PBall2D environment", "A demo of RSKernelUCBVIAgent algorithm in Acrobot environment", "A demo of RSUCBVI algorithm in MountainCar environment", "A demo of ValueIteration algorithm in Chain environment", "Illustration of 
bandits in rlberry", "Comparison of Thompson sampling and UCB on Bernoulli and Gaussian bandits", "Comparison subplots of various index based bandits algorithms", "EXP3 Bandit cumulative regret", "A demo of Bandit BAI on a real dataset to select mirrors", "UCB Bandit cumulative regret", "Computation times", "A demo of ATARI Atlantis environment with vectorized PPOAgent", "A demo of ATARI Breakout environment with vectorized PPOAgent", "Illustration of rlberry environments", "A demo of Acrobot environment with RSUCBVIAgent", "A demo of AppleGold environment", "A demo of ATARI Freeway environment with DQNAgent", "A demo of Chain environment", "A demo of Gridworld environment with ValueIterationAgent", "A demo of MountainCar environment", "A demo of OldGymCompatibilityWrapper with old_Acrobot environment", "A demo of PBALL2D environment", "A demo of rooms environment", "A demo of SpringCartPole environment with DQNAgent", "A demo of twinrooms environment", "Using multiple virtual environments with rlberry", "Gallery of examples", "A demo of Experiment Manager", "Checkpointing", "Plot kernel functions", "Illustration of plotting tools on Bandits", "Record reward during training and then plot it", "Computation times", "Quickstart for Deep Reinforcement Learning in rlberry", "Comparison of Agents", "Create an agent", "Evaluate an agent and optimize its hyperparameters", "Setup and run experiments using yaml config files", "Parallelization in rlberry", "Quick Start for Reinforcement Learning in rlberry", "Libraries", "Seeding &amp; Reproducibility", "Adaptive hypothesis testing for comparison of RL agents with AdaStop", "How to use an Agent", "How to use an environment", "How to use the ExperimentManager", "How to export/import data (rlberry data, tensorboard data, \u2026)?", "How to use the external libraries", "How to log your experiment", "How to save/load an experiment", "How to seed your experiment", "Visualization of policies and plots of training/evaluation metrics 
in rlberry", "How to contribute", "Changelog", "Contributing", "&lt;no title&gt;", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.agents</span></code>.Agent", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.agents</span></code>.AgentWithSimplePolicy", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.agents.stable_baselines</span></code>.StableBaselinesAgent", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.agents.utils.replay</span></code>.ReplayBuffer", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.envs</span></code>.PipelineEnv", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.envs</span></code>.atari_make", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.envs.basewrapper</span></code>.Wrapper", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.envs</span></code>.gym_make", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.envs.interface</span></code>.Model", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.AdastopComparator", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.ExperimentManager", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.MultipleManagers", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.compare_agents", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.evaluate_agents", "<code class=\"xref py py-mod docutils literal notranslate\"><span 
class=\"pre\">rlberry.manager</span></code>.plot_smoothed_curves", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.plot_synchronized_curves", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.plot_writer_data", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.preset_manager", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.read_writer_data", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.run_venv_xp", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.tensorboard_folder_to_dataframe", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.with_venv", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.seeding</span></code>.safe_reseed", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.seeding.seeder</span></code>.Seeder", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.seeding</span></code>.set_external_seed", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.Box", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.Dict", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.Discrete", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.MultiBinary", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.MultiDiscrete", "<code class=\"xref py 
py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.Tuple", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_env", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_experiment_manager", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_fit_additive", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_rl_agent", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_save_load", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_seeding_agent", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils.logging</span></code>.set_level", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils.writers</span></code>.DefaultWriter", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.wrappers</span></code>.RescaleRewardWrapper", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.wrappers.discretize_state</span></code>.DiscretizeStateWrapper", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.wrappers.gym_utils</span></code>.OldGymCompatibilityWrapper", "An RL Library for Research and Education", "Installation", "User Guide", "User guide : contents", "Documentation versions"], "terms": {"thi": [0, 2, 3, 4, 15, 16, 17, 18, 19, 21, 22, 26, 28, 33, 35, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 74, 75, 76, 82, 84, 86, 87, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108, 109, 110], "project": [0, 54, 
62], "wa": [0, 15, 16, 18, 37, 38, 41, 43, 54, 55, 56, 58, 62, 63, 69, 72, 74, 76, 88, 104, 105, 106, 107], "initi": [0, 35, 37, 38, 45, 46, 53, 56, 58, 59, 60, 63, 66, 67, 68, 72, 74, 76, 92, 96, 105, 106, 107], "i": [0, 6, 7, 8, 9, 10, 11, 12, 13, 16, 18, 21, 22, 26, 28, 33, 35, 37, 38, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 87, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 102, 104, 105, 106, 107, 108, 109, 110], "activ": [0, 21, 22, 26, 62, 63, 109], "maintain": [0, 63], "inria": [0, 63], "scool": [0, 53, 54, 61, 63, 108, 110], "team": [0, 63, 112], "The": [0, 6, 7, 8, 9, 10, 11, 12, 13, 15, 18, 21, 22, 26, 35, 37, 38, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 70, 72, 74, 76, 77, 78, 80, 82, 84, 92, 94, 96, 104, 105, 106, 107, 108, 109, 110, 111], "follow": [0, 7, 9, 41, 43, 44, 51, 55, 56, 57, 60, 61, 62, 64, 75], "peopl": 0, "contribut": [0, 63], "aleshi94": [0, 65], "brahimdriss": [0, 65], "matheu": [0, 65], "m": [0, 3, 5, 15, 16, 17, 19, 36, 40, 43, 62, 65, 73], "centa": [0, 65], "omar": [0, 65], "d": [0, 38, 65, 92], "r\u00e9my": [0, 65], "degenn": [0, 65], "yanni": [0, 65], "flet": [0, 65], "berliac": [0, 65], "hector": [0, 65], "kohler": [0, 65], "edouard": [0, 65], "leurent": [0, 65], "pierr": [0, 65], "m\u00e9nard": [0, 65], "wari": [0, 65], "radji": [0, 65], "sauxpa": [0, 65], "xuedong": [0, 65], "shang": [0, 65], "ju": [0, 65], "t": [0, 3, 15, 16, 17, 19, 38, 40, 44, 49, 50, 53, 58, 59, 60, 63, 65, 66, 67, 69, 72, 74, 75, 105, 106, 107, 108], "timotheemathieu": [0, 52, 65, 75], "riccardo": [0, 65], "della": [0, 65], "vecchia": [0, 65], "yannberthelot": [0, 65], "If": [0, 41, 48, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 87, 88, 89, 91, 93, 94, 95, 104, 105, 106, 107, 108, 109, 110, 111], "you": [0, 41, 43, 45, 46, 47, 
49, 51, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 69, 72, 74, 76, 77, 105, 106, 107, 108, 109, 110, 111], "us": [0, 2, 3, 7, 9, 15, 16, 18, 21, 22, 26, 36, 37, 38, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 56, 58, 59, 61, 62, 63, 64, 69, 72, 74, 77, 80, 81, 89, 91, 92, 94, 95, 96, 98, 99, 100, 102, 104, 106, 108, 109, 110, 111], "scientif": 0, "public": 0, "we": [0, 15, 18, 25, 35, 37, 38, 41, 43, 44, 46, 49, 50, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 75, 76, 78, 80, 81, 82, 84, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 102, 108, 109, 110, 111], "would": [0, 56, 58], "appreci": 0, "citat": 0, "bibtex": 0, "entri": [0, 59, 69, 104], "misc": [0, 63], "author": 0, "domingu": 0, "darwich": 0, "e": [0, 18, 38, 43, 44, 48, 49, 50, 52, 54, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 74, 75, 76, 78, 80, 81, 82, 93, 105, 106, 107, 109], "nard": 0, "valko": 0, "michal": 0, "doi": [0, 78], "10": [0, 8, 9, 15, 16, 18, 21, 27, 28, 32, 34, 37, 38, 39, 41, 42, 43, 46, 49, 50, 52, 53, 54, 55, 56, 60, 61, 62, 63, 67, 68, 76, 78, 99], "5281": 0, "zenodo": 0, "5544540": 0, "month": 0, "titl": [0, 15, 16, 17, 19, 40, 41, 43, 49, 50, 62, 82], "A": [0, 3, 5, 14, 15, 16, 20, 23, 36, 42, 43, 47, 50, 51, 53, 54, 66, 67, 68, 69, 71, 72, 73, 74, 76, 79, 84, 92, 93, 94, 105, 106, 107, 109], "reinforc": [0, 46, 53, 54, 61, 63, 67, 68, 72, 74, 105, 106, 107, 108, 109, 110, 111], "learn": [0, 17, 35, 45, 53, 54, 55, 56, 60, 61, 62, 63, 67, 68, 72, 74, 80, 81, 105, 106, 107, 108, 109, 110, 111], "librari": [0, 35, 43, 48, 51, 52, 59, 61, 63, 66, 67, 75, 76, 80, 87, 90, 91, 92, 93, 94, 95, 96, 109, 110], "research": [0, 54, 60, 63, 109], "educ": 0, "url": [0, 18], "http": [0, 18, 37, 46, 51, 52, 53, 54, 62, 63, 64, 66, 67, 68, 69, 71, 72, 74, 75, 76, 78, 89, 90, 91, 105, 106, 107, 108, 109], "github": [0, 52, 54, 62, 63, 66, 67, 69, 72, 75, 78, 90, 108, 109], "com": [0, 18, 52, 62, 63, 66, 67, 69, 72, 75, 90, 91, 108, 109], "py": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 
20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 47, 54, 59, 62, 63, 64, 69, 72, 91, 108, 109], "year": 0, "2021": [0, 17, 53, 54], "like": [0, 44, 46, 48, 53, 54, 56, 64, 78, 92, 109, 110], "thank": 0, "particip": 0, "phd": 0, "": [0, 7, 9, 12, 18, 21, 22, 26, 35, 37, 43, 44, 45, 48, 53, 54, 55, 57, 59, 60, 61, 62, 63, 64, 67, 68, 71, 72, 74, 76, 78, 82, 84, 92, 96, 98, 99, 100, 102, 103, 105, 106, 107, 108, 110], "made": [0, 62], "happen": [0, 28, 61], "particular": [0, 52, 62, 76, 91, 92, 93, 94, 95, 96], "work": [0, 16, 40, 48, 51, 56, 60, 61, 62, 63, 76, 77, 109], "environ": [0, 4, 5, 17, 18, 19, 37, 40, 42, 45, 46, 47, 51, 52, 53, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 70, 71, 72, 73, 74, 76, 85, 87, 97, 98, 99, 100, 102, 104, 105, 106, 107, 108, 109, 110, 111], "universit\u00e9": 0, "de": 0, "lill": 0, "site": [0, 54], "uln": 0, "anr": 0, "anrt": 0, "renault": 0, "european": 0, "chist": 0, "era": 0, "delta": 0, "la": 0, "r\u00e9gion": 0, "haut": 0, "franc": 0, "mel": 0, "go": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 49, 53, 54, 55, 72, 74, 105, 106, 107], "end": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 44, 49, 50, 59, 69, 72, 74, 105, 106, 107], "download": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41], "full": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 62, 64, 69], "exampl": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 53, 54, 55, 56, 57, 58, 59, 60, 61, 63, 64, 69, 70, 72, 74, 80, 83, 89, 90, 100, 106, 108, 110, 111], "code": 
[2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 44, 48, 49, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 68, 69, 72, 74, 76, 77, 105, 106, 107, 108], "illustr": [2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 38, 42, 56, 61, 66, 67, 76, 82, 110], "adastop_compar": 2, "which": [2, 25, 37, 43, 44, 49, 50, 54, 56, 57, 61, 62, 63, 64, 66, 67, 68, 72, 74, 76, 78, 80, 81, 82, 88, 89, 90, 91, 93, 94, 95, 98, 99, 100, 102, 105, 106, 107], "adapt": [2, 44, 61, 63, 69, 91, 109, 110], "multipl": [2, 3, 36, 42, 52, 63, 76, 77, 78, 80, 81, 82, 85, 87, 92, 96, 110], "test": [2, 3, 18, 21, 22, 26, 30, 37, 43, 50, 53, 56, 59, 60, 62, 63, 75, 78, 98, 99, 100, 102, 108, 109, 110], "assess": [2, 3, 44, 49, 50, 61], "whether": [2, 3, 44, 68, 72, 74, 75, 76, 77, 80, 81, 82, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "train": [2, 3, 4, 7, 9, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 28, 33, 35, 36, 37, 38, 40, 42, 43, 44, 45, 46, 47, 49, 50, 52, 53, 55, 56, 57, 58, 59, 60, 63, 66, 67, 68, 75, 76, 77, 78, 82, 84, 86, 104, 108, 110], "agent": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 47, 48, 51, 54, 56, 57, 58, 60, 62, 63, 64, 72, 74, 75, 76, 77, 78, 79, 82, 83, 84, 89, 98, 99, 100, 101, 102, 104, 105, 106, 107], "ar": [2, 3, 16, 18, 35, 38, 40, 43, 44, 46, 48, 49, 51, 52, 55, 56, 57, 59, 60, 61, 62, 63, 64, 66, 67, 68, 72, 74, 75, 76, 80, 81, 82, 87, 92, 93, 95, 104, 105, 106, 107, 108, 110, 111], "statist": [2, 3, 37, 41, 44, 52, 53, 55, 56, 63, 75, 76, 77, 78], "differ": [2, 3, 38, 43, 44, 49, 51, 52, 54, 57, 58, 60, 61, 62, 66, 67, 68, 72, 74, 75, 76, 80, 81, 82, 84, 92, 96, 105, 106, 107, 108], "remark": [2, 3, 35, 44, 48, 49, 82], "case": [2, 3, 15, 28, 43, 44, 59, 61, 72, 74, 80, 81, 82, 105, 106, 107], "where": [2, 3, 21, 22, 26, 38, 43, 44, 48, 49, 55, 59, 60, 62, 
66, 67, 68, 69, 76, 88, 93, 95, 104], "two": [2, 3, 15, 37, 45, 49, 50, 51, 53, 57, 60, 61, 63, 64, 75, 99, 108], "deem": [2, 3], "can": [2, 3, 7, 9, 37, 38, 41, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 74, 75, 76, 80, 81, 82, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108, 109, 110], "mean": [2, 3, 15, 16, 18, 19, 35, 40, 44, 49, 50, 52, 56, 57, 61, 67, 68, 76, 78, 80, 81, 82, 108], "either": [2, 3, 44, 52, 61, 62], "thei": [2, 3, 60, 61, 62, 64, 80, 82, 88], "effici": [2, 3, 48, 49, 52, 64, 108, 111], "have": [2, 3, 43, 44, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 63, 64, 66, 67, 69, 72, 74, 80, 81, 82, 86, 105, 106, 107, 108], "been": [2, 3, 41, 44, 56, 72, 74, 105, 106, 107, 108], "enough": [2, 3, 44, 54, 108], "fit": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 37, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 53, 55, 56, 57, 58, 59, 60, 61, 63, 66, 67, 68, 75, 76, 77, 78, 79, 82, 83, 84, 99, 104], "variabl": [2, 3, 43, 49, 50, 54, 61, 72, 74, 94, 105, 106, 107], "result": [2, 7, 9, 18, 21, 22, 26, 30, 43, 44, 46, 48, 53, 56, 57, 58, 59, 61, 63, 72, 74, 75, 76, 78, 79, 82, 105, 106, 107, 108, 110], "info": [2, 6, 7, 8, 9, 10, 11, 12, 13, 15, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 37, 38, 41, 43, 45, 49, 50, 53, 54, 55, 56, 58, 59, 60, 61, 64, 67, 69, 72, 74, 82, 84, 103, 105, 106, 107], "13": [2, 50, 53, 54, 55, 56], "35": [2, 43, 50, 53, 54, 55, 56], "finish": [2, 37, 43, 55, 58, 59, 60, 72, 74, 75, 105, 106, 107], "agent1": [2, 40, 44], "v": [2, 21, 22, 37, 43, 44, 50, 52, 53, 54, 75], "agent2": [2, 40, 44], "diff": [2, 44, 62], "std": [2, 15, 19, 40, 44, 61, 80, 81, 82], "1": [2, 4, 8, 11, 12, 15, 17, 18, 19, 21, 22, 24, 26, 27, 30, 31, 34, 35, 37, 38, 39, 40, 41, 43, 44, 46, 47, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 66, 67, 68, 69, 70, 72, 75, 76, 78, 79, 80, 82, 84, 89, 93, 94, 95, 104, 109], "2": [2, 
8, 11, 16, 17, 18, 19, 21, 22, 24, 28, 30, 31, 35, 37, 38, 39, 40, 41, 43, 44, 46, 47, 49, 50, 51, 53, 54, 55, 56, 57, 58, 60, 66, 67, 68, 72, 75, 76, 78, 80], "decis": [2, 44, 52, 75], "0": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 67, 68, 69, 70, 72, 74, 75, 76, 78, 79, 80, 81, 82, 87, 93, 94, 95, 105, 106, 107], "274": 2, "85": [2, 50, 55, 56], "068": [2, 53], "189": 2, "206": [2, 63], "185": 2, "82553": 2, "71784": 2, "smaller": [2, 52, 69, 75], "from": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 78, 79, 80, 82, 83, 84, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 100, 105, 106, 107, 110], "rlberri": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 26, 29, 30, 34, 37, 38, 40, 41, 42, 45, 46, 47, 50, 51, 52, 55, 59, 62, 63, 64, 109, 111], "env": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 66, 67, 68, 69, 76, 79, 82, 83, 84, 89, 97, 98, 99, 100, 101, 102, 105, 106, 107], "import": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 63, 64, 68, 69, 70, 71, 73, 76, 79, 80, 82, 83, 84, 87, 89, 90, 100, 110, 111], "gym_mak": [2, 7, 9, 21, 22, 26, 43, 44, 46, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 63, 68, 69, 70, 71, 76, 79, 82, 84, 89], "stable_baselines3": [2, 35, 43, 44, 52, 53, 55, 56, 57, 58, 60, 61, 68, 71, 79], "stable_baselin": [2, 43, 44, 52, 53, 55, 56, 57, 
58, 60, 61, 63, 79], "stablebaselinesag": [2, 43, 44, 52, 53, 55, 56, 57, 58, 60, 61, 63, 79], "manag": [2, 3, 4, 15, 16, 17, 18, 19, 21, 22, 26, 35, 36, 38, 40, 41, 42, 43, 44, 46, 47, 48, 50, 52, 55, 56, 57, 58, 59, 60, 61, 63, 66, 67, 71, 110, 111], "adastopcompar": [2, 52], "env_ctor": [2, 3, 4, 15, 16, 17, 18, 19, 37, 40, 41, 44, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 68, 70, 71, 73, 76, 83, 98, 99, 100, 102], "env_kwarg": [2, 3, 4, 15, 16, 17, 18, 19, 37, 40, 41, 44, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 68, 70, 71, 73, 76, 83, 98, 99, 100, 102], "dict": [2, 4, 21, 22, 26, 33, 35, 37, 41, 43, 44, 46, 48, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 66, 67, 68, 69, 71, 72, 74, 75, 76, 79, 82, 83, 84, 86, 98, 99, 100, 102, 104, 105, 106, 107], "id": [2, 18, 21, 22, 26, 43, 44, 46, 52, 55, 56, 57, 58, 59, 60, 61, 63, 66, 67, 68, 70, 71, 73, 79, 82, 84], "v1": [2, 7, 9, 35, 44, 46, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 68, 69, 70, 72, 73, 74, 79, 82, 84, 105, 106, 107, 110, 111], "agent_class": [2, 47, 52, 63, 76, 82, 84], "train_env": [2, 3, 47, 52, 55, 76, 83], "fit_budget": [2, 3, 4, 15, 16, 17, 18, 19, 21, 22, 26, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 76, 79, 82, 83, 84], "5e4": [2, 52, 56, 61], "agent_nam": [2, 4, 18, 21, 22, 26, 43, 44, 49, 52, 55, 56, 57, 58, 59, 60, 61, 75, 76, 79], "init_kwarg": [2, 17, 19, 21, 22, 26, 33, 37, 40, 43, 44, 46, 47, 48, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 76, 79, 98, 99, 100, 101, 102], "algo_cl": [2, 43, 44, 52, 55, 56, 57, 58, 60, 61, 68, 79], "polici": [2, 6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 37, 43, 45, 50, 52, 53, 56, 57, 59, 63, 66, 67, 68, 72, 74, 79, 105, 106, 107, 108, 110, 111], "mlppolici": [2, 35, 52, 53, 56, 57, 61, 68, 79], "verbos": [2, 35, 52, 53, 55, 56, 57, 58, 61, 64, 68, 75, 76, 79, 85, 87], "print": [2, 3, 7, 8, 9, 11, 12, 13, 18, 21, 22, 25, 26, 28, 32, 35, 38, 43, 44, 46, 51, 52, 53, 55, 56, 58, 59, 60, 64, 69, 75, 76, 
104], "managers_path": [2, 52, 75], "total": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 72, 74, 76, 80, 105, 106, 107], "run": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 46, 49, 50, 55, 56, 57, 58, 59, 60, 62, 64, 66, 67, 68, 72, 74, 75, 76, 77, 82, 87, 105, 106, 107, 108, 109, 110, 111], "time": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 48, 49, 50, 52, 53, 54, 55, 56, 58, 60, 61, 62, 66, 67, 69, 72, 74, 75, 76, 77, 79, 80, 81, 82, 99, 104, 105, 106, 107, 108, 109], "script": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 47, 48, 62, 63, 64, 85, 87], "minut": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 111], "000": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 42], "second": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 46, 51, 55, 57, 60, 76, 104, 108], "python": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 47, 48, 62, 64, 72, 76, 77, 87, 109], "sourc": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107], "adastop_exampl": [2, 42], "jupyt": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41], "notebook": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 48, 63, 64, 76, 77], "ipynb": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41], "galleri": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 62, 64, 109, 111], "gener": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 43, 47, 51, 60, 62, 63, 64, 66, 67, 68, 69, 72, 74, 76, 78, 88, 89, 90, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108, 110], "sphinx": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 62, 63, 109], "compare_ag": [3, 44], "function": [3, 21, 22, 35, 36, 37, 41, 42, 43, 44, 50, 51, 54, 56, 57, 58, 59, 60, 61, 62, 63, 64, 67, 72, 74, 76, 78, 80, 82, 84, 85, 87, 105, 106, 107, 109, 111], "numpi": [3, 15, 16, 17, 18, 19, 31, 35, 37, 39, 40, 41, 44, 45, 48, 49, 50, 51, 53, 60, 61, 63, 66, 67, 69, 72, 74, 76, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108], "np": [3, 15, 16, 17, 18, 19, 31, 35, 37, 39, 40, 41, 44, 45, 49, 50, 53, 61, 69, 72, 74, 82, 84, 89, 91, 93, 94, 95, 105, 106, 107], "comparison": [3, 14, 20, 36, 60, 63, 66, 67, 75, 76, 82, 110, 111], "agentmanag": [3, 44, 63], "rlberry_research": [3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 29, 30, 31, 32, 33, 34, 37, 39, 40, 47, 48, 50, 63, 82], "bernoullibandit": [3, 15, 16], "indexag": [3, 15, 16, 19, 40, 63], "makeboundedmossindex": [3, 16], "makeboundednptsindex": [3, 16], "makeboundeducbindex": [3, 15, 16], "makeetcindex": [3, 16], "paramet": [3, 4, 15, 16, 17, 18, 19, 28, 35, 37, 40, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 
55, 57, 58, 59, 61, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 87, 88, 89, 91, 93, 94, 95, 97, 98, 99, 100, 102, 103, 104, 105, 106, 107], "problem": [3, 15, 16, 17, 18, 19, 40, 44, 60], "arrai": [3, 15, 16, 17, 18, 19, 31, 40, 50, 69, 72, 74, 75, 80, 82, 105, 106, 107], "6": [3, 8, 16, 17, 18, 31, 43, 49, 50, 53, 54, 55, 56, 60, 75, 83], "9": [3, 15, 16, 18, 19, 25, 32, 35, 37, 40, 41, 43, 46, 47, 49, 50, 53, 54, 55, 56, 58, 60, 63, 80, 81, 82], "arm": [3, 15, 16, 18, 19, 40], "len": [3, 15, 16, 18, 19, 39, 40], "2000": [3, 15, 16, 53, 54, 56], "horizon": [3, 8, 11, 12, 15, 16, 17, 18, 19, 24, 30, 32, 34, 40, 41, 46, 47, 49, 50, 67, 68, 76], "n": [3, 32, 37, 38, 49, 52, 53, 54, 75, 89, 91, 93, 94, 95, 109], "50": [3, 7, 8, 13, 28, 41, 43, 44, 47, 49, 50, 53, 54, 55, 56, 59, 61, 75, 78, 79], "number": [3, 15, 16, 17, 18, 19, 21, 22, 26, 38, 40, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 55, 56, 58, 59, 60, 61, 63, 66, 67, 68, 69, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108], "construct": [3, 15, 16, 17, 19, 35, 40, 43, 48, 49, 54, 62, 63, 69, 110], "experi": [3, 15, 16, 17, 18, 19, 35, 36, 40, 42, 43, 50, 52, 53, 61, 62, 63, 66, 67, 69, 75, 76, 79, 82, 84, 85, 108], "p": [3, 15, 16, 31, 37, 43, 44, 50, 53, 54, 78], "class": [3, 15, 16, 17, 18, 19, 21, 22, 26, 37, 38, 40, 41, 43, 45, 46, 49, 50, 51, 53, 54, 55, 57, 58, 59, 60, 63, 66, 67, 68, 69, 70, 72, 73, 74, 75, 76, 77, 84, 88, 89, 91, 92, 93, 94, 95, 96, 98, 99, 100, 102, 104, 105, 106, 107, 108, 109], "ucbag": [3, 16, 19, 40], "name": [3, 12, 15, 16, 17, 18, 19, 21, 22, 26, 37, 38, 40, 41, 43, 45, 49, 50, 53, 55, 56, 57, 58, 59, 60, 61, 62, 64, 66, 67, 68, 72, 74, 75, 76, 78, 79, 80, 81, 82, 84, 85, 87, 104, 105, 106, 107], "ucb": [3, 14, 16, 20, 36, 40, 66, 67, 76, 82], "def": [3, 4, 15, 16, 17, 18, 19, 35, 37, 38, 40, 41, 45, 46, 49, 50, 53, 57, 67, 87], "__init__": [3, 15, 16, 17, 18, 19, 37, 38, 40, 41, 45, 46, 49, 50, 53, 
57, 66, 67, 68, 72, 74, 105, 106, 107], "self": [3, 15, 16, 17, 18, 19, 37, 38, 40, 41, 45, 49, 50, 53, 57, 59, 63, 66, 67, 68, 69, 75, 76, 89, 92, 95, 104], "kwarg": [3, 15, 16, 17, 18, 19, 37, 38, 40, 41, 45, 46, 49, 50, 53, 55, 57, 58, 59, 60, 63, 66, 67, 68, 70, 71, 72, 73, 75, 76, 105, 106, 107], "index": [3, 14, 15, 19, 20, 36, 40, 62, 66, 67, 75, 76, 82, 96], "_": [3, 4, 8, 15, 16, 17, 18, 19, 32, 37, 38, 40, 43, 44, 53, 60, 66, 67, 69, 76], "writer_extra": [3, 15, 16, 17, 18, 19, 40, 41, 49, 58, 66, 67, 68], "reward": [3, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 36, 37, 42, 43, 45, 53, 54, 56, 58, 59, 63, 66, 67, 68, 69, 72, 74, 76, 78, 82, 84, 105, 106, 107, 108, 110, 111], "etcag": [3, 16], "etc": [3, 16, 45, 51, 53, 57, 63, 64, 108], "20": [3, 15, 16, 17, 19, 20, 29, 34, 37, 40, 43, 49, 50, 53, 54, 55, 56, 60, 76, 80, 82, 99], "action_and_reward": [3, 16, 18, 66, 67, 68], "mossag": [3, 16, 40], "moss": [3, 16, 40], "nptsagent": [3, 16], "npt": [3, 16], "tracker_param": [3, 16, 17], "agents_class": [3, 16, 17], "parallel": [3, 16, 17, 19, 40, 46, 55, 57, 63, 76, 77, 82, 83, 84, 108, 111], "process": [3, 16, 17, 19, 38, 40, 43, 57, 59, 60, 61, 62, 63, 66, 67, 68, 76, 77, 82, 83, 84], "mp_context": [3, 16, 17, 19, 40, 48, 76, 77, 82, 83, 84], "fork": [3, 16, 17, 19, 40, 63, 76, 77], "n_fit": [3, 4, 15, 16, 17, 18, 19, 21, 22, 26, 37, 38, 40, 41, 43, 44, 46, 48, 49, 50, 55, 56, 57, 58, 59, 60, 61, 76, 79, 82, 83, 84], "eval_funct": [3, 78], "eval_budget": [3, 78], "none": [3, 8, 15, 18, 35, 37, 38, 43, 49, 50, 53, 54, 55, 56, 58, 59, 60, 61, 66, 67, 68, 69, 71, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 87, 89, 91, 92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 104, 105, 106, 107], "agent_id": [3, 44, 76, 78], "df": [3, 56, 59, 80], "get_writer_data": [3, 63, 76], "return": [3, 4, 15, 16, 17, 18, 19, 37, 38, 40, 41, 45, 46, 49, 50, 52, 53, 57, 63, 64, 66, 67, 68, 69, 71, 72, 74, 75, 76, 78, 79, 82, 84, 86, 88, 89, 
91, 92, 93, 94, 95, 96, 97, 100, 104, 105, 106, 107], "max": [3, 15, 16, 17, 19, 37, 40, 53, 54, 55, 77], "sum": [3, 43, 67, 68, 75], "loc": [3, 56], "tag": [3, 15, 16, 17, 19, 38, 40, 41, 43, 46, 49, 56, 59, 61, 63, 69, 82, 84, 86, 104, 109], "valu": [3, 18, 21, 22, 35, 37, 43, 44, 45, 49, 50, 53, 55, 56, 58, 60, 61, 66, 67, 68, 69, 72, 74, 75, 76, 80, 81, 82, 84, 86, 92, 94, 95, 96, 104, 105, 106, 107], "method": [3, 38, 44, 45, 46, 48, 49, 51, 53, 59, 60, 61, 63, 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 84, 88, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 109], "tukey_hsd": [3, 44, 78], "b": [3, 53, 54, 62, 69, 75, 78, 91], "10_000": [3, 78], "comparison_ag": [3, 42], "show": [4, 15, 16, 17, 18, 19, 38, 39, 40, 41, 43, 44, 45, 53, 56, 57, 59, 79, 80, 81, 82, 108, 111], "how": [4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 35, 38, 40, 41, 43, 44, 45, 49, 52, 64, 69, 72, 74, 80, 81, 82, 105, 106, 107, 108, 110], "pendulum": [4, 54], "gymnasium": [4, 6, 7, 9, 21, 22, 26, 33, 35, 53, 55, 63, 66, 67, 68, 71, 72, 73, 74, 91, 92, 93, 94, 95, 96, 97, 105, 106, 107, 108, 110, 111], "gym": [4, 7, 9, 21, 22, 26, 35, 43, 54, 57, 63, 72, 73, 74, 91, 92, 93, 94, 95, 96, 97, 105, 106, 107], "torch": [4, 6, 7, 9, 10, 21, 22, 26, 33, 46, 48, 51, 55, 58, 60, 62, 63, 64, 66, 76, 82, 83, 84, 90, 109, 110, 111], "sacag": 4, "experimentmanag": [4, 15, 16, 17, 18, 19, 21, 22, 26, 37, 38, 40, 41, 43, 46, 47, 48, 49, 50, 51, 56, 57, 58, 59, 61, 63, 66, 67, 68, 75, 77, 78, 79, 82, 83, 84, 98, 100, 104, 110], "wrap_spac": [4, 63, 72, 73], "true": [4, 11, 16, 17, 18, 21, 22, 32, 33, 35, 37, 38, 40, 43, 44, 46, 47, 49, 50, 53, 55, 57, 59, 63, 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 79, 80, 81, 82, 84, 88, 89, 104, 105, 106, 107], "setup": [4, 63], "env_nam": 4, "int": [4, 15, 16, 17, 18, 19, 38, 40, 55, 58, 59, 60, 66, 67, 68, 69, 71, 72, 74, 75, 76, 77, 78, 79, 80, 82, 84, 89, 90, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "2e5": 4, "f": [4, 7, 9, 35, 38], "wrapper": 
[4, 6, 7, 9, 11, 21, 22, 24, 26, 29, 30, 33, 34, 53, 54, 55, 57, 58, 61, 63, 70, 71, 74, 108, 109], "timelimit": [4, 6, 33, 72, 74, 105, 106, 107], "max_episode_step": [4, 6, 33, 69], "200": [4, 6, 10, 12, 29, 46, 56, 83], "recordepisodestatist": 4, "creat": [4, 35, 37, 38, 46, 49, 50, 51, 59, 60, 61, 62, 63, 66, 67, 68, 78, 80, 81, 82, 91, 108, 109, 110, 111], "instanc": [4, 45, 46, 47, 49, 51, 53, 55, 57, 58, 59, 60, 61, 62, 63, 66, 67, 68, 72, 74, 75, 76, 77, 79, 82, 84, 104, 105, 106, 107, 108], "xp_manag": [4, 18, 19, 41], "enable_tensorboard": [4, 55, 63, 76], "start": [4, 16, 35, 38, 40, 53, 54, 60, 62, 63, 72, 74, 93, 95, 96, 105, 106, 107, 108, 110], "demo_sac": 4, "demo": [5, 14, 20, 23, 36, 42, 66, 67, 71, 73, 76, 79, 84, 93, 105, 107], "ppo": [5, 21, 22, 36, 42, 43, 44, 53, 55, 56, 58, 60, 61, 63, 68, 73, 75, 79, 83, 110], "algorithm": [5, 14, 17, 19, 20, 25, 35, 36, 37, 40, 42, 43, 44, 49, 50, 52, 53, 56, 57, 58, 60, 61, 63, 64, 66, 67, 68, 72, 73, 74, 76, 78, 80, 81, 82, 105, 106, 107, 108, 109], "pball2d": [5, 23, 36, 48], "valueiter": [5, 25, 36, 50, 53], "chain": [5, 23, 35, 36, 49, 53, 54, 61], "rsucbvi": [5, 24, 36, 47], "mountaincar": [5, 23, 36, 43, 51, 54, 60, 61, 89], "a2c": [5, 35, 36, 42, 44, 56, 57, 61, 63, 68, 73, 75, 79, 110, 111], "sac": [5, 36, 63, 75, 76], "soft": [5, 36, 76], "actor": [5, 36, 43, 76], "critic": [5, 21, 22, 36, 43, 58, 72, 74, 76, 103, 105, 106, 107], "mbqvi": [5, 29, 34, 36], "gridworld": [5, 23, 36, 37, 41, 50, 61, 63, 72, 74, 105, 106, 107], "rskernelucbviag": [5, 36, 105], "acrobot": [5, 23, 30, 36, 42, 52, 54, 61, 68, 70, 73, 75, 79, 83, 105, 110, 111], "dqn": [5, 26, 33, 36, 44, 61, 63, 73, 83], "cartpol": [5, 35, 36, 43, 44, 46, 52, 53, 54, 55, 56, 57, 58, 60, 61, 63, 68, 69, 72, 73, 74, 82, 84, 105, 106, 107], "set": [6, 7, 8, 9, 10, 11, 12, 13, 44, 46, 54, 60, 62, 63, 64, 66, 67, 68, 69, 72, 74, 76, 82, 84, 89, 90, 91, 92, 93, 94, 95, 96, 103, 105, 106, 107, 109, 111], "up": [6, 7, 8, 9, 10, 11, 12, 13, 62, 
72, 74, 82, 105, 106, 107, 111], "an": [6, 8, 10, 12, 13, 16, 17, 19, 21, 22, 35, 37, 38, 40, 41, 43, 44, 47, 48, 50, 52, 55, 56, 57, 58, 60, 61, 63, 64, 67, 68, 69, 70, 72, 74, 76, 80, 81, 82, 83, 92, 93, 94, 95, 96, 100, 104, 105, 106, 107, 109], "chosen": [6, 7, 8, 9, 10, 11, 12, 13, 50, 61, 64, 68, 72, 74, 76, 93, 105, 106, 107], "here": [6, 7, 8, 9, 10, 11, 12, 13, 35, 43, 44, 49, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, 64, 108, 109, 110], "a2cag": [6, 44, 48, 57, 76, 82, 84], "benchmark": [6, 10, 25, 31, 32, 34, 47, 48, 63, 64, 98, 99, 100, 102], "ball_explor": [6, 10, 31, 48], "256": [6, 22, 33, 48, 76], "n_timestep": 6, "50_000": 6, "gamma": [6, 8, 11, 12, 13, 21, 22, 24, 25, 28, 29, 30, 32, 34, 35, 37, 43, 46, 47, 48, 49, 50, 53, 59, 67, 68, 76], "99": [6, 8, 11, 12, 21, 22, 24, 29, 30, 37, 45, 46, 48, 50, 52, 55, 76], "learning_r": [6, 21, 22, 26, 35, 43, 46, 48, 53, 55, 56, 57], "001": [6, 43, 48], "budget": [6, 7, 9, 10, 11, 12, 18, 24, 30, 33, 37, 38, 41, 45, 49, 50, 53, 55, 57, 58, 59, 60, 66, 67, 68, 76, 99], "enable_rend": [6, 8, 10, 11, 12, 13, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 49, 53, 54, 61, 72, 105, 106, 107], "observ": [6, 7, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 37, 43, 45, 49, 50, 53, 54, 59, 61, 67, 68, 69, 72, 74, 105, 106, 107], "reset": [6, 7, 8, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 45, 49, 50, 53, 54, 59, 61, 67, 68, 69, 72, 74, 104, 105, 106, 107], "tt": [6, 8, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 28, 29, 30, 32, 33, 49, 50, 53, 54, 59, 61], "rang": [6, 7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 44, 45, 49, 50, 53, 54, 59, 60, 61, 67, 69, 80, 82, 87, 105], "action": [6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 37, 40, 41, 43, 45, 49, 50, 53, 54, 58, 59, 61, 63, 66, 67, 68, 69, 72, 74, 93, 95, 105, 106, 107], "termin": [6, 7, 9, 10, 11, 12, 13, 18, 21, 22, 
24, 25, 26, 28, 29, 30, 32, 33, 34, 43, 45, 49, 50, 53, 54, 59, 61, 63, 67, 69, 72, 74, 105, 106, 107], "truncat": [6, 7, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 45, 49, 50, 53, 54, 59, 61, 67, 69, 72, 74, 105, 106, 107], "step": [6, 7, 8, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 41, 43, 44, 45, 49, 50, 53, 54, 56, 59, 61, 62, 66, 67, 69, 72, 74, 75, 78, 104, 105, 106, 107], "done": [6, 7, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 45, 47, 48, 49, 50, 53, 54, 55, 59, 61, 62, 63, 64, 69, 72, 74, 75, 76, 88, 105, 106, 107], "video": [6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 53, 54, 72, 105, 106, 107, 109, 110], "save_video": [6, 8, 10, 11, 12, 13, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 53, 54, 62, 72, 105, 106, 107], "_video": [6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 62], "video_plot_a2c": 6, "mp4": [6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 53, 54, 55, 62], "As": [7, 9, 44, 48, 49, 50, 55, 58, 59, 72, 74, 91, 92, 93, 94, 95, 96, 105, 106, 107], "computation": [7, 9], "intens": [7, 9], "hard": [7, 9], "tune": [7, 9, 21, 22, 26, 33, 43, 44, 55], "one": [7, 9, 41, 44, 46, 49, 53, 54, 55, 56, 59, 61, 62, 63, 64, 66, 67, 69, 72, 74, 76, 78, 80, 81, 82, 84, 94, 99, 105, 106, 107, 108], "tensorboard": [7, 9, 55, 63, 66, 67, 68, 76, 86, 104, 108, 109, 110, 111], "visual": [7, 9, 43, 49, 56, 57, 75, 109, 110], "command": [7, 9, 62], "logdir": [7, 9, 55], "path": [7, 9, 54, 55, 56, 59, 62, 63, 66, 67, 68, 75, 76, 78, 82, 84, 86], "writer": [7, 9, 18, 38, 41, 46, 57, 59, 61, 63, 66, 67, 68, 76, 82, 84, 110], "log_dir": [7, 9, 63], "parent": [7, 9, 53, 54, 86, 104], "util": [7, 9, 21, 22, 26, 33, 35, 41, 46, 54, 58, 59, 61, 63, 64], "summarywrit": [7, 9, 63, 66, 67, 68, 76, 104], "dqnagent": [7, 23, 36, 44, 71, 76, 82, 83, 84], "log": [7, 9, 41, 46, 48, 55, 56, 57, 63, 66, 67, 
68, 72, 74, 76, 104, 105, 106, 107, 110], "configure_log": [7, 9], "record_video": [7, 9, 21, 22, 26, 53, 54, 55], "recordvideo": [7, 9, 21, 22, 26, 53, 54, 55, 61], "shutil": [7, 9, 21, 22, 26], "o": [7, 9, 21, 22, 26, 76], "level": [7, 9, 44, 48, 52, 53, 54, 61, 63, 64, 68, 75, 76, 78, 80, 81, 82, 85, 103, 110], "render_mod": [7, 9, 21, 22, 26, 53, 54, 55, 61, 71, 72, 74, 105, 106, 107], "rgb_arrai": [7, 9, 21, 22, 26, 53, 54, 55, 61, 72, 74, 105, 106, 107], "epsilon_decay_interv": [7, 9], "1000": [7, 9, 33, 35, 38, 55, 56, 57, 58, 60, 76, 82], "set_writ": [7, 9, 66, 67, 68, 76], "temp": [7, 9, 15, 18, 21, 22, 26, 37, 38, 41, 43, 55, 56, 58, 59], "episod": [7, 9, 21, 22, 26, 43, 45, 46, 47, 49, 53, 54, 55, 59, 61, 66, 67, 68, 69, 72, 74, 76, 82, 84, 104, 105, 106, 107], "3": [7, 8, 9, 15, 18, 19, 28, 31, 35, 37, 41, 43, 49, 50, 53, 54, 55, 56, 58, 60, 61, 72, 74, 76, 80, 87, 104, 105, 106, 107], "fals": [7, 9, 11, 16, 18, 21, 22, 25, 26, 32, 33, 40, 41, 43, 44, 45, 47, 50, 53, 54, 55, 57, 58, 59, 60, 61, 68, 69, 71, 72, 73, 76, 79, 80, 81, 82, 84, 85, 87, 88, 89, 102, 104], "while": [7, 9, 37, 38, 45, 53, 64, 69], "close": [7, 9, 21, 22, 26, 53, 54, 72, 74, 105, 106, 107], "need": [7, 9, 21, 22, 26, 38, 44, 46, 47, 48, 50, 51, 53, 54, 55, 56, 58, 59, 60, 61, 62, 64, 72, 74, 76, 80, 82, 91, 92, 93, 94, 95, 96, 105, 106, 107, 109, 110], "move": [7, 9, 21, 22, 26, 49, 50, 63, 72, 74, 105, 106, 107], "final": [7, 9, 18, 21, 22, 26, 35, 37, 43, 50, 110, 111], "insid": [7, 9, 21, 22, 26, 53, 55, 58, 59, 60, 63, 64, 84, 91, 96], "folder": [7, 9, 21, 22, 26, 41, 54, 56, 59, 62, 64, 77, 82, 84, 86], "document": [7, 9, 21, 22, 26, 41, 45, 60, 63, 110, 111], "renam": [7, 9, 21, 22, 26, 63], "rl": [7, 9, 21, 22, 26, 35, 44, 57, 61, 63, 64, 110, 111], "video_plot_dqn": 7, "rmtree": [7, 9, 21, 22, 26], "rlberry_scool": [8, 13, 25, 27, 28, 29, 32, 34, 41, 49, 53, 54, 59, 60, 63, 100], "mbqviagent": [8, 29, 34], "finit": [8, 13, 27, 28, 37, 50, 53, 54], "param": [8, 16, 46, 47, 
66, 67, 68, 72, 74, 76, 105, 106, 107], "n_sampl": [8, 29, 34], "100": [8, 18, 39, 41, 46, 47, 49, 50, 53, 54, 55, 56, 58, 60, 61, 67, 69, 76, 83, 87], "sampl": [8, 14, 17, 18, 20, 27, 32, 36, 37, 46, 49, 50, 51, 52, 53, 54, 60, 63, 66, 67, 68, 69, 72, 74, 76, 78, 82, 91, 92, 93, 94, 95, 96, 105, 106, 107], "per": [8, 43, 50, 94], "state": [8, 18, 21, 22, 28, 37, 43, 45, 50, 53, 63, 72, 74, 97, 105, 106, 107], "pair": [8, 72, 74, 104, 105, 106, 107], "7": [8, 17, 18, 21, 28, 41, 49, 50, 53, 54, 55, 56, 60], "wall": [8, 28, 37, 41, 50, 61], "success_prob": [8, 37, 41, 50], "evalu": [8, 21, 22, 26, 35, 37, 43, 44, 45, 47, 49, 50, 53, 55, 57, 58, 60, 66, 67, 68, 70, 75, 76, 78, 79, 80, 82, 108, 110], "determinist": [8, 35, 51, 67, 68], "version": [8, 21, 22, 26, 49, 53, 54, 59, 62, 72, 74, 76, 87, 105, 106, 107, 110], "env_ev": 8, "next_": 8, "video_plot_mbqvi": 8, "munchausendqnag": 9, "munchausen": [9, 63], "5": [9, 15, 17, 19, 25, 27, 31, 35, 37, 39, 40, 41, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 67, 68, 69, 71, 75, 76, 79, 80, 89], "video_plot_mdqn": 9, "ppoagent": [10, 23, 36, 44, 71, 76, 83], "n_step": [10, 21, 22, 35, 43, 48, 55], "3e3": 10, "video_plot_ppo": 10, "rskernelucbvi": 11, "rescalerewardwrapp": [11, 24, 30, 70], "rescak": 11, "300": [11, 24, 30, 55], "bonus_scale_factor": [11, 12, 24, 30, 47], "01": [11, 20, 24, 26, 30, 37, 38, 41, 42, 52, 53, 56, 75, 87], "min_dist": [11, 24, 30, 47], "bandwidth": [11, 80, 82], "05": [11, 44, 55, 56, 78], "beta": [11, 15, 44, 63, 69, 75], "kernel_typ": [11, 39], "gaussian": [11, 14, 20, 36, 39, 61, 63, 66, 67, 76, 78, 80, 81, 82], "500": [11, 12, 21, 22, 26, 33, 38, 41, 43, 44, 46, 52, 55, 56, 59, 61, 69, 76, 79, 82, 83, 84], "time_before_don": 11, "achiev": [11, 72, 74, 105, 106, 107, 108], "goal": [11, 43, 49, 50, 72, 74, 105, 106, 107, 108], "first": [11, 37, 38, 44, 49, 51, 52, 55, 56, 59, 60, 61, 62, 66, 67, 72, 74, 75, 76, 96, 104, 105, 106, 107, 108, 109], 
"video_plot_rs_kernel_ucbvi": 11, "rsucbviag": [12, 23, 30, 36, 47, 51, 89, 105], "classic_control": [12, 29, 33], "170": [12, 56], "r": [12, 18, 21, 22, 37, 43, 49, 50, 67, 68, 91], "ucbvi": [12, 49, 63], "video_plot_rsucbvi": 12, "dynprog": [13, 25, 28, 32, 53], "valueiterationag": [13, 23, 25, 32, 36, 37, 50, 53], "95": [13, 21, 22, 28, 46, 50, 53, 55, 56, 59, 63, 80, 81, 82], "break": [13, 21, 22, 25, 26, 28, 32, 37, 53, 59], "video_plot_vi": 13, "cumul": [14, 15, 16, 20, 36, 40, 41, 43, 49, 66, 67, 76, 82, 84], "regret": [14, 15, 16, 20, 36, 40, 49, 66, 67, 76, 82], "exp3": [14, 16, 20, 36, 66, 67, 76, 82], "thompson": [14, 17, 20, 36, 63, 66, 67, 76, 82], "bernoulli": [14, 17, 20, 36, 66, 67, 76, 82], "subplot": [14, 20, 36, 39, 40, 41, 56, 66, 67, 75, 76, 82], "variou": [14, 20, 36, 61, 66, 67, 76, 82], "base": [14, 19, 20, 36, 37, 40, 45, 53, 63, 66, 67, 68, 69, 72, 74, 75, 76, 77, 82, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "bai": [14, 20, 36, 66, 67, 76, 84, 93], "real": [14, 20, 36, 66, 67, 76, 84, 93, 109], "dataset": [14, 20, 35, 36, 66, 67, 76, 84, 93], "select": [14, 16, 20, 36, 43, 49, 53, 58, 66, 67, 76, 84, 93], "mirror": [14, 20, 36, 66, 67, 76, 84, 93], "In": [15, 18, 37, 43, 44, 49, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64, 72, 74, 80, 81, 82, 91, 105, 106, 107, 108, 110, 111], "prior": [15, 17, 63], "compar": [15, 16, 36, 37, 42, 43, 44, 50, 57, 60, 61, 68, 73, 75, 78, 79, 110, 111], "bound": [15, 72, 74, 91, 105, 106, 107], "support": [15, 62, 72, 74, 76, 96, 105, 106, 107, 109], "For": [15, 43, 44, 45, 46, 49, 53, 54, 55, 56, 57, 58, 59, 60, 62, 64, 66, 67, 69, 72, 74, 82, 84, 87, 94, 105, 106, 107, 109, 111, 112], "sub": [15, 16, 50, 62, 69, 92, 96], "02": [15, 38], "save": [15, 18, 21, 22, 24, 26, 28, 30, 33, 37, 38, 41, 43, 47, 53, 55, 56, 57, 58, 62, 63, 66, 67, 68, 72, 76, 77, 80, 81, 82, 84, 105, 106, 107, 108, 110], "pickl": [15, 18, 37, 38, 41, 43, 55, 56, 58, 59, 63, 66, 67, 68, 77, 82, 84], "rlberry_data": [15, 18, 37, 38, 
41, 43, 55, 56, 58, 59], "manager_data": [15, 18, 37, 38, 41, 43, 55, 56, 58, 76], "agent_2024": [15, 38], "16_10": [15, 18, 37, 38, 41], "15_bd80da50": 15, "manager_obj": [15, 18, 37, 38, 41, 43, 55, 56, 58, 59, 63], "max_work": [15, 37, 38, 43, 49, 55, 56, 58, 59, 60, 63, 76, 77, 83], "15_847030c5": 15, "26_8ade7c97": 15, "26_ae7d0a45": 15, "normalbandit": [15, 19, 40], "tsagent": [15, 17], "makesubgaussianucbindex": [15, 19, 40], "makebetaprior": [15, 17], "makegaussianprior": 15, "plot_writer_data": [15, 16, 17, 19, 38, 40, 41, 43, 46, 49, 56, 58, 61, 63], "definit": [15, 16, 17, 18, 19, 40, 44], "bernoullitsag": [15, 17], "boundeducbag": 15, "8": [15, 18, 22, 35, 43, 44, 46, 49, 50, 53, 54, 55, 56, 59, 60], "mc": [15, 16, 17, 19, 40, 66], "simu": [15, 16, 17, 19, 40], "comput": [15, 16, 17, 19, 37, 40, 44, 49, 52, 59, 61, 62, 63, 64, 66, 67, 72, 74, 75, 76, 80, 81, 82, 105, 106, 107, 108, 109], "plot": [15, 16, 17, 19, 36, 38, 42, 43, 46, 49, 50, 52, 55, 56, 58, 60, 63, 66, 67, 76, 79, 80, 81, 82, 108, 110], "pseudo": [15, 16, 17, 19, 40], "compute_pseudo_regret": [15, 16, 17, 19, 40], "cumsum": [15, 16, 17, 19, 40, 41, 50, 82, 84], "astyp": [15, 16, 19, 40], "output": [15, 16, 17, 19, 21, 22, 26, 37, 40, 41, 49, 50, 53, 54, 57, 58, 59, 60, 63, 66, 67, 68, 76, 77, 108, 110], "preprocess_func": [15, 16, 17, 19, 40, 41, 82, 84], "gaussiantsag": 15, "sigma": [15, 19, 40], "gaussianucbag": 15, "ones": [15, 19, 40], "469": [15, 20], "plot_ts_bandit": [15, 20], "sever": [16, 35, 38, 44, 46, 48, 49, 50, 52, 57, 61, 78, 80, 81, 82, 108], "product": [16, 60, 91], "also": [16, 45, 48, 51, 56, 60, 61, 62, 64, 66, 67, 72, 73, 74, 82, 89, 105, 106, 107, 108, 110, 111], "home": [16, 40], "runner": [16, 40], "main": [16, 40, 47, 48, 49, 52, 62, 63, 64, 72, 76, 104, 111], "616": [16, 40], "userwarn": [16, 40, 54], "No": [16, 40, 44, 50, 54], "artist": [16, 40], "label": [16, 40, 64], "found": [16, 40, 52, 76, 108], "put": [16, 21, 22, 26, 37, 40, 43, 45, 53], "legend": [16, 
40], "note": [16, 40, 44, 46, 49, 52, 57, 66, 67, 68, 69, 72, 74, 76, 82, 84, 105, 106, 107], "whose": [16, 40, 61, 67, 69, 82], "underscor": [16, 40, 104], "ignor": [16, 40], "when": [16, 18, 38, 40, 43, 44, 48, 51, 52, 57, 59, 60, 61, 62, 63, 64, 66, 67, 72, 74, 75, 76, 78, 97, 104, 105, 106, 107], "call": [16, 35, 38, 40, 44, 46, 48, 49, 52, 55, 58, 59, 60, 61, 63, 64, 66, 67, 69, 72, 74, 76, 77, 79, 80, 81, 82, 88, 105, 106, 107], "argument": [16, 40, 55, 58, 60, 61, 63, 66, 67, 68, 71, 73, 76, 89, 96, 98, 99, 100, 102, 109], "plt": [16, 18, 19, 39, 40, 41, 56, 61, 75, 79, 80, 81, 82], "matplotlib": [16, 18, 19, 39, 40, 41, 56, 61, 75, 79, 80, 81, 82, 109], "pyplot": [16, 18, 19, 39, 40, 41, 56, 61, 79], "randomizedag": [16, 17], "makeboundedimedindex": 16, "makeboundeducbvindex": 16, "makeexp3index": [16, 17], "ucbvag": 16, "ucbv": 16, "imedag": 16, "im": 16, "exp3ag": [16, 17], "prob": [16, 17, 35], "seed": [16, 18, 30, 34, 35, 38, 43, 44, 47, 48, 55, 56, 57, 58, 59, 61, 63, 66, 67, 68, 71, 72, 73, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 91, 92, 93, 94, 95, 96, 97, 100, 105, 106, 107, 110, 111], "42": [16, 30, 50, 53, 55, 56, 58, 60, 61, 75, 79, 82, 83, 84, 96], "should": [16, 17, 19, 21, 22, 26, 40, 51, 59, 60, 61, 62, 63, 64, 66, 67, 72, 74, 80, 81, 82, 84, 105, 106, 107], "give": [16, 17, 18, 19, 35, 40, 50, 55, 56, 58, 59, 60, 61, 62, 108, 109], "even": [16, 17, 19, 38, 40, 72, 74, 89, 105, 106, 107], "compute_regret": 16, "linestyl": [16, 80, 81, 82], "each": [16, 43, 44, 47, 48, 49, 50, 51, 52, 60, 61, 62, 63, 64, 67, 68, 69, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 91, 92, 95, 96, 104, 105, 106, 107, 108], "compute_na": 16, "fig": [16, 19, 39, 40, 41, 75], "ax": [16, 19, 39, 40, 41, 56, 75, 80, 81, 82], "sharei": 16, "figsiz": [16, 19, 39, 40, 75], "ravel": [16, 40], "lambda": [16, 63, 78], "na": [16, 59], "str": [16, 21, 22, 26, 56, 66, 67, 68, 69, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 84, 85, 87, 91, 92, 98, 99, 100, 102, 103, 104, 105, 106, 
107], "tight_layout": [16, 40], "41": [16, 20, 50, 53, 56], "147": [16, 20, 44, 55], "plot_compare_index_bandit": [16, 20], "defin": [17, 19, 37, 40, 43, 44, 50, 54, 63, 70, 72, 73, 74, 76, 77, 89, 104, 105, 106, 107, 109, 110, 111], "random": [17, 35, 37, 43, 44, 49, 50, 51, 53, 60, 61, 66, 67, 68, 69, 72, 74, 75, 76, 78, 79, 88, 89, 90, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108], "adversarialbandit": 17, "3000": [17, 19, 40, 53, 56], "switching_reward": 17, "gap": 17, "rate": [17, 52, 62], "adversari": 17, "switch": [17, 63], "over": [17, 21, 22, 26, 49, 61, 67, 68, 76, 78, 108, 109], "exponenti": [17, 91], "long": [17, 59, 62, 63, 64], "phase": 17, "inspir": 17, "zimmert": 17, "julian": 17, "yevgeni": 17, "seldin": 17, "tsalli": 17, "inf": [17, 18], "optim": [17, 18, 21, 22, 26, 33, 37, 43, 49, 50, 51, 53, 55, 57, 66, 67, 68, 76, 108, 109], "stochast": [17, 21, 22, 26, 43, 49, 52, 60, 67, 108], "j": [17, 35, 43, 44], "mach": 17, "re": [17, 59, 60, 62, 76, 77], "22": [17, 40, 42, 43, 50, 53, 54, 55, 56, 63], "28": [17, 50, 53, 54, 55, 56, 59, 60], "zero": [17, 37, 38, 45, 50, 53], "exp": [17, 39], "high_reward": 17, "floor": [17, 18], "els": [17, 18, 43, 53, 54, 59, 92], "selected_reward": 17, "enumer": [17, 39, 40, 61, 79], "axi": [17, 37, 50, 61, 63, 80, 81, 82], "648": [17, 20], "plot_exp3_bandit": [17, 20], "exempl": 18, "sequenti": [18, 52, 75, 108], "halv": [18, 76], "find": [18, 43, 46, 53, 54, 55, 59, 63, 108, 110], "best": [18, 76], "server": [18, 63], "ubuntu": [18, 53, 54], "among": [18, 49, 66, 67, 68, 76], "choic": [18, 78], "french": 18, "quirck": 18, "applic": 18, "possibl": [18, 43, 44, 48, 49, 52, 61, 63, 64, 72, 74, 75, 80, 82, 93, 95, 104, 105, 106, 107, 108], "timeout": [18, 46, 57, 76], "ping": 18, "handl": [18, 44, 48, 51, 63, 104], "median": 18, "instead": [18, 48, 49, 53, 56, 61, 63, 64, 66, 76, 104], "object": [18, 46, 51, 60, 66, 67, 68, 69, 72, 74, 76, 77, 79, 88, 89, 90, 92, 104, 105, 106, 107, 108], "three": [18, 43], "part": [18, 
59, 60, 62, 108], "03": [18, 42], "worker": [18, 43, 53, 55, 56, 59, 76, 77], "max_global_step": [18, 43, 53, 55, 56, 59], "sh": 18, "821": 18, "4": [18, 19, 21, 22, 26, 37, 39, 41, 43, 46, 48, 49, 50, 53, 54, 55, 56, 57, 58, 60, 76, 78, 80, 83], "465": [18, 40], "459": 18, "17": [18, 43, 50, 55, 56], "464": 18, "25": [18, 24, 30, 31, 37, 50, 52, 53, 54, 55, 56, 59, 60, 72, 74, 105, 106, 107], "314": 18, "33": [18, 50, 53, 54, 55, 56], "301": 18, "43": [18, 50, 55, 56], "313": 18, "53": [18, 50, 53, 56], "306": 18, "63": [18, 50, 55, 56], "308": [18, 63], "73": [18, 50, 53, 55], "304": 18, "83": [18, 43, 44, 50, 55, 56, 63], "307": [18, 56], "93": [18, 50, 53, 54, 55, 56], "sh_2024": 18, "17_feb7766d": 18, "fastest": 18, "read_writer_data": [18, 49, 56, 63], "interfac": [18, 45, 48, 53, 58, 63, 66, 67], "model": [18, 35, 54, 56, 57, 60, 61, 72, 78, 80, 81, 82, 105, 106, 107], "banditwithsimplepolici": [18, 63], "space": [18, 37, 40, 43, 63, 72, 73, 74, 105, 106, 107], "request": [18, 64], "logger": [18, 35, 54, 57, 64, 68, 76, 103, 110], "mirrors_ubuntu": 18, "lafibr": 18, "ikoula": 18, "ovh": 18, "net": 18, "miroir": 18, "univ": 18, "lorrain": 18, "fr": 18, "nant": 18, "ftp": 18, "u": [18, 49, 50, 51, 52, 61, 62, 108], "picardi": 18, "reim": 18, "www": [18, 53, 54], "lip6": 18, "pub": 18, "linux": [18, 48, 53, 54, 62], "distribut": [18, 21, 22, 26, 44, 52, 75, 78, 91], "archiv": [18, 63], "get_tim": 18, "try": [18, 43, 54, 88], "resp": 18, "get": [18, 43, 44, 50, 52, 54, 58, 59, 61, 66, 67, 68, 72, 74, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107], "elaps": [18, 56, 82], "total_second": 18, "except": [18, 48, 72, 74, 100, 105, 106, 107], "mirrorbandit": 18, "respons": 18, "meant": 18, "On": [18, 61], "neg": [18, 72, 74, 91, 105, 106, 107], "wait": 18, "reach": [18, 72, 74, 105, 106, 107], "mirror_ubuntu": 18, "warn": [18, 28, 43, 53, 54, 55, 60, 63, 64, 76, 77, 82, 89, 92, 96, 103, 104, 109], "queri": 18, "infinit": 18, "url_id": 18, "list": [18, 35, 47, 55, 56, 58, 
61, 62, 70, 72, 74, 75, 76, 78, 79, 82, 84, 87, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "subset": 18, "provid": [18, 37, 43, 51, 54, 56, 57, 63, 66, 67, 72, 74, 76, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108, 109, 110, 111], "all": [18, 35, 36, 44, 46, 49, 50, 51, 54, 55, 56, 57, 59, 60, 61, 62, 64, 66, 67, 68, 69, 75, 76, 80, 81, 82, 86, 92, 93, 95, 96, 108, 110], "mirrorenv": 18, "url_list": 18, "n_arm": 18, "action_spac": [18, 27, 32, 37, 49, 50, 53, 54, 67, 69, 73, 74, 88], "discret": [18, 43, 95, 106], "associ": [18, 62, 69, 104], "exist": [18, 44, 53, 54, 72, 74, 88, 105, 106, 107], "assert": [18, 44, 60, 108], "default": [18, 28, 41, 43, 48, 50, 58, 59, 63, 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 84, 85, 87, 88, 89, 91, 92, 93, 94, 95, 98, 99, 100, 102, 104, 105, 106, 107], "seqhalvag": 18, "active_set": 18, "arang": 18, "logk": 18, "ceil": 18, "log2": 18, "ep": [18, 45, 49, 53, 67], "tr": 18, "k": [18, 52, 75, 92], "reward_est": 18, "estim": [18, 43, 49, 50, 67, 68, 75, 80, 82], "half_len": 18, "argsort": 18, "optimal_act": 18, "add_scalar": [18, 38, 104], "onli": [18, 35, 38, 41, 43, 46, 48, 49, 53, 54, 55, 56, 58, 60, 61, 62, 63, 64, 66, 67, 68, 75, 76, 77, 80, 81, 82, 84, 89, 91, 92, 93, 94, 95, 96, 104, 107, 110], "iter": [18, 37, 43, 49, 53, 55, 56, 58, 61, 66, 67, 75, 104], "faster": [18, 43, 48, 49, 62, 66, 67, 104], "doc": [18, 28, 35, 48, 51, 55, 62, 63, 64, 69, 76, 89, 109], "preprocess_tag": [18, 84], "boxplot": [18, 52, 61], "xlabel": [18, 50], "ylabel": [18, 50], "agent_handl": [18, 59, 76], "37": [18, 20, 43, 50, 53, 54, 55, 56], "576": [18, 20], "plot_mirror_bandit": [18, 20], "subgaussian": [19, 40], "figur": [19, 52, 56, 79, 80, 81, 82], "gca": 19, "924": [19, 20], "plot_ucb_bandit": [19, 20], "51": [20, 50, 53, 54, 55, 56, 63], "763": 20, "execut": [20, 38, 42, 44, 48, 62, 66, 67, 68, 72, 74, 104, 105, 106, 107], "auto_examples_demo_bandit": 20, "file": [20, 42, 56, 57, 59, 62, 63, 64, 66, 67, 68, 72, 76, 
77, 82, 84, 85, 87, 105, 106, 107, 111], "bandit": [20, 42, 61, 63, 66, 67, 76, 78, 82, 84, 93], "00": [20, 42, 53, 54, 56], "mb": [20, 42, 53, 54], "07": [20, 54, 56], "04": [20, 43, 53, 54, 55, 58, 63], "render": [21, 22, 24, 25, 26, 27, 28, 31, 32, 33, 34, 49, 53, 54, 61, 62, 63, 72, 74, 105, 106, 107], "slightli": [21, 22, 26, 33], "just": [21, 22, 26, 33, 38, 44, 57, 59, 62], "purpos": [21, 22, 26, 33, 43, 46, 64, 110], "datetim": [21, 22, 26], "atari_mak": [21, 22, 26, 54], "model_factory_from_env": [21, 22, 26, 33], "initial_tim": [21, 22, 26], "now": [21, 22, 26, 46, 48, 55, 61, 62, 63, 72, 74, 105, 106, 107], "init": [21, 22, 26, 43, 44, 55, 58, 60], "policy_mlp_config": [21, 22], "type": [21, 22, 26, 33, 44, 51, 56, 58, 61, 62, 64, 67, 68, 69, 76, 83, 91, 92, 94, 95, 96], "multilayerperceptron": [21, 22, 26, 33], "network": [21, 22, 26, 61, 63], "architectur": [21, 22, 26], "layer_s": [21, 22, 26, 33], "512": [21, 22, 26], "dimens": [21, 22, 26, 43], "reshap": [21, 22, 26, 33], "is_polici": [21, 22, 26], "critic_mlp_config": [21, 22], "out_siz": [21, 22], "approxim": [21, 22, 75, 78], "policy_config": [21, 22], "convolutionalnetwork": [21, 22, 26], "relu": [21, 22, 26], "in_channel": [21, 22, 26], "in_height": [21, 22, 26], "84": [21, 22, 26, 43, 50, 55, 56], "in_width": [21, 22, 26], "head_mlp_kwarg": [21, 22, 26], "transpose_ob": [21, 22, 26], "critic_config": [21, 22], "tuned_xp": [21, 22, 26, 43], "al": [21, 22, 26, 54, 71, 109], "v5": [21, 22, 26, 54, 71], "solv": [21, 22, 26, 35, 38, 43, 53, 62, 72, 74, 105, 106, 107], "hyperparamet": [21, 22, 26, 37, 43, 51, 53, 55, 57, 66, 67, 68, 76, 108, 109], "batch_siz": [21, 22, 26, 43, 46, 48, 55, 69], "64": [21, 43, 50, 55, 56, 58], "optimizer_typ": [21, 22], "adam": [21, 22], "what": [21, 22, 44, 49, 50, 52, 56, 58, 60, 61, 62, 64, 69, 80, 81], "gradient": [21, 22, 26, 43], "descent": [21, 22, 26, 43], "1e": [21, 26, 37, 43, 45, 46, 50, 53, 57], "size": [21, 22, 26, 52, 53, 54, 75, 82], "policy_net_fn": 
[21, 22], "constructor": [21, 22, 46, 47, 51, 55, 58, 59, 60, 66, 67, 68, 69, 73, 76, 98, 99, 100, 102], "policy_net_kwarg": [21, 22], "architecur": [21, 22], "value_net_fn": [21, 22], "value_net_kwarg": [21, 22], "n_env": [21, 22], "gae_lambda": [21, 22, 43], "clip_ep": [21, 22], "k_epoch": [21, 22], "1024": 21, "10_000_000": [21, 22], "interact": [21, 22, 26, 43, 45, 53, 54, 56, 67], "between": [21, 22, 26, 43, 44, 49, 53, 60, 104, 108], "dure": [21, 22, 26, 36, 42, 43, 55, 57, 58, 59, 61, 66, 67, 72, 74, 76, 82, 105, 106, 107, 109, 110, 111], "eval_kwarg": [21, 22, 26, 37, 43, 44, 46, 47, 49, 50, 55, 57, 58, 60, 61, 76, 79, 82, 83, 84], "eval_horizon": [21, 22, 26, 37, 43, 44, 46, 47, 49, 50, 55, 57, 58, 60, 61, 67, 68, 76, 79, 82, 83, 84], "usual": [21, 22, 26, 43, 72, 74, 105, 106, 107, 108], "good": [21, 22, 26, 43, 46, 53, 60, 61, 62], "do": [21, 22, 26, 43, 44, 49, 50, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64, 75, 76, 79, 80, 81, 82, 84, 88, 90, 108, 110], "more": [21, 22, 26, 28, 38, 41, 43, 47, 49, 53, 54, 55, 56, 58, 59, 61, 62, 63, 64, 69, 75, 76, 109, 110, 111], "than": [21, 22, 26, 43, 44, 48, 56, 59, 61, 64, 69, 80, 82, 91, 94, 95], "becaus": [21, 22, 26, 28, 38, 41, 43, 50, 58, 61], "ppo_tun": [21, 22], "output_dir": [21, 22, 26, 38, 55, 57, 59, 63, 66, 67, 68, 76, 82, 84], "ppo_for_atlanti": 21, "final_train_tim": [21, 22, 26], "metadata": [21, 22, 26, 53, 54, 72, 74, 104, 105, 106, 107], "mode": [21, 22, 26, 56, 72, 74, 105, 106, 107], "bug": [21, 22, 26, 63, 64], "some": [21, 22, 26, 40, 43, 44, 47, 48, 52, 54, 56, 59, 60, 61, 62, 63, 64, 72, 74, 83, 91, 105, 106, 107, 110], "30000": [21, 22, 26], "get_agent_inst": [21, 22, 26, 59, 63, 76], "final_test_tim": [21, 22, 26], "example_plot_atari_atlantis_vectorized_ppo": 21, "begin": [21, 22, 26, 44, 80, 108], "example_atari_atlantis_vectorized_ppo": 21, "5e": 22, "128": [22, 54, 56], "ppo_for_breakout": 22, "example_plot_atari_breakout_vectorized_ppo": 22, "example_atari_breakout_vectorized_ppo": 22, 
"applegold": [23, 36], "twinroom": [23, 36], "oldgymcompatibilitywrapp": [23, 36, 105], "old_acrobot": [23, 36, 105, 107], "room": [23, 36, 47], "springcartpol": [23, 36, 63], "atari": [23, 36, 63, 71, 76, 109, 110], "freewai": [23, 36, 71, 76], "atlanti": [23, 36, 71, 76], "vector": [23, 36, 63, 71, 76], "breakout": [23, 36, 54, 71, 76], "rescal": [24, 105], "n_episod": [24, 30, 45, 53], "video_plot_acrobot": 24, "grid_explor": [25, 32, 47], "apple_gold": 25, "reward_fre": [25, 47], "array_observ": [25, 47], "video_plot_apple_gold": 25, "mlp_config": 26, "cnn_config": 26, "q_net_constructor": [26, 33], "q_net_kwarg": [26, 33], "max_replay_s": [26, 69], "50000": 26, "32": [26, 43, 46, 50, 53, 55, 56, 60, 69], "learning_start": 26, "25000": 26, "gradient_step": 26, "epsilon_fin": 26, "chunk_siz": [26, 69], "90000": 26, "dqn_tune": 26, "dqn_for_freewai": 26, "video_plot_atari_freewai": 26, "video_plot_chain": 27, "ofvalueiter": 28, "never": [28, 62, 72, 74, 91, 94, 95, 105, 106, 107], "present": [28, 43, 62, 96, 111], "see": [28, 38, 41, 43, 44, 46, 47, 48, 49, 51, 55, 58, 59, 60, 61, 62, 63, 66, 67, 69, 72, 74, 75, 76, 80, 81, 89, 105, 106, 107, 108, 111], "inform": [28, 41, 43, 45, 49, 53, 54, 56, 58, 61, 62, 64, 66, 67, 68, 72, 74, 76, 105, 106, 107, 108, 109, 110, 111], "video_plot_gridworld": 28, "framer": [28, 62, 72, 105, 106, 107], "discretizestatewrapp": [29, 34], "_env": [29, 106], "40": [29, 50, 53, 54, 55, 56], "video_plot_montain_car": 29, "video_plot_mountain_car": 29, "old": [30, 59, 63, 104, 107], "old_env": 30, "gym_util": 30, "video_plot_old_gym_acrobot": 30, "video_plot_old_gym_compatibility_wrapper_old_acrobot": 30, "reward_amplitud": 31, "reward_smooth": 31, "reward_cent": 31, "75": [31, 37, 50, 55, 56], "co": 31, "pi": [31, 35, 40, 43, 61, 80, 81, 82], "sin": 31, "action_list": 31, "ii": [31, 34, 39, 44], "video_plot_pbal": 31, "nroom": [32, 47], "remove_wal": 32, "room_siz": 32, "initial_state_distribut": 32, "center": [32, 50], "include_trap": 
32, "observation_spac": [32, 37, 53, 72, 73, 74, 88, 105, 106, 107], "999": 32, "video_plot_room": 32, "time_limit": 33, "model_config": 33, "obs_tran": 33, "swing_up": 33, "1e5": [33, 43, 44, 48, 61, 79], "video_plot_springcartpol": 33, "discretize_st": 34, "seeder": [34, 51, 59, 60, 61, 66, 67, 68, 72, 74, 76, 88, 90, 91, 93, 94, 95, 105, 106, 107, 108], "123": [34, 38, 47, 51, 57, 59, 60, 61, 89], "n_bin": [34, 106], "rese": [34, 51, 57, 60, 61, 66, 67, 68, 72, 73, 74, 88, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107], "video_plot_twinroom": 34, "with_venv": 35, "decor": [35, 87], "order": [35, 49, 50, 52, 57, 61, 70, 92, 108], "automat": [35, 41, 48, 49, 51, 57, 59, 62, 63, 64, 72, 74, 82, 105, 106, 107, 109], "experiment": [35, 48, 63, 108], "separ": [35, 61, 76], "compil": [35, 62], "via": [35, 48, 56, 61, 63], "run_venv_xp": [35, 87], "run_sb": 35, "run_mushroom": 35, "directli": [35, 50, 57, 61, 63, 80, 82], "text": [35, 44, 52, 62, 72, 74, 105, 106, 107], "import_lib": [35, 87], "want": [35, 41, 48, 49, 50, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 72, 74, 75, 76, 84, 97, 105, 106, 107, 108, 110, 111], "contain": [35, 56, 59, 61, 66, 67, 68, 69, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 87, 91, 92, 93, 94, 95, 96, 105, 106, 107, 109], "mushroom_rl": 35, "taken": [35, 52, 66, 67], "venv_dir_nam": [35, 85, 87], "rlberry_venv": [35, 85, 87], "simpl": [35, 37, 38, 43, 44, 45, 49, 50, 53, 61, 63, 67, 108, 109], "q": [35, 37, 43, 49, 53, 54, 59, 63], "qlearn": [35, 53], "core": [35, 53, 54, 72, 108, 110], "generate_simple_chain": 35, "epsgreedi": 35, "compute_j": 35, "__name__": [35, 38, 48, 76, 79, 82, 84], "results_dir": 35, "strong_lin": 35, "mdp": [35, 43, 63, 72, 74, 105, 106, 107], "state_n": 35, "goal_stat": 35, "rew": 35, "epsilon": [35, 37, 50, 59], "15": [35, 39, 50, 53, 55, 56, 63, 78], "algorithm_param": 35, "10000": [35, 53, 55, 75, 78], "n_steps_per_fit": 35, "stabl": [35, 43, 51, 69, 71, 89, 108, 109, 110, 111, 112], "baselines3": [35, 71, 109, 110], 
"python_v": [35, 87], "make": [35, 44, 46, 52, 53, 54, 55, 57, 62, 63, 64, 66, 67, 68, 72, 73, 74, 76, 78, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108, 110], "total_timestep": [35, 38, 43, 53, 55, 56, 58], "1_500": 35, "vec_env": 35, "get_env": 35, "ob": [35, 69], "cum_reward": 35, "_state": 35, "predict": [35, 60, 80, 81, 82], "__main__": [35, 38, 48, 76, 79, 82, 84], "collect": [35, 44, 52, 57, 61, 72, 74, 92, 105, 106, 107], "directori": [35, 55, 56, 63, 64, 66, 67, 68, 76, 82, 84, 85, 87], "tun": 35, "them": [35, 43, 52, 55, 56, 57, 58, 76, 80, 82, 110], "example_venv": [35, 42], "kernel": [36, 42, 82], "adastop": [36, 42, 44, 63, 68, 73, 75, 110], "record": [36, 42, 49, 56, 61, 62, 63, 66, 67, 68, 76, 82, 109], "virtual": [36, 42, 85, 87, 109], "tool": [36, 42, 56, 59, 60, 61, 63, 66, 67, 76, 82, 108, 109, 110], "checkpoint": [36, 42, 59, 66, 76, 82, 108, 111], "auto_examples_python": 36, "zip": [36, 63], "auto_examples_jupyt": 36, "grid": [37, 50, 76], "world": [37, 50, 54], "access": [37, 50, 104], "transit": [37, 43, 50, 69], "next_stat": [37, 50], "Then": [37, 41, 44, 49, 50, 54, 56, 61, 62], "implement": [37, 45, 46, 48, 52, 53, 54, 57, 63, 64, 66, 67, 72, 74, 78, 105, 106, 107, 108, 109], "leftarrow": 37, "sum_": 37, "prime": [37, 43], "left": [37, 44, 49], "right": [37, 44, 49, 53, 54, 72, 74, 105, 106, 107], "baselin": [37, 43, 50, 69, 108, 110, 111], "gather": [37, 49, 52, 53, 55, 63, 76, 108], "about": [37, 53, 55, 56, 58, 64, 66, 67, 68, 72, 74, 76, 104, 105, 106, 107, 108, 110], "valueiterationagent_2024": 37, "58_8d70b7e4": 37, "randomag": [37, 49, 50, 67], "randomagent_2024": 37, "59_00aad013": 37, "nrow": [37, 41, 50], "ncol": [37, 41, 50], "reward_at": [37, 41, 50], "agentwithsimplepolici": [37, 45, 49, 50, 53, 61, 68, 76], "ensur": [37, 45, 51, 53, 57, 60, 72, 73, 74, 105, 106, 107], "compat": [37, 45, 53, 56, 57, 63, 91, 92, 93, 94, 95, 96, 97, 98, 100, 108], "discount": [37, 43, 67, 68, 76], "factor": [37, 43, 67, 68, 76], "episilon": 37, 
"precis": [37, 44, 53], "tq": 37, "ss": 37, "aa": 37, "dot": [37, 44], "ab": [37, 52], "argmax": [37, 53], "classmethod": [37, 46, 57, 66, 67, 68, 76], "sample_paramet": [37, 46, 57, 66, 67, 68], "cl": [37, 46, 57], "trial": [37, 46, 53, 57, 66, 67, 68, 76], "hyperparam": [37, 46, 57, 66, 67, 68, 76], "optuna": [37, 46, 51, 66, 67, 68, 76, 109], "org": [37, 46, 51, 52, 53, 54, 62, 64, 66, 67, 68, 69, 76, 78, 89], "suggest_categor": [37, 46, 57], "pass": [37, 45, 50, 53, 57, 62, 64, 68, 72, 73, 74, 76, 105, 106, 107], "evaluate_ag": [37, 43, 44, 49, 50, 55, 57, 58, 60, 61], "vi_param": [37, 50], "job": [37, 50], "vi_stat": [37, 50], "baseline_stat": [37, 49, 50], "mont": [37, 45, 49, 50, 53, 66, 67, 68, 76], "carlo": [37, 45, 49, 50, 53, 66, 67, 68, 76], "simul": [37, 38, 43, 49, 50, 54, 55, 58, 60, 67, 68, 72, 74, 76, 80, 81, 82, 105, 106, 107], "n_simul": [37, 43, 44, 49, 50, 55, 58, 60, 61, 67, 68, 76, 78, 79], "233": [37, 42], "plot_agent_manag": [37, 42], "minim": [38, 64, 72, 74, 105, 106, 107, 109], "your": [38, 45, 46, 49, 51, 56, 59, 62, 64, 72, 74, 76, 105, 106, 107, 108, 109, 110], "restor": 38, "previou": [38, 43, 49, 55, 56, 58, 64, 76, 108, 110], "my": 38, "00_9aa9c456": 38, "output_1": 38, "timestep": [38, 50, 66, 67, 68, 72, 74, 105, 106, 107], "output_0": 38, "load": [38, 41, 50, 56, 57, 63, 66, 67, 68, 75, 76, 78, 82, 84, 104, 108, 110], "myagent": [38, 45], "1500": [38, 56], "data": [38, 41, 43, 44, 46, 49, 52, 53, 54, 55, 58, 59, 60, 63, 66, 67, 68, 69, 72, 76, 78, 79, 80, 81, 82, 84, 86, 91, 92, 94, 95, 96, 104, 105, 106, 107, 108, 109, 110], "checkpoint_fil": 38, "equat": [38, 44], "del": [38, 50], "check": [38, 43, 49, 51, 55, 60, 62, 63, 64, 69, 75, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 102, 108, 110, 111], "loaded_checkpoint": 38, "get_param": [38, 66, 67, 68, 72, 74, 105, 106, 107], "__dict__": 38, "updat": [38, 43, 53, 58, 63, 69, 72, 74, 76, 105, 106, 107], "loop": [38, 53, 54], "yt": 38, "rng": [38, 51, 59, 60, 66, 67, 68, 69, 72, 
74, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107], "normal": [38, 43, 51, 60, 91], "append": [38, 47, 57, 60, 66, 67, 69, 77], "y": [38, 44, 53, 54, 56, 59, 72, 74, 80, 81, 82, 105, 106, 107], "everi": [38, 49, 54, 60, 62, 64, 84, 108], "eval": [38, 45, 49, 50, 53, 55, 58, 60, 63, 66, 67, 68, 76, 78, 79], "befor": [38, 59, 61, 62, 66, 67, 72, 74, 75, 82, 84, 105, 106, 107], "so": [38, 44, 48, 50, 59, 60, 63, 76, 104, 108], "why": [38, 110], "interrupt": 38, "continu": [38, 63, 72, 74, 75, 76, 105, 106, 107, 108], "last": [38, 41, 62, 84, 104], "But": [38, 56, 58, 59, 60, 108], "small": [38, 52], "instanti": [38, 66, 67, 76], "itself": 38, "after": [38, 46, 49, 55, 58, 59, 61, 62, 63, 72, 74, 76, 77, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108], "manager_fil": 38, "delet": [38, 76], "situat": 38, "couldn": 38, "g": [38, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 74, 75, 76, 80, 81, 82, 105, 106, 107, 109], "loaded_manag": 38, "589": [38, 42], "plot_checkpoint": [38, 42], "requir": [39, 45, 53, 55, 57, 58, 60, 63, 64, 66, 67, 68, 69, 76, 85, 87, 98, 99, 100, 102, 109, 110, 111], "kernel_bas": [39, 47], "kernel_func": 39, "uniform": [39, 69, 91], "triangular": 39, "epanechnikov": 39, "quartic": 39, "triweight": 39, "tricub": 39, "cosin": 39, "z": [39, 49, 75], "linspac": 39, "k_type": 39, "kernel_v": 39, "set_titl": 39, "659": [39, 42], "plot_kernel": [39, 42], "makesubgaussianmossindex": 40, "pad": 40, "suptitl": 40, "error": [40, 44, 48, 52, 53, 64, 72, 74, 78, 100, 103, 105, 106, 107], "raw_curv": [40, 61, 80, 81, 82], "ci": [40, 61, 80, 81, 82], "smooth": [40, 49, 61, 63, 80, 81, 82, 108], "error_represent": [40, 61, 80, 81, 82], "cb": [40, 61, 80, 82], "894": [40, 42], "plot_smooth": [40, 42], "modifi": [41, 43, 54, 62], "easili": [41, 53, 63, 108], "alreadi": [41, 44, 62, 72, 74, 105, 106, 107], "ran": 41, "onc": [41, 46, 49, 59, 61, 62], "ha": [41, 43, 48, 49, 50, 51, 52, 53, 56, 59, 60, 62, 63, 72, 74, 76, 91, 94, 95, 105, 106, 107, 108], "comment": [41, 62], 
"out": [41, 56, 72, 74, 76, 86, 105, 106, 107, 108, 110, 111], "line": [41, 54, 57, 58, 62, 72, 74, 76, 105, 106, 107, 108], "avoid": [41, 62, 89], "ucbviag": [41, 49, 60, 100], "ucbviagent_2024": 41, "31_7758b26c": 41, "wrape": 41, "writerwrapp": 41, "viagent": 41, "abov": [41, 59, 72, 74, 91, 104, 105, 106, 107], "preprocess": [41, 84], "compute_reward": [41, 72, 105, 106, 107], "global": [41, 47, 53, 54, 104], "necessari": [41, 51, 52, 59, 60, 61, 62, 72, 74, 105, 106, 107, 109], "custom": [41, 53, 54, 63, 68, 72, 74, 76, 80, 81, 82, 105, 106, 107, 110], "set_xlim": 41, "relim": 41, "set_xscal": 41, "set_yscal": 41, "353": [41, 42, 56], "plot_writer_wrapp": [41, 42], "39": [42, 50, 54, 55, 56], "728": 42, "auto_exampl": 42, "tutori": [43, 49, 63, 64], "focu": 43, "advantag": 43, "consid": [43, 44, 66, 67], "mathcal": 43, "x": [43, 44, 56, 59, 61, 63, 72, 74, 78, 80, 81, 82, 91, 92, 93, 94, 95, 96, 105, 106, 107], "mid": 43, "probabl": [43, 44, 49, 50, 52, 61, 80, 82], "map": [43, 53, 54, 66, 67, 68, 72, 74, 76, 105, 106, 107], "overal": [43, 44], "expect": [43, 62, 64, 72, 74, 92, 93, 94, 95, 96, 105, 106, 107, 110, 111], "mathbb": [43, 44], "tau": 43, "sim": 43, "big": 43, "s_0": 43, "a_0": 43, "r_0": 43, "s_1": 43, "a_1": [43, 91], "r_1": 43, "s_2": 43, "s_t": 43, "a_t": 43, "r_t": 43, "drawn": [43, 94], "maxim": [43, 76], "previous": [43, 59], "openai": [43, 69, 72, 74, 91, 105, 106, 107], "larg": [43, 61], "although": 43, "other": [43, 49, 51, 53, 54, 56, 60, 61, 62, 64, 66, 67, 68, 71, 75, 110], "v0": [43, 51, 54, 60, 61, 72, 74, 89, 105, 106, 107, 109], "tabl": [43, 52, 53, 59], "basic": [43, 45, 53, 63, 66, 107, 108, 110], "compon": [43, 59], "occur": [43, 54, 72, 74, 105, 106, 107], "box": [43, 44, 92, 93, 94, 95, 96], "depend": [43, 45, 53, 56, 62, 63, 67, 72, 74, 87, 92, 96, 105, 106, 107], "mai": [43, 44, 48, 49, 54, 55, 61, 62, 64, 69, 72, 74, 76, 104, 105, 106, 107], "next": [43, 53, 62, 72, 74, 105, 106, 107], "compact": [43, 55], "wai": [43, 55, 
56, 57, 61, 62, 63, 72, 74, 105, 106, 107, 108, 110], "deeprl": [43, 109], "default_xp": 43, "rollout": [43, 53, 55, 56, 58], "ep_rew_mean": [43, 53, 55, 56, 58], "09": [43, 55, 58], "31": [43, 50, 53, 54, 55, 56], "4096": [43, 53, 56], "ep_len_mean": [43, 53, 55, 56, 58], "fp": [43, 53, 54, 55, 56, 58], "791": 43, "time_elaps": [43, 53, 55, 56, 58], "2048": [43, 53, 55, 56, 58], "0003": [43, 53, 55, 56], "741": 43, "751": 43, "6144": [43, 53, 56], "617": 43, "entropy_loss": [43, 53, 55, 56, 61], "0967000976204873": 43, "policy_gradient_loss": [43, 53, 55, 56], "0017652213326073251": 43, "value_loss": [43, 53, 55, 56], "139": [43, 55], "4249062538147": 43, "approx_kl": [43, 53, 55, 56], "004285778850317001": 43, "clip_fract": [43, 53, 55, 56], "0044921875": 43, "loss": [43, 53, 55, 56, 61], "16": [43, 46, 50, 53, 54, 55, 56, 60], "845857620239258": 43, "explained_vari": [43, 53, 55, 56], "0011605024337768555": 43, "n_updat": [43, 53, 55, 56], "clip_rang": [43, 53, 55, 56], "100352": 43, "48": [43, 50, 53, 54, 55, 56], "89": [43, 50, 55, 58], "81": [43, 50, 55, 56], "90": [43, 50, 56, 63], "486": 43, "202": 43, "98304": 43, "19921453138813378": 43, "002730156043253373": 43, "21": [43, 50, 53, 54, 55, 56, 60, 107], "20977843105793": 43, "0014179411809891462": 43, "017626953125": 43, "601455688476562": 43, "8966712430119514": 43, "470": [43, 55, 63], "14615743807516993": 43, "002418491238495335": 43, "7100858271122": 43, "0006727844011038542": 43, "010546875": 43, "74121379852295": 43, "8884317129850388": 43, "default_2024": 43, "24_09": 43, "51_be15b329": 43, "36": [43, 50, 53, 54, 55, 56], "let": [43, 49, 57], "chang": [43, 54, 61, 62, 63, 72, 74, 76, 105, 106, 107, 108], "aim": [43, 44, 63, 108], "section": [43, 44, 49, 52, 53, 61, 64, 78], "effect": [43, 72, 74, 105, 106, 107], "demonstr": 43, "pedagog": 43, "sinc": [43, 56, 66, 67, 91, 92, 93, 94, 95, 96], "ll": [43, 47, 53, 59], "wrong": 43, "decreas": 43, "obvious": 43, "practic": [43, 48, 60, 62], "improv": 
[43, 63, 64], "ent_coef": [43, 57], "much": [43, 63, 69], "forc": [43, 54], "explor": [43, 49, 53, 72, 74, 105, 106, 107], "normalize_advantag": [43, 57], "trade": 43, "off": 43, "bia": [43, 48], "varianc": 43, "n_epoch": 43, "epoch": [43, 104], "surrog": 43, "incorrectli": 43, "832": [43, 55], "12": [43, 50, 53, 54, 55, 56, 63], "260": [43, 44, 55], "768": 43, "9725531369447709": 43, "175539326667786": 43, "705344581604002": 43, "028903376311063766": 43, "33828125": 43, "651824951171875": 43, "03754150867462158": 43, "220": [43, 54, 56], "251": 43, "252": [43, 56], "0311604633927345": 43, "122353088855744": 43, "18": [43, 50, 53, 55, 56, 58, 60], "54480469226837": 43, "02180374786257744": 43, "359375": 43, "690193176269531": 43, "00020706653594970703": 43, "45": [43, 50, 54, 55, 56], "tuned_2024": 43, "32_33d1646b": 43, "wors": [43, 44], "lower": [43, 48], "47": [43, 50, 54, 55, 56, 60], "perform": [44, 48, 49, 50, 54, 60, 61, 64, 67, 68, 72, 74, 76, 80, 81, 82, 105, 106, 107], "deep": [44, 57, 61, 63, 64, 66, 67, 68, 72, 74, 105, 106, 107, 108, 111], "independ": [44, 51, 60, 92, 94, 96, 108], "abl": [44, 49, 52, 53, 61], "sai": [44, 49, 50, 52], "inde": [44, 60, 108], "perceiv": 44, "its": [44, 49, 51, 54, 55, 58, 59, 62, 63, 76, 82, 84, 111], "most": [44, 48, 59, 72, 74, 80, 82, 84, 105, 106, 107, 108], "form": [44, 64, 91], "decid": 44, "given": [44, 59, 63, 68, 69, 72, 76, 82, 84, 97, 104], "x_1": 44, "x_n": 44, "adher": [44, 61], "h_0": 44, "null": 44, "better": [44, 48, 54, 55, 62, 63], "altern": [44, 47, 48, 49, 62], "h_1": 44, "y_1": 44, "y_n": 44, "x_i": 44, "equal": [44, 50, 52, 63, 75, 82, 84], "y_i": 44, "quad": 44, "neq": 44, "both": [44, 49, 63, 87, 91], "accept": [44, 45, 53, 62, 64, 72, 74, 75, 105, 106, 107], "reject": 44, "answer": 44, "ground": 44, "truth": 44, "howev": [44, 48, 72, 74, 105, 106, 107], "often": [44, 48, 62, 72, 74, 82, 105, 106, 107], "control": [44, 52, 54, 62, 63, 72, 74, 78, 105, 106, 107], "decompos": 44, "denot": 44, 
"alpha": [44, 52, 59, 69, 75, 78], "respect": [44, 54, 56], "symmetr": 44, "fail": [44, 59, 66, 67, 100], "doe": [44, 48, 49, 60, 62, 63, 64, 66, 67, 72, 74, 78, 88, 92, 96, 100, 105, 106, 107], "It": [44, 45, 48, 51, 53, 55, 56, 59, 60, 62, 66, 67, 72, 74, 76, 86, 105, 106, 107, 108, 110], "simultan": [44, 61, 80, 81, 82], "must": [44, 48, 49, 56, 57, 61, 62, 64, 66, 67, 69, 76, 78, 80, 81, 82, 84, 86], "care": [44, 55, 56, 59], "accumul": 44, "cautiou": 44, "becom": [44, 52, 61], "non": [44, 51, 60, 61, 63, 72, 74, 78, 80, 81, 82, 105, 106, 107], "neglig": 44, "consequ": [44, 48], "strategi": [44, 67], "develop": [44, 53, 54, 60, 62, 64, 108, 112], "deal": 44, "To": [44, 46, 47, 48, 49, 50, 53, 54, 55, 56, 58, 59, 61, 62, 64, 69, 90, 109, 110], "There": [44, 57, 61], "famili": [44, 52, 78], "wise": [44, 52, 78], "least": 44, "mathrm": 44, "fwe": 44, "h_j": 44, "textbf": 44, "indic": [44, 50, 62, 64, 69, 72, 74, 75, 105, 106, 107], "hypothes": [44, 78], "actual": 44, "c": [44, 49, 53, 54], "rbagent": 44, "3e4": 44, "eval_ag": [44, 76], "idx": [44, 76], "obtain": [44, 49, 50, 52, 53, 67, 68, 76, 82, 84], "These": [44, 64, 110], "through": [44, 48, 53, 54, 59, 61, 67, 68, 72, 74, 104, 105, 106, 107, 110], "val": 44, "signific": [44, 52], "416": 44, "9975": 44, "00000": 44, "00250": 44, "338488": 44, "266444": 44, "38375": 44, "156": 44, "61375": 44, "179": 44, "503659": 44, "017001": 44, "0000": 44, "239": 44, "61625": 44, "80": [44, 50, 55, 56], "271521": 44, "000410": 44, "our": [44, 46, 49, 50, 54, 55, 56, 58, 59, 60, 61, 62, 64, 108, 110], "necessarili": [44, 66, 67], "same": [44, 49, 51, 52, 53, 55, 57, 59, 60, 61, 62, 69, 73, 76, 77, 82, 92, 96, 97, 99], "black": [44, 62], "sens": [44, 91, 97], "don": [44, 50, 53, 58, 59, 60, 108], "were": [44, 108], "mani": [44, 49, 60, 84], "suppos": [44, 60, 61, 78], "user": [44, 48, 53, 54, 56, 61, 62, 63, 72, 74, 76, 105, 106, 107, 108, 109], "adequ": 44, "fair": [44, 60, 94], "further": [44, 72, 74, 105, 106, 107], 
"littl": [44, 64], "look": [44, 46, 62, 64], "veri": [45, 46, 47, 49, 52, 53, 61], "write": [45, 51, 53, 54, 55, 60, 62, 108], "below": [45, 46, 47, 52, 53, 57, 91, 111], "param1": 45, "param2": 45, "eval_env": [45, 47, 50, 55, 63, 66, 67, 68, 70, 76], "repres": [45, 52, 53, 61, 67, 68, 69, 72, 74, 80, 81, 82, 91, 93, 105, 106, 107], "episode_reward": [45, 46, 53, 59, 82, 84], "correspond": [45, 53, 61, 66, 67, 68, 82, 84, 104, 109], "option": [45, 47, 49, 53, 54, 57, 58, 63, 66, 67, 68, 69, 71, 72, 74, 76, 80, 81, 82, 92, 93, 94, 95, 96, 104, 105, 106, 107], "With": [46, 49, 54, 61], "easi": [46, 56, 57, 108], "analyz": 46, "shown": 46, "reinforceag": 46, "stat": [46, 57, 78], "thread": [46, 51, 53, 54, 55, 63, 66, 67, 68, 76, 77, 108], "defaultwrit": [46, 58, 63, 66, 67, 68, 76], "those": 46, "sample_paratem": 46, "sent": [46, 66, 67, 68], "sure": [46, 62, 66, 67, 68, 72, 74, 76, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108], "includ": [46, 53, 54, 57, 62, 64, 66, 67, 68, 69, 72, 74, 105, 106, 107], "optimiz": [46, 66, 67, 68], "suggest_float": [46, 57], "entr_coef": 46, "optimize_hyperparam": [46, 51, 57, 76], "n_trial": [46, 76], "stop": [46, 52, 53, 66, 67, 75, 76, 96, 108], "sampler_method": [46, 76], "optuna_default": [46, 76], "best_hyperparam": 46, "again": [46, 58, 72, 74, 105, 106, 107], "describ": [47, 62, 72, 74, 105, 106, 107], "read": [47, 64, 84, 104, 108], "succinctli": 47, "descript": [47, 61, 62, 64], "demo_experi": 47, "rsucbvi_altern": 47, "rs_ucbvi": 47, "lp_metric": 47, "max_repr": 47, "800": 47, "fit_kwarg": [47, 57, 76], "base_config": 47, "h": [47, 53, 54], "experiment_gener": 47, "multiple_manag": 47, "multiplemanag": [47, 48, 57], "multimanag": [47, 57], "experiment_manag": [47, 59, 76, 77], "standard": 48, "multiprocess": [48, 76, 77, 110], "cpu": [48, 53, 54, 55, 58], "third": [48, 60], "parti": [48, 60], "joblib": 48, "awar": 48, "nativ": [48, 53, 54], "scheme": [48, 108], "higher": 48, "top": [48, 52, 58, 62], "_thread": 48, "modul": 
[48, 50, 76, 77, 98, 99, 100, 102, 104], "websit": [48, 64], "without": [48, 60, 63, 64], "gil": 48, "cython": 48, "impli": 48, "new": [48, 49, 54, 60, 62, 63, 64, 66, 67, 69, 72, 74, 77, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 109, 111], "launch": [48, 64, 85], "advis": 48, "drawback": 48, "encapsul": 48, "direct": [48, 49, 50, 53, 54], "sometim": [48, 61], "avail": [48, 51, 53, 54, 60, 61, 62, 76, 88], "unix": [48, 76], "system": [48, 64], "maco": 48, "could": [48, 49, 72, 74, 104, 105, 106, 107, 108], "hang": 48, "usag": [48, 109], "still": 48, "unstabl": [48, 61], "cc": 49, "bf": 49, "qq": 49, "rr": 49, "zz": 49, "nn": 49, "panda": [49, 50, 56, 58, 61, 76, 78, 80, 81, 82, 84, 86, 104], "pd": [49, 50, 59, 80], "l": [49, 54, 61, 78], "fail_prob": [49, 61], "length": [49, 61, 67, 68, 69, 76, 84], "proba": [49, 61], "take": [49, 50, 53, 56, 57, 61, 63, 64, 67, 72, 74, 76, 80, 82, 87, 105, 106, 107, 110], "might": [49, 62, 66, 67, 72, 74, 105, 106, 107], "opposit": 49, "accord": [49, 59, 80, 81, 91], "failur": 49, "graphic": 49, "represent": [49, 72, 74, 75, 105, 106, 107], "save_gif": [49, 61, 72, 105, 106, 107], "gif_chain": [49, 61], "gif": [49, 63, 110], "clear": [49, 61, 69, 104], "clear_render_buff": [49, 61], "disable_rend": [49, 61, 72, 105, 106, 107], "design": [49, 61, 108], "One": [49, 60, 91], "featur": [49, 62, 63, 109, 110], "diagram": 49, "explain": [49, 52, 62], "briefli": 49, "few": [49, 57, 61, 62, 64, 108, 111], "word": 49, "spawn": [49, 51, 60, 63, 66, 67, 72, 74, 76, 77, 82, 83, 84, 89, 91, 93, 94, 95, 105, 106, 107], "well": 49, "arbitrari": 49, "specif": [49, 54, 55, 59, 60, 66, 67, 72, 74, 76, 105, 106, 107], "thing": [49, 56, 108], "desir": [49, 54, 64, 105], "summar": 49, "ucbvi_param": 49, "ucbvi_stat": 49, "depth": [49, 62], "methodologi": 49, "cannot": [49, 54, 63, 66, 67, 76], "simpli": [49, 57], "random_param": 49, "ucbi": 49, "ucbviagent2": 49, "5000": [49, 56], "randomagent2": [49, 50], "optimalag": 49, "allow": [49, 51, 52, 
53, 55, 56, 60, 61, 63, 66, 67, 69, 76, 82, 92, 96, 107, 108, 109], "recov": 49, "henc": [49, 64, 82], "raw": [49, 80, 81, 82, 91], "instal": [49, 61, 63, 64, 87, 108, 110], "extra": [49, 53, 54, 62, 64, 66, 67, 68, 76, 109, 110], "packag": [49, 54, 62, 64, 109], "scikit": [49, 61, 62, 63, 80, 109], "fda": [49, 61, 63, 80, 109], "page": [49, 56, 63, 64, 108], "opengl_acceler": 50, "And": 50, "row": 50, "column": [50, 56, 59, 61, 78, 80, 81, 82, 84], "posit": [50, 72, 74, 95, 105, 106, 107], "plai": 50, "idea": 50, "n_simimul": 50, "redefin": 50, "tell": 50, "retreiv": 50, "super": [50, 53, 57, 72, 74, 105, 106, 107], "unus": [50, 63], "episode_regret": 50, "valueiterationagent2": 50, "addit": [50, 62, 64, 72, 73, 74, 105, 106, 107], "won": [50, 63, 72, 74, 105, 106, 107], "11": [50, 53, 54, 55, 56, 59, 63, 87], "14": [50, 53, 55, 56, 60, 63], "19": [50, 54, 55, 56, 60], "23": [50, 53, 54, 55, 56, 58, 60], "24": [50, 53, 54, 55, 56, 60], "26": [50, 55, 56, 60, 72, 74, 105, 106, 107], "27": [50, 53, 55, 56], "29": [50, 53, 54, 55, 56], "30": [50, 53, 55, 56], "34": [50, 55, 56], "38": [50, 53, 54, 56], "44": [50, 54, 55, 56], "46": [50, 53, 55, 56, 60], "49": [50, 55, 56], "52": [50, 54, 55, 56], "54": [50, 96], "55": [50, 53, 54, 55, 56], "56": [50, 53, 54, 56], "57": [50, 53, 55, 56], "58": [50, 53, 54, 55, 56], "59": [50, 53, 55, 56], "60": [50, 53, 54, 55, 56, 76], "61": [50, 56], "62": [50, 55, 56], "65": [50, 54, 56], "66": [50, 53, 55, 56], "67": [50, 55, 56], "68": [50, 56], "69": [50, 53, 54, 56], "70": [50, 53, 54, 55, 56], "71": 50, "72": [50, 53, 54, 56], "74": [50, 56], "76": [50, 53, 54, 55, 56], "77": [50, 55], "78": [50, 53, 56], "79": [50, 55, 56], "82": [50, 55, 56, 58], "86": [50, 55], "87": [50, 53, 54, 55, 56], "88": [50, 56], "91": [50, 53, 54, 55, 56], "92": [50, 53, 55], "94": [50, 54, 55, 56], "96": [50, 53, 55, 56], "97": [50, 55, 63], "98": [50, 53], "datafram": [50, 56, 58, 59, 61, 75, 76, 78, 79, 80, 81, 82, 84, 86, 108], "tolist": 50, 
"axessubplot": 50, "linear": 50, "seem": 50, "around": [50, 54, 61, 80, 81, 82, 109], "intend": [50, 64], "target": 50, "rlberry_": [51, 57], "conveni": [51, 60, 108], "wrap": [51, 53, 55, 57, 58, 60, 61, 63, 68, 70, 72, 73, 74, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108], "seedsequ": [51, 60, 66, 67, 72, 74, 76, 89, 91, 93, 94, 95, 105, 106, 107, 108], "singl": [51, 54, 60, 61, 72, 74, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108], "refer": [51, 62, 63, 66, 67, 68, 69, 72, 74, 78, 89, 105, 106, 107], "html": [51, 53, 54, 64, 69, 71, 76, 78, 89, 108], "integ": [51, 56, 59, 60, 72, 74, 89, 90, 92, 93, 95, 96, 105, 106, 107], "own": [51, 56, 60, 108, 110], "inherit": [51, 54, 60, 66, 67, 91, 92, 93, 94, 95, 96], "whenev": [51, 60], "appli": [51, 60, 70, 72, 74, 82, 84, 104, 105, 106, 107], "seeder1": [51, 60], "seeder2": [51, 60], "extern": [51, 90, 110], "set_external_se": [51, 60], "inher": 51, "faq": 51, "reliabl": [52, 108, 109], "introduc": [52, 63], "arxiv": 52, "2306": 52, "10882": 52, "group": 52, "permut": [52, 75, 78], "especi": 52, "easier": [52, 54, 61, 108, 110], "bind": [52, 109], "choos": [52, 53, 58, 67, 80, 82, 110, 111], "rational": 52, "scienc": 52, "viabl": 52, "rank": 52, "theoret": 52, "repetit": 52, "soon": 52, "pleas": [52, 62, 64, 72, 74, 105, 106, 107], "realli": 52, "certain": [52, 72, 74, 105, 106, 107], "limit": [52, 104], "maximum": [52, 63, 67, 68, 69, 74, 75, 76], "batch": [52, 69, 91, 92, 94, 95, 96], "ask": [52, 61], "wrongli": 52, "format": [52, 61, 62, 80, 81], "print_result": [52, 75], "score": [52, 75, 78], "271": 52, "17600000000004": 52, "plot_result": [52, 75], "larger": [52, 61, 75, 80, 82], "entiti": 53, "api": [53, 54, 55, 62, 72, 74, 105, 106, 107, 108, 110], "guid": [53, 56, 59, 62, 63, 64, 108], "renderinterfac": [53, 54, 72], "_agent_page_chain1": 53, "cross": [53, 62, 109], "creation": [53, 63], "_agent_page_chain2": 53, "n_iter": [53, 75], "269": [53, 56, 63], "06": [53, 55, 56], "pg": 53, "displai": [53, 
56, 72, 74, 105, 106, 107], "set_mod": 53, "doublebuf": 53, "opengl": [53, 54, 63], "ffmpeg": [53, 54, 61, 109], "0ubuntu0": [53, 54], "copyright": [53, 54], "built": [53, 54], "gcc": [53, 54], "19ubuntu1": [53, 54], "configur": [53, 54, 56, 57, 62, 69, 71], "prefix": [53, 54, 62], "usr": [53, 54], "toolchain": [53, 54], "harden": [53, 54], "libdir": [53, 54], "lib": [53, 54, 66], "x86_64": [53, 54], "gnu": [53, 54], "incdir": [53, 54], "arch": [53, 54], "amd64": [53, 54], "enabl": [53, 54, 55, 69, 76], "gpl": [53, 54], "disabl": [53, 54, 76], "strip": [53, 54], "gnutl": [53, 54], "ladspa": [53, 54], "libaom": [53, 54], "libass": [53, 54], "libblurai": [53, 54], "libbs2b": [53, 54], "libcaca": [53, 54], "libcdio": [53, 54], "libcodec2": [53, 54], "libdav1d": [53, 54], "libflit": [53, 54], "libfontconfig": [53, 54], "libfreetyp": [53, 54], "libfribidi": [53, 54], "libgm": [53, 54], "libgsm": [53, 54], "libjack": [53, 54], "libmp3lam": [53, 54], "libmysofa": [53, 54], "libopenjpeg": [53, 54], "libopenmpt": [53, 54], "libopu": [53, 54], "libpuls": [53, 54], "librabbitmq": [53, 54], "librubberband": [53, 54], "libshin": [53, 54], "libsnappi": [53, 54], "libsoxr": [53, 54], "libspeex": [53, 54], "libsrt": [53, 54], "libssh": [53, 54], "libtheora": [53, 54], "libtwolam": [53, 54], "libvidstab": [53, 54], "libvorbi": [53, 54], "libvpx": [53, 54], "libwebp": [53, 54], "libx265": [53, 54], "libxml2": [53, 54], "libxvid": [53, 54], "libzimg": [53, 54], "libzmq": [53, 54], "libzvbi": [53, 54], "lv2": [53, 54], "omx": [53, 54], "open": [53, 54, 64], "opencl": [53, 54], "sdl2": [53, 54], "pocketsphinx": [53, 54], "librsvg": [53, 54], "libmfx": [53, 54], "libdc1394": [53, 54], "libdrm": [53, 54], "libiec61883": [53, 54], "chromaprint": [53, 54], "frei0r": [53, 54], "libx264": [53, 54], "share": [53, 54, 66, 67, 68, 76], "libavutil": [53, 54], "libavcodec": [53, 54], "134": [53, 54], "libavformat": [53, 54], "libavdevic": [53, 54], "libavfilt": [53, 54], "110": [53, 54, 55, 56], 
"libswscal": [53, 54], "libswresampl": [53, 54], "libpostproc": [53, 54], "input": [53, 54, 56, 61, 66, 67, 72, 76, 78, 82, 89, 108], "rawvideo": [53, 54], "pipe": [53, 54], "durat": [53, 54], "000000": [53, 54], "bitrat": [53, 54], "38400": [53, 54], "kb": [53, 54], "stream": [53, 54, 109], "rgb": [53, 54, 72, 74, 105, 106, 107], "0x18424752": [53, 54], "rgb24": [53, 54], "800x80": [53, 54], "tbr": [53, 54], "tbn": [53, 54], "tbc": [53, 54], "h264": [53, 54], "0x5570932967c0": 53, "capabl": [53, 54], "mmx2": [53, 54], "sse2fast": [53, 54], "ssse3": [53, 54], "sse4": [53, 54], "avx": [53, 54], "fma3": [53, 54], "bmi2": [53, 54], "avx2": [53, 54], "avx512": [53, 54], "profil": [53, 54, 63, 76], "high": [53, 54, 91], "bit": [53, 54], "264": [53, 54, 63], "163": [53, 54, 56], "r3060": [53, 54], "5db6aa6": [53, 54], "mpeg": [53, 54], "avc": [53, 54], "codec": [53, 54, 61], "copyleft": [53, 54], "2003": [53, 54], "videolan": [53, 54], "x264": [53, 54], "cabac": [53, 54], "ref": [53, 54], "deblock": [53, 54], "analys": [53, 54], "0x3": [53, 54], "0x113": [53, 54], "me": [53, 54], "hex": [53, 54], "subm": [53, 54], "psy": [53, 54], "psy_rd": [53, 54], "mixed_ref": [53, 54], "me_rang": [53, 54], "chroma_m": [53, 54], "trelli": [53, 54], "8x8dct": [53, 54], "cqm": [53, 54], "deadzon": [53, 54], "fast_pskip": [53, 54], "chroma_qp_offset": [53, 54], "lookahead_thread": [53, 54], "sliced_thread": [53, 54], "nr": [53, 54], "decim": [53, 54], "interlac": [53, 54], "bluray_compat": [53, 54], "constrained_intra": [53, 54], "bframe": [53, 54], "b_pyramid": [53, 54], "b_adapt": [53, 54], "b_bia": [53, 54], "weightb": [53, 54], "open_gop": [53, 54], "weightp": [53, 54], "keyint": [53, 54], "250": [53, 54, 55, 63], "keyint_min": [53, 54], "scenecut": [53, 54], "intra_refresh": [53, 54], "rc_lookahead": [53, 54], "rc": [53, 54], "crf": [53, 54], "mbtree": [53, 54], "qcomp": [53, 54], "qpmin": [53, 54], "qpmax": [53, 54], "qpstep": [53, 54], "ip_ratio": [53, 54], "aq": [53, 54], 
"_agent_page_chain": 53, "encod": [53, 54], "lavf58": [53, 54], "avc1": [53, 54], "0x31637661": [53, 54], "yuv420p": [53, 54], "tv": [53, 54], "progress": [53, 54, 62, 76], "12800": [53, 54], "lavc58": [53, 54], "side": [53, 54], "cpb": [53, 54], "min": [53, 54], "avg": [53, 54], "buffer": [53, 54, 63, 69], "vbv_delai": [53, 54], "frame": [53, 54, 61, 62, 72, 74, 105, 106, 107], "lsize": [53, 54], "12kb": 53, "9kbit": 53, "speed": [53, 54], "8x": 53, "11kb": [53, 54], "audio": [53, 54, 109], "0kb": [53, 54], "subtitl": [53, 54], "header": [53, 54], "mux": [53, 54], "overhead": [53, 54], "817029": 53, "qp": [53, 54], "6089": 53, "172": 53, "consecut": [53, 54, 104], "i16": [53, 54], "p16": [53, 54], "skip": [53, 54, 62], "b16": [53, 54], "l0": [53, 54], "l1": [53, 54], "bi": [53, 54], "8x8": [53, 54], "transform": [53, 54], "intra": [53, 54], "inter": [53, 54], "uvdc": [53, 54], "uvac": [53, 54], "dc": [53, 54], "i8": [53, 54], "ddl": [53, 54], "ddr": [53, 54], "vr": [53, 54], "hd": [53, 54], "vl": [53, 54], "hu": [53, 54], "i4": [53, 54], "i8c": [53, 54], "weight": [53, 54, 60, 69], "uv": [53, 54], "stablebaseline3": 53, "video_fold": [53, 54, 55], "name_prefix": [53, 54, 55], "devic": [53, 55, 58], "monitor": [53, 55, 58], "dummyvecenv": [53, 55, 58], "2490": 53, "1842": 53, "009214947": 53, "102": [53, 55], "686": 53, "00179": 53, "0158": 53, "1708": 53, "009872524": 53, "0705": 53, "666": 53, "119": [53, 55, 63], "0195": 53, "6860913151875139": 53, "015838009686558508": 53, "528612112998964": 53, "009214947000145912": 53, "10205078125": 53, "420166969299316": 53, "001785874366760254": 53, "1674": 53, "8192": [53, 56], "0076105352": 53, "634": 53, "246": [53, 56], "0151": 53, "1655": 53, "10240": [53, 56], "006019583": 53, "0597": 53, "606": 53, "238": 53, "0147": 53, "moviepi": [53, 54, 55], "build": [53, 54, 55], "yourpath": [53, 55], "readi": [53, 54, 55, 64], "advanc": [53, 54], "myagentqlearn": 53, "exploration_r": [53, 59], "discount_factor": 53, 
"state_space_s": 53, "action_space_s": 53, "percentag": 53, "q_tabl": 53, "store": [53, 55, 58, 59, 61, 66, 67, 68, 69, 76, 104], "next_step": 53, "explo": 53, "rand": 53, "exploit": 53, "frozenlak": [53, 59], "is_slipperi": [53, 59], "remov": [53, 54, 63, 72, 74, 105, 106, 107], "slipperi": 53, "100000": [53, 67, 68], "content": [53, 59], "frozenlake_no_slipperi": 53, "73509189": [53, 59], "77378094": [53, 59], "81450625": [53, 59], "857375": [53, 59], "9025": [53, 59], "element": [53, 55, 72, 74, 95, 105, 106, 107, 108], "rule": 54, "_env_page_chain": 54, "0x5644b31f07c0": 54, "4kbit": 54, "6x": 54, "10kb": 54, "128633": 54, "6175": 54, "124": [54, 55], "common": [54, 68, 71], "mountain": 54, "car": 54, "mujoco": 54, "classic": [54, 108], "add": [54, 55, 58, 59, 60, 62, 63, 66, 67, 68, 91, 92, 93, 94, 95, 96, 109], "conda": [54, 109], "python3": [54, 62, 63, 64], "overwrit": 54, "specifi": [54, 55, 59, 72, 74, 76, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108], "311": [54, 56], "is_vector_env": 54, "deprec": [54, 59, 72, 74, 105, 106, 107], "unwrap": [54, 72, 74, 105, 106, 107], "get_wrapper_attr": [54, 72, 74, 105, 106, 107], "search": [54, 66, 67, 76, 80, 82, 104], "remind": [54, 110, 111], "2600": [54, 109], "stella": 54, "arcad": [54, 109], "game": [54, 63, 109], "life": [54, 108], "53f58b7": 54, "power": [54, 75], "passive_env_check": 54, "335": [54, 56, 63], "declar": 54, "render_fp": 54, "inconsist": 54, "child": [54, 60], "templat": [54, 62], "solut": [55, 109], "recommend": [55, 59, 66, 89, 96], "stablebaselines3": [55, 56, 57, 58, 61, 63, 68, 110], "env_id": [55, 58, 59, 60], "first_experi": [55, 58, 60], "tupl": [55, 58, 59, 60, 63, 66, 67, 68, 70, 74, 75, 76, 91, 92, 93, 94, 95, 98, 99, 100, 102, 105], "ppo_first_experi": [55, 58, 60], "ppo_first_experimentcartpol": [55, 58, 60], "2977": [55, 58], "v1_2024": [55, 58], "12_09": [55, 58], "10_3a9fa8ad": [55, 58], "121": [55, 56, 58], "bigger": 55, "fine": 55, "second_experi": 55, "ppo_second_experi": 55, 
"ppo_second_experimentcartpol": 55, "2688": 55, "044444444444444": 55, "888": 55, "2592": 55, "6261792600154876": 55, "001418954369607306": 55, "49215440750122": 55, "0018317258218303323": 55, "3124942779541": 55, "33643925189971924": 55, "5568": 55, "19354838709677": 55, "916": 55, "5472": 55, "617610102891922": 55, "0007477130696315725": 55, "27523021697998": 55, "8932236343971454e": 55, "402034759521484": 55, "46521711349487305": 55, "560": 55, "8640": 55, "107": [55, 56], "29113924050633": 55, "946": 55, "8544": 55, "5820738852024079": 55, "008271816929482156": 55, "279": [55, 56], "90625591278075": 55, "005026700906455517": 55, "03750000102445483": 55, "192": [55, 56], "93894958496094": 55, "00014603137969970703": 55, "880": 55, "45_77245043": 55, "108": [55, 56, 59], "130": [55, 56], "166": 55, "imag": [55, 72, 74, 105, 106, 107], "succe": 55, "pipelineenv": [55, 63], "eval_env_ctor": 55, "eval_env_kwarg": 55, "third_experi": 55, "ppo_third_experi": 55, "output3": 55, "ppo_third_experimentcartpol": 55, "1920": 55, "146341463414636": 55, "687": 55, "1824": 55, "612512381374836": 55, "004653797230503187": 55, "76153821945191": 55, "008641918189823627": 55, "03333333339542151": 55, "162071228027344": 55, "3032127618789673": 55, "180": [55, 56, 63], "4704": 55, "20689655172414": 55, "804": 55, "4608": 55, "5940127298235893": 55, "016441003710982238": 55, "154": 55, "39369611740113": 55, "010226544924080372": 55, "07500000102445484": 55, "81913375854492": 55, "005669653415679932": 55, "7392": 55, "08108108108108": 55, "826": 55, "7296": 55, "5620817124843598": 55, "0007149307257350301": 55, "1684087753296": 55, "00030671278364025056": 55, "46017837524414": 55, "4496734142303467": 55, "750": 55, "9984": 55, "103": 55, "113": 55, "64285714285714": 55, "9888": 55, "5782853797078132": 55, "012480927801546693": 55, "679842436313628": 55, "013762158341705799": 55, "04479166660457849": 55, "8429009914398193": 55, "32027459144592285": 55, "1020": 55, "09_da4411b3": 55, 
"fourth_experi": 55, "15000": 55, "ppo_fourth_experi": 55, "fourth_experiment_result": 55, "1440": 55, "86046511627907": 55, "497": 55, "1344": 55, "6368376821279526": 55, "0030540588200588916": 55, "8653003692627": 55, "0012531293323263526": 55, "012786865234375": 55, "270730197429657": 55, "1536": 55, "22857142857143": 55, "502": 55, "618910662829876": 55, "007122196507649825": 55, "85383853912353": 55, "004074861295521259": 55, "009375000186264516": 55, "4535026550293": 55, "02206575870513916": 55, "140": [55, 56], "2976": 55, "49090909090909": 55, "2880": 55, "5889034822583199": 55, "010608512769977096": 55, "22348279953003": 55, "004636458586901426": 55, "06354166707023978": 55, "387840270996094": 55, "16999149322509766": 55, "290": 55, "3072": 55, "510": 55, "5197424411773681": 55, "00876332552181035": 55, "44287853240967": 55, "0023070008028298616": 55, "723819732666016": 55, "12456077337265015": 55, "4416": 55, "71428571428571": 55, "490": 55, "4320": 55, "6150185167789459": 55, "011918687870623534": 55, "91612710952759": 55, "012545783072710037": 55, "07500000055879355": 55, "261075973510742": 55, "5195063650608063": 55, "440": 55, "05357142857143": 55, "484": 55, "5382911033928395": 55, "01824581954351743": 55, "797289085388186": 55, "009921143762767315": 55, "11458333358168601": 55, "537925720214844": 55, "77537801861763": 55, "5760": 55, "475": 55, "5664": 55, "6097852572798729": 55, "005027322360729158": 55, "29339656829834": 55, "0017487265868112445": 55, "345821380615234": 55, "14309996366500854": 55, "580": 55, "5856": 55, "72058823529412": 55, "473": 55, "5608772337436676": 55, "000609715585354298": 55, "05806636810303": 55, "0004261930880602449": 55, "013322830200195": 55, "13966631889343262": 55, "590": 55, "27631578947368": 55, "479": 55, "7200": 55, "5558830961585045": 55, "0035663537412043756": 55, "799719190597536": 55, "010704685002565384": 55, "026041666883975266": 55, "911317825317383": 55, "7546885907649994": 55, "740": 55, "7488": 55, 
"78378378378379": 55, "481": 55, "5342976003885269": 55, "003940091139186919": 55, "248546028137206": 55, "0034270065370947123": 55, "006250000186264515": 55, "23060417175293": 55, "000810086727142334": 55, "760": 55, "8832": 55, "8021978021978": 55, "483": 55, "8736": 55, "5868215769529342": 55, "003875821301941573": 55, "918508291244507": 55, "003322127740830183": 55, "01250000037252903": 55, "372678279876709": 55, "9379729703068733": 55, "900": 55, "8928": 55, "62025316455696": 55, "477": 55, "5886116042733193": 55, "0061642722316454625": 55, "651307249069214": 55, "00532135833054781": 55, "02083333395421505": 55, "704312801361084": 55, "9291621446609497": 55, "910": 55, "10272": 55, "106": 55, "26530612244898": 55, "10176": 55, "5608879745006561": 55, "00618959182217318": 55, "341": [55, 56], "5462481498718": 55, "002049457747489214": 55, "115": [55, 56], "31817626953125": 55, "03178894519805908": 55, "1050": 55, "10464": 55, "120": [55, 56], "68235294117648": 55, "480": 55, "10368": 55, "5731720849871635": 55, "008771563362703683": 55, "809": 55, "6955997467041": 55, "004173839930444956": 55, "018750000465661287": 55, "382": [55, 63], "14801025390625": 55, "0032292604446411133": 55, "1070": 55, "11616": 55, "474": [55, 63], "11520": 55, "5582666076719761": 55, "006921725869559215": 55, "403": [55, 56], "52278537750243": 55, "002198959467932582": 55, "240": 55, "1759490966797": 55, "19455385208129883": 55, "1190": 55, "11808": 55, "122": [55, 56], "129": [55, 56], "01111111111112": 55, "11712": 55, "5791081696748733": 55, "0027768491202399214": 55, "579": 55, "2247428894043": 55, "0006394482916221023": 55, "69767761230469": 55, "7034773826599121": 55, "1210": 55, "12864": 55, "133": 55, "467": [55, 63], "12768": 55, "585125806927681": 55, "0028700591409382527": 55, "194953203201294": 55, "007809734903275967": 55, "015625000279396773": 55, "225722789764404": 55, "04570794105529785": 55, "1320": 55, "13152": 55, "136": [55, 56], "137": 55, "4516129032258": 55, 
"13056": 55, "566350145637989": 55, "004751363794999452": 55, "131759536266326": 55, "0031535024754703045": 55, "002083333395421505": 55, "930537462234497": 55, "9139308258891106": 55, "1350": 55, "14112": 55, "146": [55, 56], "458": 55, "14016": 55, "5963118925690651": 55, "007955966270916815": 55, "367142927646636": 55, "010106074623763561": 55, "07291666744276881": 55, "582631826400757": 55, "14140963554382324": 55, "1450": 55, "14304": 55, "148": [55, 63], "145": 55, "38144329896906": 55, "457": [55, 63], "14208": 55, "5282964281737804": 55, "0037513579605426006": 55, "7543770960532129": 55, "003314702305942774": 55, "04687500018626452": 55, "028775174170732498": 55, "9980020457878709": 55, "1470": 55, "974358974358974": 55, "429": [55, 56], "1248": 55, "6186478942632675": 55, "01541837720311987": 55, "90045881271362": 55, "008347732946276665": 55, "05625000074505806": 55, "35782241821289": 55, "03064143657684326": 55, "27777777777778": 55, "420": 55, "6108133271336555": 55, "005322299412182474": 55, "101": [55, 56], "77589092254638": 55, "0019470953848212957": 55, "0010416666977107526": 55, "62103271484375": 55, "07671612501144409": 55, "791666666666664": 55, "421": 55, "2496": 55, "6129413187503815": 55, "0026073096491330714": 55, "97837677001954": 55, "0008606038172729313": 55, "147621154785156": 55, "07078427076339722": 55, "2784": 55, "433": 55, "585808028280735": 55, "000618002787662908": 55, "70521297454835": 55, "0011669064406305552": 55, "0313491821289": 55, "03644925355911255": 55, "270": 55, "3936": 55, "172413793103445": 55, "426": [55, 56], "3840": 55, "5633251592516899": 55, "009321122000134574": 55, "42370948791503": 55, "004084523767232895": 55, "025000000558793544": 55, "39215087890625": 55, "008745789527893066": 55, "390": [55, 63], "4224": 55, "49230769230769": 55, "442": 55, "4128": 55, "5803879588842392": 55, "014389420735763759": 55, "104": 55, "66002407073975": 55, "004097627475857735": 55, "0406250006519258": 55, "91357421875": 55, 
"06607359647750854": 55, "5376": 55, "64179104477611": 55, "436": [55, 63], "5280": 55, "5912896677851677": 55, "005140897812877121": 55, "03294086456299": 55, "0011872043833136559": 55, "535093307495117": 55, "058519959449768066": 55, "540": 55, "6103896103896": 55, "5753983780741692": 55, "007284667211085139": 55, "77001705169678": 55, "006244419142603874": 55, "018750000558793545": 55, "275583267211914": 55, "08379793167114258": 55, "6816": 55, "43835616438356": 55, "444": [55, 63], "6720": 55, "5323616154491901": 55, "007125963812965574": 55, "626363372802736": 55, "006300304085016251": 55, "037500000651925804": 55, "018963813781738": 55, "9616018049418926": 55, "690": 55, "7104": 55, "64044943820225": 55, "452": 55, "7008": 55, "6117519900202751": 55, "00272596117890016": 55, "280": [55, 56], "87690296173093": 55, "0006742061232216656": 55, "177": 55, "05584716796875": 55, "23516911268234253": 55, "720": 55, "8160": 55, "7710843373494": 55, "439": 55, "8064": 55, "57562275826931": 55, "007087657243634205": 55, "132426935434342": 55, "006503245793282986": 55, "013541666883975267": 55, "0611423254013062": 55, "9250432252883911": 55, "830": 55, "8352": 55, "17171717171718": 55, "441": 55, "8256": 55, "5499906323850154": 55, "006553472934062299": 55, "234399175643922": 55, "0049788737669587135": 55, "01979166707023978": 55, "909204483032227": 55, "6594350934028625": 55, "850": 55, "9600": 55, "9504": 55, "5261909484863281": 55, "0017483091195268584": 55, "570764398574829": 55, "003514515236020088": 55, "586752414703369": 55, "13919365406036377": 55, "980": 55, "9792": 55, "447": [55, 63], "9696": 55, "5318385265767575": 55, "011880275755174807": 55, "452494937181473": 55, "005105054937303066": 55, "026041667256504298": 55, "5928417444229126": 55, "9581267684698105": 55, "11136": 55, "93406593406593": 55, "450": 55, "11040": 55, "597024767100811": 55, "0026963132240780396": 55, "00028538703918": 55, "00123556365724653": 55, "44751739501953": 55, "7172763645648956": 
55, "1140": 55, "11232": 55, "116": 55, "449": 55, "5200830087065696": 55, "007722906641962868": 55, "537515223026276": 55, "007102598436176777": 55, "09479166679084301": 55, "2106761932373047": 55, "3601408004760742": 55, "1150": 55, "12672": 55, "131": 55, "17021276595744": 55, "454": [55, 63], "12576": 55, "4788337767124176": 55, "038107051831805926": 55, "19971267301589252": 55, "03344881534576416": 55, "4125000021420419": 55, "027751892805099487": 55, "9155157506465912": 55, "1300": 55, "451": [55, 63], "43379202783107756": 55, "0009571537776840389": 55, "2258590620011092": 55, "00857666414231062": 55, "02291666679084301": 55, "1831377148628235": 55, "9741729144006968": 55, "13824": 55, "143": [55, 56], "138": 55, "340206185567": 55, "448": [55, 63], "13728": 55, "644160869717598": 55, "008214838248265188": 55, "01987018278450705": 55, "013853602111339569": 55, "051041666977107526": 55, "009712214581668377": 55, "004491209983825684": 55, "1420": 55, "446": 55, "49214922785758974": 55, "0010527112196238697": 55, "321832603216171": 55, "0050249057821929455": 55, "05833333460614085": 55, "8884248733520508": 55, "7490366697311401": 55, "14976": 55, "155": 55, "14880": 55, "564744371175766": 55, "00030555504467870697": 55, "583": 55, "7946674346924": 55, "618979685095837e": 55, "178": [55, 56], "1756591796875": 55, "13469618558883667": 55, "1540": 55, "48720408231019974": 55, "004510738217747256": 55, "15749059994705022": 55, "00864747166633606": 55, "051041667256504296": 55, "0023154467344284058": 55, "05674338340759277": 55, "19_3d4e7443": 55, "ppo_fourth_experimentcartpol": 55, "Be": [55, 56, 59], "visibl": 56, "mayb": [56, 60, 72, 74, 105, 106, 107], "favorit": 56, "569767441860463": 56, "591": 56, "903225806451612": 56, "567": 56, "49425287356322": 56, "557": 56, "437": 56, "6862980721518397": 56, "016145382329705173": 56, "95402302145958": 56, "009136519394814968": 56, "1068359375": 56, "268213748931885": 56, "00011879205703735352": 56, "6861314654350281": 
56, "016842093877494337": 56, "17323541939258": 56, "007978597655892372": 56, "1025390625": 56, "8147406578063965": 56, "0003063678741455078": 56, "6855484075844288": 56, "015410382760455832": 56, "32087602615356": 56, "008056383579969406": 56, "105224609375": 56, "251166343688965": 56, "012730419635772705": 56, "409": 56, "6685062969103456": 56, "014946110408345703": 56, "33342697024345": 56, "008881180547177792": 56, "060693359375": 56, "630510330200195": 56, "1108359694480896": 56, "6661150485277176": 56, "013149463082663715": 56, "683698976039885": 56, "007977155968546867": 56, "043798828125": 56, "9081449508667": 56, "05941134691238403": 56, "402": 56, "6675648905336857": 56, "01585175626023556": 56, "83039126396179": 56, "008422331884503365": 56, "05068359375": 56, "283363342285156": 56, "06431382894515991": 56, "397": [56, 63], "6372709095478057": 56, "021793167035502846": 56, "082052528858185": 56, "008312474004924297": 56, "09052734375": 56, "487403869628906": 56, "29079967737197876": 56, "392": 56, "6271074561402201": 56, "021605250079301187": 56, "17835917472839": 56, "01045585609972477": 56, "107275390625": 56, "300893783569336": 56, "24486440420150757": 56, "389": 56, "641490114107728": 56, "01604906824504724": 56, "91851507425308": 56, "007528345100581646": 56, "0734375": 56, "153453826904297": 56, "22841238975524902": 56, "12288": 56, "374": 56, "6044564859941601": 56, "016754490803577937": 56, "31612868309021": 56, "009068363346159458": 56, "078857421875": 56, "16673469543457": 56, "30177778005599976": 56, "371": 56, "6121436970308423": 56, "014887585233373102": 56, "94282633662224": 56, "005902732722461224": 56, "049267578125": 56, "8435115814209": 56, "21425354480743408": 56, "368": 56, "621853212080896": 56, "01637536641501356": 56, "13811606168747": 56, "008492568507790565": 56, "06396484375": 56, "353282928466797": 56, "31684231758117676": 56, "14336": 56, "367": 56, "5713022822514177": 56, "01559052456432255": 56, "737575674057005": 56, 
"00888746790587902": 56, "071826171875": 56, "2188663482666": 56, "43151962757110596": 56, "365": [56, 63], "5959413398057223": 56, "01293433145910967": 56, "95801417827606": 56, "007563581224530935": 56, "06982421875": 56, "49068832397461": 56, "40706634521484375": 56, "362": 56, "6087406625971198": 56, "011938219325384126": 56, "20582329630852": 56, "005129554774612188": 56, "04287109375": 56, "536352157592773": 56, "3696613907814026": 56, "16384": 56, "363": 56, "5794724302366376": 56, "004287737552658655": 56, "43672263324261": 56, "0037438003346323967": 56, "014404296875": 56, "200799465179443": 56, "6620278060436249": 56, "360": 56, "5922138599678874": 56, "012010189255670411": 56, "09716731309891": 56, "007144401781260967": 56, "075146484375": 56, "848328590393066": 56, "5530484616756439": 56, "357": 56, "6040949983522296": 56, "009169524490425828": 56, "84913797974586": 56, "007860680110752583": 56, "072705078125": 56, "374231338500977": 56, "7407508194446564": 56, "18432": 56, "111": 56, "5607249280437827": 56, "002986471042095218": 56, "21346059292555": 56, "003013045061379671": 56, "011279296875": 56, "299112319946289": 56, "8296276032924652": 56, "112": 56, "358": 56, "5921528477221727": 56, "01051775121013634": 56, "23670785278082": 56, "005722516216337681": 56, "06689453125": 56, "937105655670166": 56, "7620555758476257": 56, "109": 56, "355": 56, "5929792949929833": 56, "005616791581269353": 56, "9369278550148": 56, "0032515935599803925": 56, "02109375": 56, "186660766601562": 56, "6016848087310791": 56, "20480": 56, "125": 56, "5653722988441586": 56, "008493624679249478": 56, "953543305397034": 56, "005177437327802181": 56, "07109375": 56, "79820442199707": 56, "7749437093734741": 56, "5738503985106945": 56, "005740263756888453": 56, "06800128221512": 56, "006686339154839516": 56, "03720703125": 56, "82557487487793": 56, "33640867471694946": 56, "351": 56, "5821122424677014": 56, "0035777818571659735": 56, "145361164212225": 56, 
"004387532360851765": 56, "018701171875": 56, "294953346252441": 56, "6190232038497925": 56, "22528": 56, "141": 56, "354": 56, "5748784447088837": 56, "008402446379477624": 56, "196025171130895": 56, "005493971519172192": 56, "05244140625": 56, "0958304405212402": 56, "9052915200591087": 56, "352": 56, "55838915547356": 56, "008732947133103153": 56, "26576453149319": 56, "005845913663506508": 56, "065673828125": 56, "673324584960938": 56, "7672396898269653": 56, "5832941999658943": 56, "010998867846501526": 56, "56470604687929": 56, "006126352585852146": 56, "073388671875": 56, "2158937454223633": 56, "8782470673322678": 56, "24576": 56, "157": 56, "5647322304546833": 56, "007764048119133804": 56, "263426271080974": 56, "007562276907265186": 56, "090771484375": 56, "511579513549805": 56, "777026578783989": 56, "164": 56, "5593959849327803": 56, "0112069135720958": 56, "52521513402462": 56, "012146038934588432": 56, "162939453125": 56, "99325180053711": 56, "779657244682312": 56, "5535886317491532": 56, "003764605871401727": 56, "87041089832783": 56, "007615496404469013": 56, "03154296875": 56, "99373245239258": 56, "35959136486053467": 56, "26624": 56, "175": 56, "5552531754598021": 56, "005408551605069078": 56, "160164260864256": 56, "005178069695830345": 56, "025634765625": 56, "7170295715332": 56, "7826626151800156": 56, "182": 56, "5358171337284148": 56, "00488179410531302": 56, "18989806524478": 56, "0034808891359716654": 56, "06181640625": 56, "12967732548713684": 56, "16228169202804565": 56, "350": [56, 63], "5562285710126161": 56, "004001504971529357": 56, "59976389706135": 56, "002194597851485014": 56, "021923828125": 56, "274383068084717": 56, "8627262711524963": 56, "28672": 56, "5598000731319189": 56, "004672619019402191": 56, "96599825024605": 56, "0037293194327503443": 56, "034814453125": 56, "138860702514648": 56, "9212513640522957": 56, "201": 56, "5327114884741604": 56, "002263979368581204": 56, "354254606366155": 56, "0018954614643007517": 56, 
"00458984375": 56, "1828498840332": 56, "01690804958343506": 56, "194": 56, "5350399187766015": 56, "010988622946024406": 56, "65103582441807": 56, "012016495689749718": 56, "100732421875": 56, "403335094451904": 56, "8912321701645851": 56, "30720": 56, "208": 56, "5623490344733": 56, "007931908047612523": 56, "936047033965586": 56, "004620042629539967": 56, "0498046875": 56, "1860785484313965": 56, "8344163149595261": 56, "218": 56, "349": 56, "514452669210732": 56, "0014171435825119261": 56, "242323934612796": 56, "006443873047828674": 56, "032568359375": 56, "14281976222991943": 56, "007233858108520508": 56, "211": 56, "348": 56, "5367143749259412": 56, "01454816997575108": 56, "89514188542962": 56, "009338829666376114": 56, "122314453125": 56, "663129806518555": 56, "9458933025598526": 56, "32768": 56, "225": 56, "539276737626642": 56, "0037407161165901926": 56, "433760127052665": 56, "013978826813399792": 56, "064990234375": 56, "37698429822921753": 56, "035490989685058594": 56, "235": [56, 63], "4973093102686107": 56, "012424326899053994": 56, "345036637177691": 56, "008750807493925095": 56, "106884765625": 56, "18800251185894012": 56, "7767911404371262": 56, "230": 56, "346": 56, "5259521684609354": 56, "02139304491574876": 56, "581413919106126": 56, "012810716405510902": 56, "201708984375": 56, "7710778713226318": 56, "8799830973148346": 56, "34816": 56, "243": 56, "5016440353356302": 56, "005388573392338003": 56, "6560175356687978": 56, "0062754955142736435": 56, "066552734375": 56, "09270089864730835": 56, "12096387147903442": 56, "150": 56, "347": 56, "4738074015825987": 56, "0019494367443257943": 56, "4576879689877387": 56, "005790143273770809": 56, "037353515625": 56, "18722578883171082": 56, "35352087020874023": 56, "345": 56, "5140718438662588": 56, "0004109115216124337": 56, "4875038336322177": 56, "0043577756732702255": 56, "02763671875": 56, "17618514597415924": 56, "08825933933258057": 56, "36864": 56, "257": 56, "505179504211992": 56, 
"0033724807828548363": 56, "052925960079301": 56, "01005391776561737": 56, "107958984375": 56, "09074155241250992": 56, "022495508193969727": 56, "160": 56, "343": 56, "48579485388472676": 56, "9661558296065775e": 56, "9125513993494678": 56, "005249223671853542": 56, "029833984375": 56, "011615638621151447": 56, "20920252799987793": 56, "262": [56, 63], "342": 56, "5235147284343838": 56, "003425118201994337": 56, "1361884556215955": 56, "005556339398026466": 56, "04345703125": 56, "04526910558342934": 56, "0790131688117981": 56, "38912": 56, "5124423679895699": 56, "320563549408689e": 56, "6837369541579392": 56, "0015420113923028111": 56, "011376953125": 56, "048248302191495895": 56, "026345491409301758": 56, "291": 56, "49892428508028386": 56, "0013376812363276257": 56, "5619548875140026": 56, "005291177425533533": 56, "031787109375": 56, "08444305509328842": 56, "06384599208831787": 56, "339": 56, "5188016330823302": 56, "0005724920614738948": 56, "6982153896708041": 56, "0033194604329764843": 56, "013623046875": 56, "05807049572467804": 56, "02944713830947876": 56, "40960": 56, "294": 56, "340": 56, "114": 56, "4957636919803917": 56, "004073993970087031": 56, "4760114259843249": 56, "008029351010918617": 56, "062060546875": 56, "03772534430027008": 56, "0035400986671447754": 56, "338": 56, "5157848816365004": 56, "0030665539947221988": 56, "336146240857488": 56, "006352574564516544": 56, "04088807851076126": 56, "8884187638759613": 56, "337": 56, "5111317873932422": 56, "0015660247969208284": 56, "431194728880655": 56, "0047972844913601875": 56, "030078125": 56, "02516128309071064": 56, "002133488655090332": 56, "43008": 56, "309": 56, "4922599596902728": 56, "00019939174962928518": 56, "27829485264082904": 56, "0020252331160008907": 56, "00849609375": 56, "003118633758276701": 56, "016220271587371826": 56, "190": 56, "325": 56, "336": 56, "48774116234853865": 56, "0037827152031240986": 56, "19911157262977214": 56, "0032185050658881664": 56, "030908203125": 56, 
"013709803111851215": 56, "26044702529907227": 56, "4999147373251617": 56, "0014124810899375007": 56, "2843351167524816": 56, "005678324960172176": 56, "02919921875": 56, "020313650369644165": 56, "055005550384521484": 56, "45056": 56, "321": 56, "127": 56, "5042375044897198": 56, "0011692596512148158": 56, "16990109027537983": 56, "0032151443883776665": 56, "0251953125": 56, "04340684413909912": 56, "01492154598236084": 56, "334": 56, "5080808162689209": 56, "003549698476854246": 56, "1296787588755251": 56, "00621542613953352": 56, "05302734375": 56, "04627562314271927": 56, "0931699275970459": 56, "327": 56, "08": 56, "333": 56, "49875728664919733": 56, "0030554209108231587": 56, "17959667765753692": 56, "006197799928486347": 56, "040576171875": 56, "017518581822514534": 56, "0012366771697998047": 56, "47104": 56, "332": 56, "135": 56, "47473918814212085": 56, "000753292843728559": 56, "10499831844717847": 56, "0022293308284133673": 56, "022412109375": 56, "002592694014310837": 56, "007587909698486328": 56, "210": 56, "331": 56, "495706963352859": 56, "000426401813456323": 56, "08446975928891334": 56, "0015652569709345698": 56, "008056640625": 56, "010549742728471756": 56, "0012684464454650879": 56, "330": 56, "4880263367667794": 56, "0010256466252030806": 56, "11710972856089938": 56, "0032306264620274305": 56, "0166015625": 56, "178580224514008e": 56, "04952669143676758": 56, "49152": 56, "328": 56, "46606638189405203": 56, "0012806903061573394": 56, "06755283990169118": 56, "00338670052587986": 56, "015380859375": 56, "011496410705149174": 56, "011872351169586182": 56, "369": 56, "4813957496546209": 56, "0005197725258767605": 56, "05070137128532224": 56, "0011682924814522266": 56, "011328125": 56, "014267145656049252": 56, "02635061740875244": 56, "144": 56, "5085351384244859": 56, "0008456365059828386": 56, "07298006257788074": 56, "0026025455445051193": 56, "022119140625": 56, "021276511251926422": 56, "025399088859558105": 56, "51200": 56, "361": 56, "151": 
56, "4632605144754052": 56, "0030253833654569464": 56, "04603174216354091": 56, "005220792256295681": 56, "01565437763929367": 56, "0232236385345459": 56, "383": 56, "323": [56, 63], "49069994343444706": 56, "0018766895205772015": 56, "03171468693126371": 56, "005815165117383003": 56, "061181640625": 56, "0051743886433541775": 56, "12434303760528564": 56, "370": 56, "322": 56, "152": 56, "49728800179436805": 56, "002779247868602397": 56, "04587990254440229": 56, "005585251376032829": 56, "0578125": 56, "01021644752472639": 56, "028099477291107178": 56, "ppo_2024": 56, "28_15": 56, "48_4fc693bc": 56, "backend": 56, "tkagg": 56, "turn": 56, "whatev": [56, 84], "come": 56, "n_simu": [56, 61, 80, 81, 86], "uniqu": [56, 63, 66, 67, 68, 76, 92, 96], "to_plot_df": 56, "global_step": [56, 61, 82, 104], "set_xlabel": 56, "set_ylabel": 56, "had": [56, 72, 74, 105, 106, 107], "data_sourc": [56, 82, 84], "string": [56, 63, 66, 67, 72, 74, 76, 78, 79, 82, 84, 104, 105, 106, 107], "Of": 56, "cours": 56, "dw_time_elaps": [56, 82], "ad": [56, 57, 104], "identifi": [56, 61, 66, 67, 68, 69, 74, 79], "visualis": [56, 110], "offer": [56, 109], "tree": [56, 66, 67], "structur": [56, 109], "tensorboard_log_fold": 56, "algo_nam": [56, 86], "event": [56, 86, 104], "tfevent": [56, 86], "xxxxx": [56, 86], "tfenvent": 56, "xxx": 56, "leaf": 56, "tuto": 56, "stablebaselin": [56, 71], "log_path": 56, "path_ppo": 56, "ppo_cartpol": 56, "path_a2c": 56, "a2c_cartpol": 56, "tensorboard_log": [56, 68], "model2": 56, "model2_seed2": 56, "5_000": 56, "tb_log_nam": [56, 68], "tensorboard_folder_to_datafram": 56, "scalar": [56, 66, 67, 68, 69, 76, 104], "tensorboad": 56, "kei": [56, 66, 67, 69, 72, 74, 75, 82, 84, 86, 92, 104, 105, 106, 107], "measur": [56, 66, 109], "data_in_datafram": 56, "ppo_1": 56, "685392": 56, "719999": 56, "689999": 56, "a2c_1": 56, "384617": 56, "071430": 56, "500000": 56, "444443": 56, "2500": [56, 57, 80], "980392": 56, "070175": 56, "3500": 56, "723076": 56, "4000": 56, 
"358208": 56, "4500": 56, "821918": 56, "448719": 56, "a2c_2": 56, "888889": 56, "766666": 56, "097561": 56, "090908": 56, "681820": 56, "135136": 56, "414635": 56, "229885": 56, "406593": 56, "927834": 56, "3145": 56, "2201": 56, "2072": 56, "1632": 56, "1614": 56, "1607": 56, "1616": 56, "1618": 56, "1621": 56, "1620": 56, "1612": 56, "1595": 56, "1391": 56, "1455": 56, "1510": 56, "1528": 56, "1542": 56, "1555": 56, "1567": 56, "1579": 56, "1584": 56, "1590": 56, "007330": 56, "009172": 56, "080078": 56, "057373": 56, "686539": 56, "670926": 56, "620371": 56, "650572": 56, "397493": 56, "603015": 56, "657923": 56, "598895": 56, "643388": 56, "581709": 56, "652546": 56, "619731": 56, "669858": 56, "627314": 56, "628350": 56, "599171": 56, "621631": 56, "595288": 56, "526879": 56, "519675": 56, "613992": 56, "412572": 56, "011276": 56, "149804": 56, "145305": 56, "033920": 56, "038705": 56, "040012": 56, "008545": 56, "001227": 56, "000320": 56, "000204": 56, "000652": 56, "000053": 56, "010114": 56, "052339": 56, "034683": 56, "032164": 56, "006440": 56, "002829": 56, "000402": 56, "001146": 56, "011359": 56, "000118": 56, "0007": 56, "986723": 56, "091626": 56, "012042": 56, "017113": 56, "456509": 56, "260746": 56, "965336": 56, "581780": 56, "398053": 56, "973416": 56, "076917": 56, "540639": 56, "548": 56, "644836": 56, "487506": 56, "014572": 56, "567771": 56, "852792": 56, "482461": 56, "358838": 56, "897699": 56, "174485": 56, "617828": 56, "083209": 56, "551695": 56, "038313": 56, "609340": 56, "policy_loss": 56, "864703": 56, "619358": 56, "543582": 56, "340154": 56, "098149": 56, "196676": 56, "017691": 56, "157162": 56, "859272": 56, "750966": 56, "904987": 56, "656176": 56, "951152": 56, "320224": 56, "048145": 56, "843183": 56, "130708": 56, "144796": 56, "748440": 56, "190367": 56, "fulli": 57, "quick": [57, 62, 63, 108, 110], "introduct": [57, 67, 68], "incorpor": 57, "behav": [57, 104], "exactli": 57, "replac": [57, 62, 63, 109], "unifi": [57, 73, 
91, 92, 93, 94, 95, 96], "reproduc": [57, 59, 60, 63, 64, 97, 100, 102, 111], "quickli": 57, "detail": [57, 61, 62, 75, 110], "under": [57, 60, 62, 64, 72, 74, 105, 106, 107], "hood": 57, "metric": [57, 60, 72, 74, 105, 106, 107, 108, 110], "00000001": 57, "vf_coef": 57, "log_interv": [57, 104], "400": 57, "dev": [57, 62, 64, 72, 74, 105, 106, 107, 109, 112], "subclass": [57, 66, 67, 68], "stats_altern": 57, "456": [57, 63], "600": [57, 62], "n_optuna_work": [57, 76], "optuna_parallel": [57, 76], "fit_fract": [57, 76], "everyth": [57, 63], "readabl": 58, "set_level": [58, 63, 64], "examl": 58, "anymor": 58, "keep": [58, 62, 66, 67, 104], "By": [58, 60, 66, 67, 72, 74, 105, 106, 107], "tabular_rl": 59, "qlagent": 59, "experiment_to_sav": 59, "exploration_typ": 59, "arg": [59, 72, 74, 83, 91, 92, 93, 94, 95, 96, 105, 106, 107], "300000": 59, "ql": 59, "qlfrozenlak": 59, "178711": 59, "futurewarn": 59, "behavior": [59, 60, 61, 62, 63, 64, 89], "concaten": [59, 75, 104], "empti": 59, "futur": [59, 60, 67, 68], "longer": [59, 72, 74, 104, 105, 106, 107], "exclud": 59, "determin": [59, 76], "dtype": [59, 69, 91, 93, 94, 95], "retain": 59, "relev": 59, "concat": 59, "oper": [59, 60, 76, 109], "_data": [59, 104], "ignore_index": 59, "77377103": 59, "77378092": 59, "At": [59, 75], "Or": 59, "temporari": 59, "tempfil": 59, "temporarydirectori": 59, "tmpdir": 59, "csv": [59, 77], "locat": 59, "get_single_path_of_most_recently_trained_experiment_manager_obj_from_path": 59, "recent": [59, 84], "loading_tool": 59, "path_to_load": 59, "loaded_experiment_manag": 59, "test_env": 59, "next_observ": [59, 69], "success": [59, 66, 67, 68], "retri": 59, "haven": 59, "highli": [59, 66], "output_dir_path": 59, "env_seed_max_valu": 59, "agent_to_train_and_sav": 59, "195540": 59, "1830874": 59, "15802259": 59, "12087594": 59, "16358512": 59, "16674384": 59, "10049071": 59, "09517673": 59, "11326436": 59, "07236883": 59, "10552007": 59, "06660356": 59, "07020302": 59, "1104349": 59, 
"23065463": 59, "19028937": 59, "20689438": 59, "08408004": 59, "17382279": 59, "2417443": 59, "29498867": 59, "46487572": 59, "52043878": 59, "56986596": 59, "19259904": 59, "57831479": 59, "6858159": 59, "22998936": 59, "39350426": 59, "env_for_load": 59, "params_for_load": 59, "loaded_ag": 59, "reus": 59, "know": 60, "sequenc": [60, 66, 67, 72, 74, 76, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107], "produc": [60, 72, 74, 76, 105, 106, 107], "repeat": [60, 108], "reason": [60, 72, 74, 105, 106, 107], "ani": [60, 61, 64, 66, 67, 68, 72, 74, 76, 91, 92, 93, 94, 95, 96, 105, 106, 107], "essenti": 60, "debug": [60, 61, 64, 68, 72, 74, 103, 105, 106, 107], "valid": [60, 91, 92, 93, 94, 95, 96], "encount": [60, 104], "issu": [60, 62, 63, 64, 90], "exact": 60, "led": 60, "fix": [60, 62, 63, 64, 66, 67, 83], "guarante": [60, 91, 92, 93, 94, 95, 96], "trace": 60, "consist": 60, "anoth": [60, 61, 82, 104, 110], "review": [60, 62, 64], "confer": 60, "advertis": 60, "scenario": 60, "understood": 60, "commun": 60, "stakehold": 60, "machin": [60, 76, 109], "reli": 60, "help": [60, 62, 63, 72, 74, 105, 106, 107, 108], "across": [60, 66, 67, 76], "competit": 60, "ident": 60, "condit": [60, 72, 74, 105, 106, 107], "split": 60, "lead": [60, 89, 104], "alwai": [60, 62], "verif": 60, "result_list": 60, "entropi": [60, 61, 66, 67, 72, 74, 76, 89, 91, 93, 94, 95, 105, 106, 107], "spawn_kei": 60, "567498838741829": 60, "6356604305460527": 60, "n_children_spawn": 60, "2466559261185188": 60, "8402527193117317": 60, "4732958445958833": 60, "5863995575997462": 60, "1722486099076424": 60, "1930990650226178": 60, "current": [60, 63, 64, 67, 68, 72, 74, 76, 85, 104, 105, 106, 107], "randint": [60, 61], "item": [60, 92], "3817148928": 60, "671396126": 60, "2950680447": 60, "791815335": 60, "3335786391": 60, "82990446": 60, "2463687945": 60, "1829003305": 60, "647811387": 60, "3543380778": 60, "3887070615": 60, "363268341": 60, "3607514851": 60, "3881090947": 60, "1018754931": 60, "693246422": 60, 
"3606543353": 60, "433394544": 60, "2194426398": 60, "3928404622": 60, "customari": 61, "typic": [61, 72, 74, 76, 105, 106, 107, 108], "acquisit": 61, "mainli": [61, 110], "teach": [61, 63, 108, 110], "student": [61, 110], "gif_writ": 61, "saving_path": 61, "visu_gymnasium_gif": 61, "neural": 61, "due": [61, 72, 74, 105, 106, 107], "variablil": 61, "goe": 61, "being": [61, 63], "fed": [61, 70], "xtag": [61, 63, 82], "synchron": [61, 62, 82], "coordin": [61, 91], "hand": [61, 62], "kind": [61, 86], "aggreg": [61, 78], "distinct": 61, "too": [61, 64], "rug": 61, "blue": 61, "averag": [61, 108], "light": 61, "inidividu": 61, "savefig": [61, 80, 81, 82], "png": 61, "bar": 61, "explicit": 61, "vertic": 61, "errorbar": 61, "band": [61, 63, 80, 82], "analysi": [61, 80, 82, 109], "along": 61, "whole": [61, 76], "prefer": [61, 62], "interpret": [61, 76, 77], "docstr": [61, 62, 63], "plu": [61, 80, 81, 82], "minu": [61, 80, 81, 82], "quantil": [61, 80, 81, 82], "divid": [61, 80, 81, 82], "sqrt": [61, 80, 81, 82], "choose_random_ag": [61, 79], "example_ev": 61, "repositori": [62, 63, 64, 110], "submit": 62, "pr": [62, 63, 64, 112], "local": [62, 63, 64], "git": [62, 64, 109], "account": [62, 80, 82], "click": 62, "button": 62, "copi": [62, 66, 67, 68, 72, 76], "clone": 62, "repo": 62, "disk": 62, "your_login": 62, "connect": [62, 72, 74, 105, 106, 107], "slow": 62, "cd": [62, 64], "poetri": [62, 63, 64], "pip": [62, 109, 110], "curl": [62, 64], "ssl": [62, 64], "venv": 62, "sync": [62, 64], "shell": 62, "yourcommandher": 62, "upstream": 62, "remot": [62, 63], "latest": [62, 82, 84], "properli": 62, "branch": [62, 63], "checkout": 62, "fetch": 62, "merg": [62, 63], "hold": 62, "my_featur": 62, "edit": 62, "modified_fil": 62, "push": 62, "origin": 62, "instruct": 62, "send": [62, 66, 67], "email": 62, "committ": 62, "subsequ": 62, "conflict": 62, "relat": 62, "resolv": 62, "rebas": 62, "softwar": [62, 109], "style": [62, 72, 74, 76, 104, 105, 106, 107], "flake8": 62, 
"reformat": 62, "recommit": 62, "verifi": 62, "autopep8": 62, "yourfil": 62, "approv": 62, "mrg": 62, "complet": [62, 72, 109], "subject": 62, "incomplet": 62, "receiv": [62, 63], "wip": 62, "matur": 62, "someth": [62, 64], "duplic": 62, "broad": 62, "seek": 62, "collabor": 62, "benefit": 62, "inclus": 62, "task": [62, 72, 74, 105, 106, 107, 108], "pytest": [62, 64], "guidelin": [62, 108, 110, 111], "enhanc": 62, "correct": [62, 63, 66, 67, 69], "auto": 62, "pep8": 62, "violat": 62, "azur": [62, 64], "pipelin": [62, 63, 64, 70], "mac": 62, "window": [62, 72, 74, 105, 106, 107], "view": [62, 92], "articl": 62, "written": [62, 104], "restructuredtext": [62, 64], "rst": 62, "myst": [62, 109], "parser": [62, 109], "_nameref": 62, "namer": 62, "md": 62, "syntax": 62, "explan": [62, 108], "video_plot_my_experi": 62, "width": 62, "sphinx_gallery_thumbnail_path": 62, "thumbnail": 62, "jpg": 62, "my_experi": 62, "slower": 62, "guidel": 62, "472": 63, "integr": 63, "468": 63, "407": 63, "make_gym": 63, "453": 63, "463": 63, "typo": 63, "patch": 63, "writer_data": 63, "438": 63, "445": 63, "455": 63, "411": 63, "405": 63, "406": 63, "408": 63, "404": 63, "experiencemanag": 63, "396": 63, "coverag": [63, 64], "workflow": [63, 108, 109], "385": 63, "rtd": 63, "379": 63, "materi": 63, "simplifi": 63, "376": 63, "seaborn": 63, "confid": [63, 80, 81, 82, 108], "276": 63, "326": 63, "upgrad": 63, "281": 63, "318": 63, "gpu": 63, "make_atari_env": [63, 71], "togeth": 63, "298": 63, "jax": [63, 76, 77], "attent": [63, 76], "277": 63, "273": 63, "percentil": 63, "interv": [63, 80, 81, 82, 91, 108], "sd": 63, "261": 63, "mdqnagent": 63, "mdqn": 63, "244": 63, "253": 63, "compress": [63, 66, 67], "link": [63, 108], "spring": 63, "226": 63, "227": 63, "curv": [63, 80, 81, 82, 110], "223": 63, "132": 63, "tracker": 63, "bandittrack": 63, "track": 63, "191": 63, "161": 63, "replai": 63, "replaybuff": 63, "memori": 63, "feb": 63, "2022": 63, "126": 63, "__version__": 63, "0dev0": 63, 
"attribut": [63, 66, 67, 68, 69, 72, 74, 75, 76, 77, 88, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "overrid": [63, 104], "__eq__": 63, "118": 63, "feat": 63, "finitemdp": 63, "timestamp": [63, 72, 74, 76, 105, 106, 107], "short": [63, 64], "layout": 63, "simpler": 63, "quickstart": [63, 108, 110, 111], "rlsvi": 63, "tabular": 63, "rlsviagent": 63, "posterior": 63, "psrl": 63, "psrlagent": 63, "contributor": 63, "unique_id": [63, 66, 67, 68], "assign": 63, "remoteexperimentmanag": [63, 104], "transfer": [63, 110], "basewrapp": 63, "convert": [63, 72, 91, 92, 93, 94, 95, 96, 109], "default_rng": [63, 69], "randomst": 63, "agenthandl": 63, "miss": 63, "agentstat": 63, "free": 63, "pomdp": 63, "multi": [63, 95, 109], "processpoolexecutor": 63, "threadpoolexecutor": 63, "nest": 63, "reverb": 63, "client": 63, "exchang": 63, "messag": 63, "socket": 63, "report": [64, 76], "standalon": 64, "pull": 64, "rest": 64, "beginn": 64, "question": [64, 72], "trigger": 64, "glad": 64, "sort": 64, "markdown": 64, "live": 64, "assum": 64, "_build": 64, "examples_pattern": 64, "your_regex_goes_her": 64, "cover": 64, "resourc": 64, "root": 64, "long_test": 64, "ltest": 64, "test_": 64, "belong": 64, "ltest_": 64, "numpydoc": [64, 109], "rather": [64, 72, 74, 105, 106, 107], "statement": 64, "outsid": [64, 72, 74, 105, 106, 107], "callabl": [66, 67, 68, 76, 78, 82, 84], "copy_env": [66, 67, 68], "bool": [66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 79, 80, 81, 82, 84, 88, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "compress_pickl": [66, 67], "_execution_metadata": [66, 67, 68], "executionmetadata": [66, 67, 68, 104], "_default_writer_kwarg": [66, 67, 68], "_thread_shared_data": [66, 67, 68], "abc": 66, "agenttorch": 66, "abstract": [66, 67], "overridden": [66, 67], "match": [66, 67, 76, 84], "bz2": [66, 67], "_gener": [66, 67, 89, 91, 92, 93, 94, 95, 96], "thread_shared_data": [66, 67, 68, 76], "float": [66, 67, 68, 69, 72, 74, 75, 76, 78, 80, 81, 82, 91, 104, 105, 106, 107], 
"qualiti": 66, "keyword": [66, 67, 68, 71, 73, 83, 98, 99, 100, 102], "complex": [66, 67, 76, 109], "mct": [66, 67], "ideal": [66, 67], "budget1": [66, 67], "budget2": [66, 67], "equival": [66, 67], "properti": [66, 67, 68, 69, 72, 74, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "reduc": [66, 67, 82], "earli": [66, 67, 75, 108], "strictli": [66, 67], "elsewher": [66, 67], "subobject": [66, 67, 68, 72, 74, 105, 106, 107], "filenam": [66, 67, 68, 72, 76, 105, 106, 107], "filepath": [66, 67], "seed_seq": [66, 67, 68, 72, 74, 76, 88, 89, 91, 93, 94, 95, 105, 106, 107], "dill": [66, 67], "r5d46c33e8424": 66, "stackoverflow": [66, 67, 72], "25353243": [66, 67], "tri": [66, 67], "pathlib": [66, 67, 68, 76], "otherwis": [66, 67, 68, 69, 72, 74, 79, 88, 89, 105, 106, 107], "suffix": [66, 67], "uqfound": [66, 67], "_writer": [66, 67, 68, 82, 84], "safe": [66, 67, 68, 78], "overwritten": [67, 76], "sutton": [67, 68, 72, 74, 105, 106, 107], "barto": [67, 68], "2018": [67, 68], "mit": [67, 68], "press": [67, 68], "vari": 67, "r466db297bd20": 67, "basealgorithm": 68, "basepolici": 68, "tensorflow": 68, "reset_num_timestep": 68, "num_timestep": 68, "set_logg": 68, "sb3": [68, 109], "enable_priorit": 69, "shape": [69, 72, 74, 91, 92, 93, 94, 95, 96, 105, 106, 107], "time_s": 69, "priorit": 69, "sampling_mod": 69, "degre": 69, "blob": [69, 72], "master": [69, 71, 72, 91], "deepq": 69, "replay_buff": 69, "100_000": 69, "setup_entri": 69, "float32": 69, "uint32": 69, "end_episod": 69, "trajectori": 69, "dictionari": [69, 70, 72, 74, 75, 76, 82, 84, 92, 105, 106, 107], "uniformli": [69, 93], "namedtupl": 69, "obj": [69, 78, 88], "meth": 69, "update_prior": 69, "new_prior": 69, "prioriti": 69, "wrapper_kwarg": 70, "wrapper1": 70, "wrapper2": 70, "reward_rang": [70, 74, 105], "unscal": 70, "adaptor": 71, "n_frame_stack": 71, "readthedoc": 71, "io": [71, 78, 108], "en": 71, "env_util": 71, "atari_wrappers_dict": 71, "terminal_on_life_loss": 71, "similar": 72, "point": [72, 82], 
"initiallth": 72, "meatadata": 72, "1443129": 72, "farama": 72, "foundat": 72, "class_nam": [72, 105, 106, 107], "is_render_en": [72, 105, 106, 107], "clean": [72, 74, 105, 106, 107], "databas": [72, 74, 105, 106, 107], "rais": [72, 74, 91, 96, 100, 105, 106, 107], "get_video": [72, 105, 106, 107], "is_gen": [72, 74, 105, 106, 107], "is_onlin": [72, 74, 105, 106, 107], "np_random": [72, 74, 91, 92, 93, 94, 95, 96, 105, 106, 107], "intern": [72, 74, 89, 105, 106, 107], "_np_random": [72, 74, 91, 92, 93, 94, 95, 96, 105, 106, 107], "initialis": [72, 74, 105, 106, 107], "known": [72, 74, 105, 106, 107], "convent": [72, 74, 105, 106, 107], "human": [72, 74, 105, 106, 107], "consumpt": [72, 74, 105, 106, 107], "doesn": [72, 74, 105, 106, 107], "ndarrai": [72, 74, 91, 93, 94, 95, 105, 106, 107], "pixel": [72, 74, 105, 106, 107], "ansi": [72, 74, 105, 106, 107], "stringio": [72, 74, 105, 106, 107], "newlin": [72, 74, 105, 106, 107], "escap": [72, 74, 105, 106, 107], "color": [72, 74, 105, 106, 107], "rgb_array_list": [72, 74, 105, 106, 107], "ansi_list": [72, 74, 105, 106, 107], "rendercollect": [72, 74, 105, 106, 107], "pop": [72, 74, 105, 106, 107], "generalis": [72, 74, 105, 106, 107], "therefor": [72, 74, 105, 106, 107, 109], "correctli": [72, 74, 105, 106, 107], "return_info": [72, 74, 105, 106, 107], "prng": [72, 74, 91, 92, 93, 94, 95, 96, 105, 106, 107], "urandom": [72, 74, 105, 106, 107], "paradigm": [72, 74, 105, 106, 107], "obstyp": [72, 74, 105, 106, 107], "analog": [72, 74, 105, 106, 107], "auxiliari": [72, 74, 105, 106, 107], "complement": [72, 74, 105, 106, 107], "dynam": [72, 74, 105, 106, 107], "favor": [72, 74, 105, 106, 107], "clearer": [72, 74, 105, 106, 107], "bootstrap": [72, 74, 80, 82, 105, 106, 107], "acttyp": [72, 74, 105, 106, 107], "veloc": [72, 74, 105, 106, 107], "pole": [72, 74, 105, 106, 107], "supportsfloat": [72, 74, 105, 106, 107], "lava": [72, 74, 105, 106, 107], "barton": [72, 74, 105, 106, 107], "scope": [72, 74, 105, 106, 107], 
"satisfi": [72, 74, 105, 106, 107], "physic": [72, 74, 105, 106, 107], "prematur": [72, 74, 105, 106, 107], "diagnost": [72, 74, 105, 106, 107], "hidden": [72, 74, 105, 106, 107], "individu": [72, 74, 76, 105, 106, 107], "term": [72, 74, 105, 106, 107], "combin": [72, 74, 105, 106, 107], "v26": [72, 74, 105, 106, 107], "distinguish": [72, 74, 105, 106, 107], "favour": [72, 74, 105, 106, 107], "boolean": [72, 74, 80, 81, 82, 85, 87, 91, 92, 93, 94, 95, 96, 105, 106, 107], "undefin": [72, 74, 105, 106, 107], "signal": [72, 74, 105, 106, 107], "emit": [72, 74, 105, 106, 107], "underli": [72, 74, 105, 106, 107], "successfulli": [72, 74, 105, 106, 107, 108], "exceed": [72, 74, 105, 106, 107], "enter": [72, 74, 105, 106, 107], "invalid": [72, 74, 93, 105, 106, 107], "r_min": 74, "r_max": 74, "minimum": [74, 104], "renderfram": 74, "multipleagentscompar": 75, "n_agent": 75, "pairwis": 75, "spent": [75, 76], "manager_list": 75, "n_evalu": 75, "compute_mean_diff": 75, "absolut": 75, "get_result": 75, "partial_compar": 75, "eval_valu": 75, "th": 75, "interim": 75, "till": 75, "pp0": 75, "id_finish": 75, "bk": 75, "threshold": 75, "ax1": 75, "ax2": 75, "gridspec_kw": 75, "height_ratio": 75, "plot_results_sota": 75, "worker_logging_level": 76, "outdir_id_styl": 76, "default_writer_kwarg": 76, "init_kwargs_per_inst": 76, "doubl": [76, 105], "processor": 76, "multipli": 76, "forkserv": [76, 77], "context": [76, 77], "pytorch": [76, 77, 90, 109], "agent_name_": 76, "agent_name_unique_id": 76, "agent_name_timestamp_short_id": 76, "style_log": [76, 104], "progressbar": [76, 104], "one_lin": [76, 104], "rlberry_vers": 76, "guard": 76, "program": 76, "1e4": [76, 84], "build_eval_env": 76, "reseed": [76, 88], "clear_handl": 76, "clear_output_dir": 76, "randomli": 76, "facilit": [76, 108], "conduct": 76, "agent_manag": [76, 78], "evaluation_result": 76, "generate_profil": 76, "fname": 76, "agent_name_profil": 76, "prof": 76, "pruner_method": 76, "continue_previ": 76, "sampler_kwarg": 
76, "disable_evaluation_writ": 76, "custom_eval_funct": 76, "tpe": 76, "cmae": 76, "cma": 76, "studi": 76, "unlimit": 76, "pruner": 76, "fraction": 76, "partial": 76, "prune": 76, "sampler": 76, "person": 76, "evaluation_funct": 76, "agent_list": 76, "evaluation_function_kwarg": 76, "trained_ag": 76, "later": 76, "recreat": 76, "writer_fn": 76, "writer_kwarg": 76, "intanc": 77, "immedi": 77, "agent_sourc": 78, "mean_ev": 78, "scipi": 78, "down": 78, "tukei": 78, "hsd": 78, "parametr": 78, "assumpt": 78, "heavi": 78, "tail": 78, "multimod": 78, "summaris": 78, "devdoc": 78, "lehmann": 78, "joseph": 78, "romano": 78, "1007": 78, "387": 78, "27605": 78, "springer": 78, "experiment_manager_list": 79, "fignum": 79, "smoothing_bandwidth": [80, 82], "n_boot": [80, 82], "savefig_fnam": [80, 81, 82], "until": 80, "choosen": [80, 82], "heurist": [80, 82], "infer": [80, 82], "harder": [80, 82], "draw": [80, 82, 96], "conclus": [80, 82], "wide": [80, 82], "id_ag": [82, 84], "sub_sampl": 82, "subdirectori": [82, 84], "date": [82, 84], "nadaraya": 82, "watson": 82, "pb": 82, "cost": 82, "noth": [82, 88], "4e4": 82, "kwd": 83, "preset": 83, "manager_mak": 83, "many_agent_by_str_datasourc": 84, "datasourc": 84, "path_to_tensorboard_data": 86, "preced": 87, "venv_fir_nam": 87, "tqdm": 87, "run_tqdm": 87, "noqa": 87, "sleep": 87, "test_venv": 87, "reseed_spac": 88, "seed_val": 88, "spawn_seed_seq": 89, "bit_gener": 89, "strongli": 89, "unexpect": 89, "unnecessari": 89, "generate_st": 89, "squeez": 89, "7068": 90, "issuecom": 90, "487907668": 90, "low": 91, "float64": 91, "cartesian": 91, "b_1": 91, "a_2": 91, "b_2": 91, "a_n": 91, "b_n": 91, "mechan": [91, 92, 93, 94, 95, 96], "member": [91, 92, 93, 94, 95, 96], "from_json": [91, 92, 93, 94, 95, 96], "sample_n": [91, 92, 93, 94, 95, 96], "jsonabl": [91, 92, 94, 95, 96], "is_bound": 91, "manner": 91, "valueerror": [91, 96], "neither": 91, "nor": 91, "is_np_flatten": [91, 92, 93, 94, 95, 96], "flatten": [91, 92, 93, 94, 95, 96], 
"lazili": [91, 92, 93, 94, 95, 96], "expens": [91, 92, 93, 94, 95, 96], "githubusercont": 91, "oo": 91, "shift": 91, "possibli": [91, 93, 94, 95], "subspac": [91, 92, 93, 94, 95, 96], "stricter": [91, 94, 95], "to_json": [91, 92, 93, 94, 95, 96], "spaces_kwarg": 92, "ordereddict": 92, "keysview": 92, "mask": [92, 93, 94, 95, 96], "constitu": 92, "composit": [92, 96], "immut": [92, 93, 96], "int64": [93, 95], "json": 93, "int8": [93, 94, 95], "infeas": 93, "coin": 94, "toss": 94, "binari": 94, "nvec": 95, "unless": 95, "smallest": 95, "count": 96, "occurr": 96, "almost": 97, "continuous_st": [98, 99, 100, 101, 102], "discrete_st": [98, 99, 100, 102, 106], "notset": 103, "print_log": 104, "multi_lin": 104, "tensorboard_kwarg": 104, "execution_metadata": 104, "maxlen": 104, "maxlen_by_tag": 104, "stderr": 104, "metadata_util": 104, "logic": 104, "summary_writ": 104, "set_max_global_step": 104, "scalar_valu": 104, "walltim": 104, "new_styl": 104, "confus": 104, "tensor": [104, 109], "field": 104, "simple_valu": 104, "main_tag": 104, "tag_scalar_dict": 104, "read_first_tag_valu": 104, "read_last_tag_valu": 104, "read_tag_valu": 104, "seri": 104, "spec": [105, 106, 107], "get_continuous_st": 106, "get_discrete_st": 106, "fun": 108, "lot": 108, "bore": 108, "tricki": 108, "spend": 108, "nice": 108, "comprehens": 108, "proper": 108, "hackabl": 108, "userguid": 108, "changelog": 108, "princip": 108, "acquir": 108, "propos": 108, "hypothesi": [108, 110], "paper": 108, "todo": 108, "suggest": 109, "miniconda": 109, "releas": 109, "recommand": 109, "framework": 109, "filter": 109, "platform": 109, "hobbyist": 109, "ai": 109, "opencv": 109, "vision": 109, "dimension": 109, "mathemat": 109, "xxxxxxxx": 109, "math": 109, "dollar": 109, "sphinxcontrib": 109, "zsh": 109, "bracket": 109, "glob": 109, "quot": 109, "tag_nam": 109, "torch_ag": 109, "welcom": 110, "popular": 110, "easiest": 110, "period": [110, 111], "export": 110, "extract": 110, "writerdata": 110, "yaml": 111, 
"config": 111, "preview": 112}, "objects": {"rlberry.agents": [[66, 0, 1, "", "Agent"], [67, 0, 1, "", "AgentWithSimplePolicy"]], "rlberry.agents.Agent": [[66, 1, 1, "", "eval"], [66, 1, 1, "", "fit"], [66, 1, 1, "", "get_params"], [66, 1, 1, "", "load"], [66, 2, 1, "", "output_dir"], [66, 1, 1, "", "reseed"], [66, 2, 1, "", "rng"], [66, 1, 1, "", "sample_parameters"], [66, 1, 1, "", "save"], [66, 1, 1, "", "set_writer"], [66, 2, 1, "", "thread_shared_data"], [66, 2, 1, "", "unique_id"], [66, 2, 1, "", "writer"]], "rlberry.agents.AgentWithSimplePolicy": [[67, 1, 1, "", "eval"], [67, 1, 1, "", "fit"], [67, 1, 1, "", "get_params"], [67, 1, 1, "", "load"], [67, 2, 1, "", "output_dir"], [67, 1, 1, "", "policy"], [67, 1, 1, "", "reseed"], [67, 2, 1, "", "rng"], [67, 1, 1, "", "sample_parameters"], [67, 1, 1, "", "save"], [67, 1, 1, "", "set_writer"], [67, 2, 1, "", "thread_shared_data"], [67, 2, 1, "", "unique_id"], [67, 2, 1, "", "writer"]], "rlberry.agents.stable_baselines": [[68, 0, 1, "", "StableBaselinesAgent"]], "rlberry.agents.stable_baselines.StableBaselinesAgent": [[68, 1, 1, "", "eval"], [68, 1, 1, "", "fit"], [68, 1, 1, "", "get_params"], [68, 1, 1, "", "load"], [68, 2, 1, "", "output_dir"], [68, 1, 1, "", "policy"], [68, 1, 1, "", "reseed"], [68, 2, 1, "", "rng"], [68, 1, 1, "", "sample_parameters"], [68, 1, 1, "", "save"], [68, 1, 1, "", "set_logger"], [68, 1, 1, "", "set_writer"], [68, 2, 1, "", "thread_shared_data"], [68, 2, 1, "", "unique_id"], [68, 2, 1, "", "writer"]], "rlberry.agents.utils.replay": [[69, 0, 1, "", "ReplayBuffer"]], "rlberry.agents.utils.replay.ReplayBuffer": [[69, 1, 1, "", "append"], [69, 1, 1, "", "clear"], [69, 2, 1, "", "data"], [69, 2, 1, "", "dtypes"], [69, 1, 1, "", "end_episode"], [69, 2, 1, "", "max_episode_steps"], [69, 1, 1, "", "sample"], [69, 1, 1, "", "setup_entry"], [69, 2, 1, "", "tags"], [69, 1, 1, "", "update_priorities"]], "rlberry.envs": [[70, 3, 1, "", "PipelineEnv"], [71, 3, 1, "", "atari_make"], [73, 3, 1, "", 
"gym_make"]], "rlberry.envs.basewrapper": [[72, 0, 1, "", "Wrapper"]], "rlberry.envs.basewrapper.Wrapper": [[72, 1, 1, "", "close"], [72, 1, 1, "", "get_params"], [72, 1, 1, "", "get_video"], [72, 1, 1, "", "get_wrapper_attr"], [72, 1, 1, "", "is_generative"], [72, 1, 1, "", "is_online"], [72, 2, 1, "", "np_random"], [72, 1, 1, "", "render"], [72, 1, 1, "", "reseed"], [72, 1, 1, "", "reset"], [72, 2, 1, "", "rng"], [72, 1, 1, "", "sample"], [72, 1, 1, "", "save_video"], [72, 1, 1, "", "step"], [72, 2, 1, "", "unwrapped"]], "rlberry.envs.interface": [[74, 0, 1, "", "Model"]], "rlberry.envs.interface.Model": [[74, 1, 1, "", "close"], [74, 1, 1, "", "get_params"], [74, 1, 1, "", "get_wrapper_attr"], [74, 1, 1, "", "is_generative"], [74, 1, 1, "", "is_online"], [74, 2, 1, "", "np_random"], [74, 1, 1, "", "render"], [74, 1, 1, "", "reseed"], [74, 1, 1, "", "reset"], [74, 2, 1, "", "rng"], [74, 1, 1, "", "sample"], [74, 1, 1, "", "step"], [74, 2, 1, "", "unwrapped"]], "rlberry.manager": [[75, 0, 1, "", "AdastopComparator"], [76, 0, 1, "", "ExperimentManager"], [77, 0, 1, "", "MultipleManagers"], [78, 3, 1, "", "compare_agents"], [79, 3, 1, "", "evaluate_agents"], [80, 3, 1, "", "plot_smoothed_curves"], [81, 3, 1, "", "plot_synchronized_curves"], [82, 3, 1, "", "plot_writer_data"], [83, 3, 1, "", "preset_manager"], [84, 3, 1, "", "read_writer_data"], [85, 3, 1, "", "run_venv_xp"], [86, 3, 1, "", "tensorboard_folder_to_dataframe"], [87, 3, 1, "", "with_venv"]], "rlberry.manager.AdastopComparator": [[75, 1, 1, "", "compare"], [75, 1, 1, "", "compute_mean_diffs"], [75, 1, 1, "", "get_results"], [75, 1, 1, "", "partial_compare"], [75, 1, 1, "", "plot_results"], [75, 1, 1, "", "plot_results_sota"], [75, 1, 1, "", "print_results"]], "rlberry.manager.ExperimentManager": [[76, 1, 1, "", "build_eval_env"], [76, 1, 1, "", "clear_handlers"], [76, 1, 1, "", "clear_output_dir"], [76, 1, 1, "", "eval_agents"], [76, 1, 1, "", "fit"], [76, 1, 1, "", "generate_profile"], [76, 1, 1, "", 
"get_agent_instances"], [76, 1, 1, "", "get_writer_data"], [76, 1, 1, "", "load"], [76, 1, 1, "", "optimize_hyperparams"], [76, 1, 1, "", "save"], [76, 1, 1, "", "set_writer"]], "rlberry.manager.MultipleManagers": [[77, 1, 1, "", "append"], [77, 1, 1, "", "run"], [77, 1, 1, "", "save"]], "rlberry.seeding": [[88, 3, 1, "", "safe_reseed"], [90, 3, 1, "", "set_external_seed"]], "rlberry.seeding.seeder": [[89, 0, 1, "", "Seeder"]], "rlberry.seeding.seeder.Seeder": [[89, 1, 1, "", "reseed"], [89, 1, 1, "", "spawn"]], "rlberry.spaces": [[91, 0, 1, "", "Box"], [92, 0, 1, "", "Dict"], [93, 0, 1, "", "Discrete"], [94, 0, 1, "", "MultiBinary"], [95, 0, 1, "", "MultiDiscrete"], [96, 0, 1, "", "Tuple"]], "rlberry.spaces.Box": [[91, 1, 1, "", "contains"], [91, 1, 1, "", "from_jsonable"], [91, 1, 1, "", "is_bounded"], [91, 2, 1, "", "is_np_flattenable"], [91, 2, 1, "", "np_random"], [91, 1, 1, "", "reseed"], [91, 1, 1, "", "sample"], [91, 1, 1, "", "seed"], [91, 2, 1, "", "shape"], [91, 1, 1, "", "to_jsonable"]], "rlberry.spaces.Dict": [[92, 1, 1, "", "contains"], [92, 1, 1, "", "from_jsonable"], [92, 1, 1, "", "get"], [92, 2, 1, "", "is_np_flattenable"], [92, 1, 1, "", "items"], [92, 1, 1, "", "keys"], [92, 2, 1, "", "np_random"], [92, 1, 1, "", "sample"], [92, 1, 1, "", "seed"], [92, 2, 1, "", "shape"], [92, 1, 1, "", "to_jsonable"], [92, 1, 1, "", "values"]], "rlberry.spaces.Discrete": [[93, 1, 1, "", "contains"], [93, 1, 1, "", "from_jsonable"], [93, 2, 1, "", "is_np_flattenable"], [93, 2, 1, "", "np_random"], [93, 1, 1, "", "reseed"], [93, 1, 1, "", "sample"], [93, 1, 1, "", "seed"], [93, 2, 1, "", "shape"], [93, 1, 1, "", "to_jsonable"]], "rlberry.spaces.MultiBinary": [[94, 1, 1, "", "contains"], [94, 1, 1, "", "from_jsonable"], [94, 2, 1, "", "is_np_flattenable"], [94, 2, 1, "", "np_random"], [94, 1, 1, "", "reseed"], [94, 1, 1, "", "sample"], [94, 1, 1, "", "seed"], [94, 2, 1, "", "shape"], [94, 1, 1, "", "to_jsonable"]], "rlberry.spaces.MultiDiscrete": [[95, 1, 1, "", 
"contains"], [95, 1, 1, "", "from_jsonable"], [95, 2, 1, "", "is_np_flattenable"], [95, 2, 1, "", "np_random"], [95, 1, 1, "", "reseed"], [95, 1, 1, "", "sample"], [95, 1, 1, "", "seed"], [95, 2, 1, "", "shape"], [95, 1, 1, "", "to_jsonable"]], "rlberry.spaces.Tuple": [[96, 1, 1, "", "contains"], [96, 1, 1, "", "count"], [96, 1, 1, "", "from_jsonable"], [96, 1, 1, "", "index"], [96, 2, 1, "", "is_np_flattenable"], [96, 2, 1, "", "np_random"], [96, 1, 1, "", "sample"], [96, 1, 1, "", "seed"], [96, 2, 1, "", "shape"], [96, 1, 1, "", "to_jsonable"]], "rlberry.utils": [[97, 3, 1, "", "check_env"], [98, 3, 1, "", "check_experiment_manager"], [99, 3, 1, "", "check_fit_additive"], [100, 3, 1, "", "check_rl_agent"], [101, 3, 1, "", "check_save_load"], [102, 3, 1, "", "check_seeding_agent"]], "rlberry.utils.logging": [[103, 3, 1, "", "set_level"]], "rlberry.utils.writers": [[104, 0, 1, "", "DefaultWriter"]], "rlberry.utils.writers.DefaultWriter": [[104, 1, 1, "", "add_scalar"], [104, 1, 1, "", "add_scalars"], [104, 1, 1, "", "read_first_tag_value"], [104, 1, 1, "", "read_last_tag_value"], [104, 1, 1, "", "read_tag_value"], [104, 1, 1, "", "reset"]], "rlberry.wrappers": [[105, 0, 1, "", "RescaleRewardWrapper"]], "rlberry.wrappers.RescaleRewardWrapper": [[105, 1, 1, "", "close"], [105, 1, 1, "", "get_params"], [105, 1, 1, "", "get_video"], [105, 1, 1, "", "get_wrapper_attr"], [105, 1, 1, "", "is_generative"], [105, 1, 1, "", "is_online"], [105, 2, 1, "", "np_random"], [105, 1, 1, "", "render"], [105, 1, 1, "", "reseed"], [105, 1, 1, "", "reset"], [105, 2, 1, "", "rng"], [105, 1, 1, "", "sample"], [105, 1, 1, "", "save_video"], [105, 1, 1, "", "step"], [105, 2, 1, "", "unwrapped"]], "rlberry.wrappers.discretize_state": [[106, 0, 1, "", "DiscretizeStateWrapper"]], "rlberry.wrappers.discretize_state.DiscretizeStateWrapper": [[106, 1, 1, "", "close"], [106, 1, 1, "", "get_params"], [106, 1, 1, "", "get_video"], [106, 1, 1, "", "get_wrapper_attr"], [106, 1, 1, "", 
"is_generative"], [106, 1, 1, "", "is_online"], [106, 2, 1, "", "np_random"], [106, 1, 1, "", "render"], [106, 1, 1, "", "reseed"], [106, 1, 1, "", "reset"], [106, 2, 1, "", "rng"], [106, 1, 1, "", "sample"], [106, 1, 1, "", "save_video"], [106, 1, 1, "", "step"], [106, 2, 1, "", "unwrapped"]], "rlberry.wrappers.gym_utils": [[107, 0, 1, "", "OldGymCompatibilityWrapper"]], "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper": [[107, 1, 1, "", "close"], [107, 1, 1, "", "get_params"], [107, 1, 1, "", "get_video"], [107, 1, 1, "", "get_wrapper_attr"], [107, 1, 1, "", "is_generative"], [107, 1, 1, "", "is_online"], [107, 2, 1, "", "np_random"], [107, 1, 1, "", "render"], [107, 1, 1, "", "reseed"], [107, 1, 1, "", "reset"], [107, 2, 1, "", "rng"], [107, 1, 1, "", "sample"], [107, 1, 1, "", "save_video"], [107, 1, 1, "", "step"], [107, 2, 1, "", "unwrapped"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:property", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "property", "Python property"], "3": ["py", "function", "Python function"]}, "titleterms": {"about": 0, "u": 0, "contributor": 0, "cite": 0, "rlberri": [0, 1, 5, 14, 23, 35, 36, 43, 44, 48, 49, 53, 54, 56, 57, 58, 60, 61, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110], "fund": 0, "api": 1, "manag": [1, 37, 49, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87], "main": [1, 108], "class": 1, "evalu": [1, 46, 61, 111], "plot": [1, 39, 40, 41, 61], "agent": [1, 5, 36, 44, 45, 46, 49, 50, 52, 53, 55, 59, 61, 66, 67, 68, 69, 108, 109, 110, 111], "base": [1, 16], "import": [1, 43, 49, 56], "tool": [1, 40], "environ": [1, 6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 43, 49, 50, 54], "space": [1, 91, 92, 93, 94, 95, 96], "seed": [1, 51, 60, 88, 89, 
90, 108], "util": [1, 69, 97, 98, 99, 100, 101, 102, 103, 104], "log": [1, 58, 64, 103], "type": 1, "virtual": [1, 35], "writer": [1, 56, 58, 104], "check": 1, "wrapper": [1, 72, 105, 106, 107], "compar": [2, 3, 49, 52, 55, 108], "ppo": [2, 10, 52], "a2c": [2, 6, 43, 52], "acrobot": [2, 11, 24, 43], "adastop": [2, 52, 108], "bandit": [3, 14, 15, 16, 17, 18, 19, 36, 40], "algorithm": [3, 6, 7, 8, 9, 10, 11, 12, 13, 16], "sac": 4, "soft": 4, "actor": 4, "critic": 4, "illustr": [5, 14, 23, 36, 40], "A": [6, 7, 8, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 37], "demo": [6, 7, 8, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 37], "pball2d": [6, 10, 31], "dqn": [7, 9], "cartpol": [7, 9], "mbqvi": 8, "gridworld": [8, 28], "m": 9, "rskernelucbviag": 11, "rsucbvi": 12, "mountaincar": [12, 29], "valueiter": 13, "chain": [13, 27], "comparison": [15, 16, 44, 50, 52, 108], "thompson": 15, "sampl": [15, 44], "ucb": [15, 19], "bernoulli": 15, "gaussian": 15, "subplot": 16, "variou": 16, "index": 16, "exp3": 17, "cumul": [17, 19, 50], "regret": [17, 19, 50], "bai": 18, "real": 18, "dataset": 18, "select": 18, "mirror": 18, "comput": [20, 42], "time": [20, 42], "atari": [21, 22, 26, 54], "atlanti": 21, "vector": [21, 22], "ppoagent": [21, 22], "breakout": 22, "rsucbviag": 24, "applegold": 25, "freewai": 26, "dqnagent": [26, 33], "valueiterationag": 28, "oldgymcompatibilitywrapp": [30, 107], "old_acrobot": 30, "room": 32, "springcartpol": 33, "twinroom": 34, "us": [35, 47, 53, 54, 55, 57, 60, 66, 67, 68, 71, 73, 75, 76, 78, 79, 82, 84, 85, 87, 93, 105, 107], "multipl": [35, 44], "galleri": 36, "exampl": [36, 62, 66, 67, 68, 71, 73, 75, 76, 78, 79, 82, 84, 85, 87, 93, 105, 107], "experi": [37, 47, 49, 55, 58, 59, 60, 110, 111], "checkpoint": 38, "kernel": 39, "function": 39, "record": 41, "reward": [41, 49, 50, 61], "dure": [41, 49], "train": [41, 61], "quickstart": 43, "deep": [43, 109, 110], "reinforc": [43, 49], "learn": 
[43, 49], "remind": [43, 44], "rl": [43, 49, 52, 62, 108, 109], "set": [43, 55, 58, 110], "gymnasium": [43, 54, 57, 61], "run": [43, 47], "v1": 43, "quick": [44, 49, 111], "hypothesi": [44, 52], "test": [44, 52, 64], "two": 44, "creat": [45, 53, 54, 55], "an": [45, 46, 49, 53, 54, 59, 62, 108, 110, 111], "optim": [46, 111], "its": 46, "hyperparamet": [46, 111], "setup": [47, 111], "yaml": 47, "config": 47, "file": 47, "parallel": 48, "thread": [48, 60], "process": 48, "spawn": 48, "forkserv": 48, "fork": [48, 62], "start": [49, 111], "requir": 49, "librari": [49, 50, 57, 60, 108, 111], "choos": 49, "defin": 49, "baselin": [49, 57], "expect": [49, 50], "final": 49, "polici": [49, 61], "period": 49, "definit": 50, "iter": 50, "increas": 50, "reproduc": [51, 108, 110], "adapt": 52, "from": [52, 56], "stabl": [52, 57], "baselines3": 52, "result": 52, "visualis": 52, "how": [53, 54, 55, 56, 57, 58, 59, 60, 62, 111], "without": 53, "With": [53, 60], "stablebaselines3": 53, "your": [53, 54, 55, 58, 60], "own": [53, 54], "experimentmanag": [53, 55, 60, 76, 108], "anoth": 55, "output": 55, "video": [55, 61, 62], "some": 55, "advanc": [55, 59, 110], "other": [55, 59], "inform": [55, 59], "export": 56, "data": [56, 61], "tensorboard": 56, "extract": 56, "writerdata": 56, "default": 56, "extern": [57, 60, 111], "": 58, "logger": 58, "level": 58, "save": 59, "load": 59, "previou": [59, 109], "onli": 59, "user": [59, 110, 111], "why": 60, "basic": 60, "In": 60, "classic": 60, "usag": [60, 110], "multi": 60, "visual": [61, 108], "metric": 61, "gener": 61, "gif": 61, "rlberry_scool": 61, "env": [61, 70, 71, 72, 73, 74], "curv": 61, "raw": 61, "error": 61, "represent": 61, "confid": 61, "interv": 61, "predict": 61, "contribut": [62, 64, 108, 110, 111], "instal": [62, 109], "berri": 62, "note": 62, "pre": 62, "commit": 62, "option": [62, 109], "pull": 62, "request": 62, "checklist": 62, "continu": 62, "integr": 62, "ci": 62, "build": [62, 64], "markdown": 62, "link": 62, "between": 
62, "document": [62, 64, 108, 112], "page": 62, "have": 62, "acknowledg": 62, "changelog": 63, "dev": 63, "version": [63, 109, 112], "0": [63, 109], "7": [63, 109], "3": [63, 109], "1": 63, "6": 63, "5": 63, "4": 63, "2": 63, "guidelin": 64, "docstr": 64, "agentwithsimplepolici": 67, "stable_baselin": 68, "stablebaselinesag": 68, "replai": 69, "replaybuff": 69, "pipelineenv": 70, "atari_mak": 71, "basewrapp": 72, "gym_mak": 73, "interfac": 74, "model": 74, "adastopcompar": 75, "multiplemanag": 77, "compare_ag": 78, "evaluate_ag": 79, "plot_smoothed_curv": 80, "plot_synchronized_curv": 81, "plot_writer_data": 82, "preset_manag": 83, "read_writer_data": 84, "run_venv_xp": 85, "tensorboard_folder_to_datafram": 86, "with_venv": 87, "safe_rese": 88, "seeder": 89, "set_external_se": 90, "paramet": 90, "box": 91, "dict": 92, "discret": 93, "multibinari": 94, "multidiscret": 95, "tupl": 96, "check_env": 97, "check_experiment_manag": 98, "check_fit_addit": 99, "check_rl_ag": 100, "check_save_load": 101, "check_seeding_ag": 102, "set_level": 103, "defaultwrit": 104, "rescalerewardwrapp": 105, "discretize_st": 106, "discretizestatewrapp": 106, "gym_util": 107, "research": 108, "educ": 108, "content": 108, "featur": 108, "statist": 108, "And": 108, "mani": 108, "more": 108, "latest": 109, "develop": 109, "guid": [110, 111], "introduct": 110, "up": 110, "tutori": 111, "differ": 111, "compat": 111}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"About us": [[0, "about-us"]], "Contributors": [[0, "contributors"]], "Citing rlberry": [[0, "citing-rlberry"]], "Funding": [[0, "funding"]], "rlberry API": [[1, "rlberry-api"]], "Manager": [[1, "manager"]], "Main classes": 
[[1, "main-classes"]], "Evaluation and plot": [[1, "evaluation-and-plot"]], "Agents": [[1, "agents"]], "Base classes": [[1, "base-classes"]], "Agent importation tools": [[1, "agent-importation-tools"]], "Environments": [[1, "environments"]], "Base class": [[1, "base-class"]], "Spaces": [[1, "spaces"]], "Environment tools": [[1, "environment-tools"]], "Seeding": [[1, "seeding"]], "Utilities, Logging & Typing": [[1, "utilities-logging-typing"]], "Manager Utilitis": [[1, "manager-utilitis"]], "Virtual environment Utilities": [[1, "virtual-environment-utilities"]], "Writer Utilities": [[1, "writer-utilities"]], "Check Utilities": [[1, "check-utilities"]], "Logging Utilities": [[1, "logging-utilities"]], "Environment Wrappers": [[1, "environment-wrappers"]], "Compare PPO and A2C on Acrobot with AdaStop": [[2, "compare-ppo-and-a2c-on-acrobot-with-adastop"]], "Compare Bandit Algorithms": [[3, "compare-bandit-algorithms"]], "SAC Soft Actor-Critic": [[4, "sac-soft-actor-critic"]], "Illustration of rlberry agents": [[5, "illustration-of-rlberry-agents"], [36, "illustration-of-rlberry-agents"]], "A demo of A2C algorithm in PBall2D environment": [[6, "a-demo-of-a2c-algorithm-in-pball2d-environment"]], "A demo of DQN algorithm in CartPole environment": [[7, "a-demo-of-dqn-algorithm-in-cartpole-environment"]], "A demo of MBQVI algorithm in Gridworld environment": [[8, "a-demo-of-mbqvi-algorithm-in-gridworld-environment"]], "A demo of M-DQN algorithm in CartPole environment": [[9, "a-demo-of-m-dqn-algorithm-in-cartpole-environment"]], "A demo of PPO algorithm in PBall2D environment": [[10, "a-demo-of-ppo-algorithm-in-pball2d-environment"]], "A demo of RSKernelUCBVIAgent algorithm in Acrobot environment": [[11, "a-demo-of-rskernelucbviagent-algorithm-in-acrobot-environment"]], "A demo of RSUCBVI algorithm in MountainCar environment": [[12, "a-demo-of-rsucbvi-algorithm-in-mountaincar-environment"]], "A demo of ValueIteration algorithm in Chain environment": [[13, 
"a-demo-of-valueiteration-algorithm-in-chain-environment"]], "Illustration of bandits in rlberry": [[14, "illustration-of-bandits-in-rlberry"], [36, "illustration-of-bandits-in-rlberry"]], "Comparison of Thompson sampling and UCB on Bernoulli and Gaussian bandits": [[15, "comparison-of-thompson-sampling-and-ucb-on-bernoulli-and-gaussian-bandits"]], "Comparison subplots of various index based bandits algorithms": [[16, "comparison-subplots-of-various-index-based-bandits-algorithms"]], "EXP3 Bandit cumulative regret": [[17, "exp3-bandit-cumulative-regret"]], "A demo of Bandit BAI on a real dataset to select mirrors": [[18, "a-demo-of-bandit-bai-on-a-real-dataset-to-select-mirrors"]], "UCB Bandit cumulative regret": [[19, "ucb-bandit-cumulative-regret"]], "Computation times": [[20, "computation-times"], [42, "computation-times"]], "A demo of ATARI Atlantis environment with vectorized PPOAgent": [[21, "a-demo-of-atari-atlantis-environment-with-vectorized-ppoagent"]], "A demo of ATARI Breakout environment with vectorized PPOAgent": [[22, "a-demo-of-atari-breakout-environment-with-vectorized-ppoagent"]], "Illustration of rlberry environments": [[23, "illustration-of-rlberry-environments"], [36, "illustration-of-rlberry-environments"]], "A demo of Acrobot environment with RSUCBVIAgent": [[24, "a-demo-of-acrobot-environment-with-rsucbviagent"]], "A demo of AppleGold environment": [[25, "a-demo-of-applegold-environment"]], "A demo of ATARI Freeway environment with DQNAgent": [[26, "a-demo-of-atari-freeway-environment-with-dqnagent"]], "A demo of Chain environment": [[27, "a-demo-of-chain-environment"]], "A demo of Gridworld environment with ValueIterationAgent": [[28, "a-demo-of-gridworld-environment-with-valueiterationagent"]], "A demo of MountainCar environment": [[29, "a-demo-of-mountaincar-environment"]], "A demo of OldGymCompatibilityWrapper with old_Acrobot environment": [[30, "a-demo-of-oldgymcompatibilitywrapper-with-old-acrobot-environment"]], "A demo of PBALL2D 
environment": [[31, "a-demo-of-pball2d-environment"]], "A demo of rooms environment": [[32, "a-demo-of-rooms-environment"]], "A demo of SpringCartPole environment with DQNAgent": [[33, "a-demo-of-springcartpole-environment-with-dqnagent"]], "A demo of twinrooms environment": [[34, "a-demo-of-twinrooms-environment"]], "Using multiple virtual environments with rlberry": [[35, "using-multiple-virtual-environments-with-rlberry"]], "Gallery of examples": [[36, "gallery-of-examples"]], "A demo of Experiment Manager": [[37, "a-demo-of-experiment-manager"]], "Checkpointing": [[38, "checkpointing"]], "Plot kernel functions": [[39, "plot-kernel-functions"]], "Illustration of plotting tools on Bandits": [[40, "illustration-of-plotting-tools-on-bandits"]], "Record reward during training and then plot it": [[41, "record-reward-during-training-and-then-plot-it"]], "Quickstart for Deep Reinforcement Learning in rlberry": [[43, "quickstart-for-deep-reinforcement-learning-in-rlberry"]], "Imports": [[43, "imports"]], "Reminder of the RL setting": [[43, "reminder-of-the-rl-setting"]], "Gymnasium Environment": [[43, "gymnasium-environment"]], "Running A2C on Acrobot-v1": [[43, "running-a2c-on-acrobot-v1"]], "Comparison of Agents": [[44, "comparison-of-agents"]], "Quick reminder on hypothesis testing": [[44, "quick-reminder-on-hypothesis-testing"]], "Two sample testing": [[44, "two-sample-testing"]], "Multiple testing": [[44, "multiple-testing"]], "Multiple agent comparison in rlberry": [[44, "multiple-agent-comparison-in-rlberry"]], "Create an agent": [[45, "create-an-agent"]], "Evaluate an agent and optimize its hyperparameters": [[46, "evaluate-an-agent-and-optimize-its-hyperparameters"]], "Setup and run experiments using yaml config files": [[47, "setup-and-run-experiments-using-yaml-config-files"]], "Parallelization in rlberry": [[48, "parallelization-in-rlberry"]], "Threading": [[48, "threading"]], "Process: spawn or forkserver": [[48, "process-spawn-or-forkserver"]], "Process: 
fork": [[48, "process-fork"]], "Quick Start for Reinforcement Learning in rlberry": [[49, "quick-start-for-reinforcement-learning-in-rlberry"]], "Importing required libraries": [[49, "importing-required-libraries"]], "Choosing an RL environment": [[49, "choosing-an-rl-environment"]], "Defining an agent and a baseline": [[49, "defining-an-agent-and-a-baseline"]], "Experiment Manager": [[49, "experiment-manager"]], "Comparing the expected rewards of the final policies": [[49, "comparing-the-expected-rewards-of-the-final-policies"]], "Comparing the agents during the learning period": [[49, "comparing-the-agents-during-the-learning-period"]], "Libraries": [[50, "libraries"]], "Environment definition": [[50, "environment-definition"]], "Agents definition": [[50, "agents-definition"]], "Comparisons": [[50, "comparisons"]], "Comparison of expected rewards.": [[50, "comparison-of-expected-rewards"]], "Comparison of cumulative regret as iterations increase": [[50, "comparison-of-cumulative-regret-as-iterations-increase"]], "Seeding & Reproducibility": [[51, "seeding-reproducibility"], [108, "seeding-reproducibility"]], "Adaptive hypothesis testing for comparison of RL agents with AdaStop": [[52, "adaptive-hypothesis-testing-for-comparison-of-rl-agents-with-adastop"]], "Hypothesis testing to compare RL agents": [[52, "hypothesis-testing-to-compare-rl-agents"]], "Comparison of A2C and PPO from stable-baselines3": [[52, "comparison-of-a2c-and-ppo-from-stable-baselines3"]], "Result visualisation": [[52, "result-visualisation"]], "How to use an Agent": [[53, "how-to-use-an-agent"]], "Use rlberry Agent": [[53, "use-rlberry-agent"]], "without agent": [[53, "without-agent"]], "With agent": [[53, "with-agent"]], "Use StableBaselines3 as rlberry Agent": [[53, "use-stablebaselines3-as-rlberry-agent"]], "Create your own Agent": [[53, "create-your-own-agent"]], "Use experimentManager": [[53, "use-experimentmanager"]], "How to use an environment": [[54, "how-to-use-an-environment"]], 
"Use rlberry environment": [[54, "use-rlberry-environment"]], "Use Gymnasium environment": [[54, "use-gymnasium-environment"]], "Use Atari environment": [[54, "use-atari-environment"]], "Create your own environment": [[54, "create-your-own-environment"]], "How to use the ExperimentManager": [[55, "how-to-use-the-experimentmanager"]], "Create your experiment": [[55, "create-your-experiment"]], "Compare with another agent": [[55, "compare-with-another-agent"]], "Output the video": [[55, "output-the-video"]], "Some advanced settings": [[55, "some-advanced-settings"]], "Other information": [[55, "other-information"], [59, "other-information"]], "How to export/import data (rlberry data, tensorboard data, \u2026)?": [[56, "how-to-export-import-data-rlberry-data-tensorboard-data"]], "How to extract data from the WriterData?": [[56, "how-to-extract-data-from-the-writerdata"]], "Default writer": [[56, "default-writer"]], "How to import data from tensorboard?": [[56, "how-to-import-data-from-tensorboard"]], "How to use the external libraries": [[57, "how-to-use-the-external-libraries"]], "Using rlberry and Gymnasium": [[57, "using-rlberry-and-gymnasium"]], "Using rlberry and Stable Baselines": [[57, "using-rlberry-and-stable-baselines"]], "How to log your experiment": [[58, "how-to-log-your-experiment"]], "Set rlberry\u2019s logger level": [[58, "set-rlberry-s-logger-level"]], "Writer": [[58, "writer"]], "How to save/load an experiment": [[59, "how-to-save-load-an-experiment"]], "how to save an experiment?": [[59, "how-to-save-an-experiment"]], "How to load a previous experiment?": [[59, "how-to-load-a-previous-experiment"]], "How to save/load an agent only? 
(advanced users)": [[59, "how-to-save-load-an-agent-only-advanced-users"]], "Save the agent": [[59, "save-the-agent"]], "Load the agent": [[59, "load-the-agent"]], "How to seed your experiment": [[60, "how-to-seed-your-experiment"]], "Why use seeding?": [[60, "why-use-seeding"]], "Basics": [[60, "basics"]], "In rlberry": [[60, "in-rlberry"]], "classic usage": [[60, "classic-usage"]], "With ExperimentManager": [[60, "with-experimentmanager"]], "multi-threading": [[60, "multi-threading"]], "External libraries": [[60, "external-libraries"]], "Visualization of policies and plots of training/evaluation metrics in rlberry": [[61, "visualization-of-policies-and-plots-of-training-evaluation-metrics-in-rlberry"]], "Generating videos and gif of the policy of a trained agent": [[61, "generating-videos-and-gif-of-the-policy-of-a-trained-agent"]], "Generating videos": [[61, "generating-videos"]], "Generating gifs with rlberry_scool env": [[61, "generating-gifs-with-rlberry-scool-env"]], "Generating gifs with Gymnasium env": [[61, "generating-gifs-with-gymnasium-env"]], "Plotting training data and reward curves in rlberry": [[61, "plotting-training-data-and-reward-curves-in-rlberry"]], "Plotting raw curves": [[61, "plotting-raw-curves"]], "Error representation \u2013 confidence intervals and prediction intervals": [[61, "error-representation-confidence-intervals-and-prediction-intervals"]], "Visualization of evaluations of trained agents in rlberry": [[61, "visualization-of-evaluations-of-trained-agents-in-rlberry"]], "How to contribute": [[62, "how-to-contribute"]], "Forking and installing rl-berry": [[62, "forking-and-installing-rl-berry"]], "Note": [[62, null]], "Pre-commit (optional)": [[62, "pre-commit-optional"]], "Pull request checklist": [[62, "pull-request-checklist"]], "Continuous integration (CI)": [[62, "continuous-integration-ci"]], "Building examples": [[62, "building-examples"]], "Markdown and link between documentation pages.": [[62, 
"markdown-and-link-between-documentation-pages"]], "Have a video for an example in the documentation": [[62, "have-a-video-for-an-example-in-the-documentation"]], "Acknowledgements": [[62, "acknowledgements"]], "Changelog": [[63, "changelog"]], "Dev version": [[63, "dev-version"]], "Version 0.7.3": [[63, "version-0-7-3"]], "Version 0.7.1": [[63, "version-0-7-1"]], "Version 0.7.0": [[63, "version-0-7-0"]], "Version 0.6.0": [[63, "version-0-6-0"]], "Version 0.5.0": [[63, "version-0-5-0"]], "Version 0.4.1": [[63, "version-0-4-1"]], "Version 0.4.0": [[63, "version-0-4-0"]], "Version 0.3.0": [[63, "version-0-3-0"]], "Version 0.2.1": [[63, "version-0-2-1"]], "Version 0.2": [[63, "version-0-2"]], "Contributing": [[64, "contributing"]], "Documentation": [[64, "documentation"]], "Building the documentation": [[64, "building-the-documentation"]], "Tests": [[64, "tests"]], "Guidelines for docstring": [[64, "guidelines-for-docstring"]], "Guidelines for logging": [[64, "guidelines-for-logging"]], "rlberry.agents.Agent": [[66, "rlberry-agents-agent"]], "Examples using rlberry.agents.Agent": [[66, "examples-using-rlberry-agents-agent"]], "rlberry.agents.AgentWithSimplePolicy": [[67, "rlberry-agents-agentwithsimplepolicy"]], "Examples using rlberry.agents.AgentWithSimplePolicy": [[67, "examples-using-rlberry-agents-agentwithsimplepolicy"]], "rlberry.agents.stable_baselines.StableBaselinesAgent": [[68, "rlberry-agents-stable-baselines-stablebaselinesagent"]], "Examples using rlberry.agents.stable_baselines.StableBaselinesAgent": [[68, "examples-using-rlberry-agents-stable-baselines-stablebaselinesagent"]], "rlberry.agents.utils.replay.ReplayBuffer": [[69, "rlberry-agents-utils-replay-replaybuffer"]], "rlberry.envs.PipelineEnv": [[70, "rlberry-envs-pipelineenv"]], "rlberry.envs.atari_make": [[71, "rlberry-envs-atari-make"]], "Examples using rlberry.envs.atari_make": [[71, "examples-using-rlberry-envs-atari-make"]], "rlberry.envs.basewrapper.Wrapper": [[72, 
"rlberry-envs-basewrapper-wrapper"]], "rlberry.envs.gym_make": [[73, "rlberry-envs-gym-make"]], "Examples using rlberry.envs.gym_make": [[73, "examples-using-rlberry-envs-gym-make"]], "rlberry.envs.interface.Model": [[74, "rlberry-envs-interface-model"]], "rlberry.manager.AdastopComparator": [[75, "rlberry-manager-adastopcomparator"]], "Examples using rlberry.manager.AdastopComparator": [[75, "examples-using-rlberry-manager-adastopcomparator"]], "rlberry.manager.ExperimentManager": [[76, "rlberry-manager-experimentmanager"]], "Examples using rlberry.manager.ExperimentManager": [[76, "examples-using-rlberry-manager-experimentmanager"]], "rlberry.manager.MultipleManagers": [[77, "rlberry-manager-multiplemanagers"]], "rlberry.manager.compare_agents": [[78, "rlberry-manager-compare-agents"]], "Examples using rlberry.manager.compare_agents": [[78, "examples-using-rlberry-manager-compare-agents"]], "rlberry.manager.evaluate_agents": [[79, "rlberry-manager-evaluate-agents"]], "Examples using rlberry.manager.evaluate_agents": [[79, "examples-using-rlberry-manager-evaluate-agents"]], "rlberry.manager.plot_smoothed_curves": [[80, "rlberry-manager-plot-smoothed-curves"]], "rlberry.manager.plot_synchronized_curves": [[81, "rlberry-manager-plot-synchronized-curves"]], "rlberry.manager.plot_writer_data": [[82, "rlberry-manager-plot-writer-data"]], "Examples using rlberry.manager.plot_writer_data": [[82, "examples-using-rlberry-manager-plot-writer-data"]], "rlberry.manager.preset_manager": [[83, "rlberry-manager-preset-manager"]], "rlberry.manager.read_writer_data": [[84, "rlberry-manager-read-writer-data"]], "Examples using rlberry.manager.read_writer_data": [[84, "examples-using-rlberry-manager-read-writer-data"]], "rlberry.manager.run_venv_xp": [[85, "rlberry-manager-run-venv-xp"]], "Examples using rlberry.manager.run_venv_xp": [[85, "examples-using-rlberry-manager-run-venv-xp"]], "rlberry.manager.tensorboard_folder_to_dataframe": [[86, 
"rlberry-manager-tensorboard-folder-to-dataframe"]], "rlberry.manager.with_venv": [[87, "rlberry-manager-with-venv"]], "Examples using rlberry.manager.with_venv": [[87, "examples-using-rlberry-manager-with-venv"]], "rlberry.seeding.safe_reseed": [[88, "rlberry-seeding-safe-reseed"]], "rlberry.seeding.seeder.Seeder": [[89, "rlberry-seeding-seeder-seeder"]], "rlberry.seeding.set_external_seed": [[90, "rlberry-seeding-set-external-seed"]], "Parameters": [[90, "parameters"]], "rlberry.spaces.Box": [[91, "rlberry-spaces-box"]], "rlberry.spaces.Dict": [[92, "rlberry-spaces-dict"]], "rlberry.spaces.Discrete": [[93, "rlberry-spaces-discrete"]], "Examples using rlberry.spaces.Discrete": [[93, "examples-using-rlberry-spaces-discrete"]], "rlberry.spaces.MultiBinary": [[94, "rlberry-spaces-multibinary"]], "rlberry.spaces.MultiDiscrete": [[95, "rlberry-spaces-multidiscrete"]], "rlberry.spaces.Tuple": [[96, "rlberry-spaces-tuple"]], "rlberry.utils.check_env": [[97, "rlberry-utils-check-env"]], "rlberry.utils.check_experiment_manager": [[98, "rlberry-utils-check-experiment-manager"]], "rlberry.utils.check_fit_additive": [[99, "rlberry-utils-check-fit-additive"]], "rlberry.utils.check_rl_agent": [[100, "rlberry-utils-check-rl-agent"]], "rlberry.utils.check_save_load": [[101, "rlberry-utils-check-save-load"]], "rlberry.utils.check_seeding_agent": [[102, "rlberry-utils-check-seeding-agent"]], "rlberry.utils.logging.set_level": [[103, "rlberry-utils-logging-set-level"]], "rlberry.utils.writers.DefaultWriter": [[104, "rlberry-utils-writers-defaultwriter"]], "rlberry.wrappers.RescaleRewardWrapper": [[105, "rlberry-wrappers-rescalerewardwrapper"]], "Examples using rlberry.wrappers.RescaleRewardWrapper": [[105, "examples-using-rlberry-wrappers-rescalerewardwrapper"]], "rlberry.wrappers.discretize_state.DiscretizeStateWrapper": [[106, "rlberry-wrappers-discretize-state-discretizestatewrapper"]], "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper": [[107, 
"rlberry-wrappers-gym-utils-oldgymcompatibilitywrapper"]], "Examples using rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper": [[107, "examples-using-rlberry-wrappers-gym-utils-oldgymcompatibilitywrapper"]], "An RL Library for Research and Education": [[108, "an-rl-library-for-research-and-education"]], "Documentation Contents": [[108, "documentation-contents"]], "Contributing to rlberry": [[108, "contributing-to-rlberry"], [110, "contributing-to-rlberry"]], "rlberry main features": [[108, "rlberry-main-features"]], "ExperimentManager": [[108, "experimentmanager"]], "Statistical comparison of RL agents": [[108, "statistical-comparison-of-rl-agents"]], "Compare agents": [[108, "compare-agents"]], "AdaStop": [[108, "adastop"]], "Visualization": [[108, "visualization"]], "And many more !": [[108, "and-many-more"]], "Installation": [[109, "installation"]], "Latest version (0.7.3)": [[109, "latest-version-0-7-3"]], "Options": [[109, "options"]], "Development version": [[109, "development-version"]], "Previous versions": [[109, "previous-versions"]], "Deep RL agents": [[109, "deep-rl-agents"]], "User Guide": [[110, "user-guide"]], "Introduction": [[110, "introduction"]], "Set up an experiment": [[110, "set-up-an-experiment"]], "Experimenting with Deep agents": [[110, "experimenting-with-deep-agents"]], "Reproducibility": [[110, "reproducibility"]], "Advanced Usage": [[110, "advanced-usage"]], "User guide": [[111, "user-guide"]], "Tutorials": [[111, "tutorials"]], "Quick start: setup an experiment and evaluate different agents": [[111, "quick-start-setup-an-experiment-and-evaluate-different-agents"]], "Agents, hyperparameter optimization and experiment setup": [[111, "agents-hyperparameter-optimization-and-experiment-setup"]], "Compatibility with External Libraries": [[111, "compatibility-with-external-libraries"]], "How to contribute?": [[111, "how-to-contribute"]], "Documentation versions": [[112, "documentation-versions"]]}, "indexentries": {"agent (class in 
rlberry.agents)": [[66, "rlberry.agents.Agent"]], "eval() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.eval"]], "fit() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.fit"]], "get_params() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.get_params"]], "load() (rlberry.agents.agent class method)": [[66, "rlberry.agents.Agent.load"]], "output_dir (rlberry.agents.agent property)": [[66, "rlberry.agents.Agent.output_dir"]], "reseed() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.reseed"]], "rng (rlberry.agents.agent property)": [[66, "rlberry.agents.Agent.rng"]], "sample_parameters() (rlberry.agents.agent class method)": [[66, "rlberry.agents.Agent.sample_parameters"]], "save() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.save"]], "set_writer() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.set_writer"]], "thread_shared_data (rlberry.agents.agent property)": [[66, "rlberry.agents.Agent.thread_shared_data"]], "unique_id (rlberry.agents.agent property)": [[66, "rlberry.agents.Agent.unique_id"]], "writer (rlberry.agents.agent property)": [[66, "rlberry.agents.Agent.writer"]], "agentwithsimplepolicy (class in rlberry.agents)": [[67, "rlberry.agents.AgentWithSimplePolicy"]], "eval() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.eval"]], "fit() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.fit"]], "get_params() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.get_params"]], "load() (rlberry.agents.agentwithsimplepolicy class method)": [[67, "rlberry.agents.AgentWithSimplePolicy.load"]], "output_dir (rlberry.agents.agentwithsimplepolicy property)": [[67, "rlberry.agents.AgentWithSimplePolicy.output_dir"]], "policy() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.policy"]], "reseed() (rlberry.agents.agentwithsimplepolicy 
method)": [[67, "rlberry.agents.AgentWithSimplePolicy.reseed"]], "rng (rlberry.agents.agentwithsimplepolicy property)": [[67, "rlberry.agents.AgentWithSimplePolicy.rng"]], "sample_parameters() (rlberry.agents.agentwithsimplepolicy class method)": [[67, "rlberry.agents.AgentWithSimplePolicy.sample_parameters"]], "save() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.save"]], "set_writer() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.set_writer"]], "thread_shared_data (rlberry.agents.agentwithsimplepolicy property)": [[67, "rlberry.agents.AgentWithSimplePolicy.thread_shared_data"]], "unique_id (rlberry.agents.agentwithsimplepolicy property)": [[67, "rlberry.agents.AgentWithSimplePolicy.unique_id"]], "writer (rlberry.agents.agentwithsimplepolicy property)": [[67, "rlberry.agents.AgentWithSimplePolicy.writer"]], "stablebaselinesagent (class in rlberry.agents.stable_baselines)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent"]], "eval() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.eval"]], "fit() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.fit"]], "get_params() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.get_params"]], "load() (rlberry.agents.stable_baselines.stablebaselinesagent class method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.load"]], "output_dir (rlberry.agents.stable_baselines.stablebaselinesagent property)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.output_dir"]], "policy() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.policy"]], "reseed() (rlberry.agents.stable_baselines.stablebaselinesagent method)": 
[[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.reseed"]], "rng (rlberry.agents.stable_baselines.stablebaselinesagent property)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.rng"]], "sample_parameters() (rlberry.agents.stable_baselines.stablebaselinesagent class method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.sample_parameters"]], "save() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.save"]], "set_logger() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.set_logger"]], "set_writer() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.set_writer"]], "thread_shared_data (rlberry.agents.stable_baselines.stablebaselinesagent property)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.thread_shared_data"]], "unique_id (rlberry.agents.stable_baselines.stablebaselinesagent property)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.unique_id"]], "writer (rlberry.agents.stable_baselines.stablebaselinesagent property)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.writer"]], "replaybuffer (class in rlberry.agents.utils.replay)": [[69, "rlberry.agents.utils.replay.ReplayBuffer"]], "append() (rlberry.agents.utils.replay.replaybuffer method)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.append"]], "clear() (rlberry.agents.utils.replay.replaybuffer method)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.clear"]], "data (rlberry.agents.utils.replay.replaybuffer property)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.data"]], "dtypes (rlberry.agents.utils.replay.replaybuffer property)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.dtypes"]], "end_episode() (rlberry.agents.utils.replay.replaybuffer method)": [[69, 
"rlberry.agents.utils.replay.ReplayBuffer.end_episode"]], "max_episode_steps (rlberry.agents.utils.replay.replaybuffer property)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.max_episode_steps"]], "sample() (rlberry.agents.utils.replay.replaybuffer method)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.sample"]], "setup_entry() (rlberry.agents.utils.replay.replaybuffer method)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.setup_entry"]], "tags (rlberry.agents.utils.replay.replaybuffer property)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.tags"]], "update_priorities() (rlberry.agents.utils.replay.replaybuffer method)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.update_priorities"]], "pipelineenv() (in module rlberry.envs)": [[70, "rlberry.envs.PipelineEnv"]], "atari_make() (in module rlberry.envs)": [[71, "rlberry.envs.atari_make"]], "wrapper (class in rlberry.envs.basewrapper)": [[72, "rlberry.envs.basewrapper.Wrapper"]], "close() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.close"]], "get_params() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.get_params"]], "get_video() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.get_video"]], "get_wrapper_attr() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.get_wrapper_attr"]], "is_generative() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.is_generative"]], "is_online() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.is_online"]], "np_random (rlberry.envs.basewrapper.wrapper property)": [[72, "rlberry.envs.basewrapper.Wrapper.np_random"]], "render() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.render"]], "reseed() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.reseed"]], "reset() 
(rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.reset"]], "rng (rlberry.envs.basewrapper.wrapper property)": [[72, "rlberry.envs.basewrapper.Wrapper.rng"]], "sample() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.sample"]], "save_video() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.save_video"]], "step() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.step"]], "unwrapped (rlberry.envs.basewrapper.wrapper property)": [[72, "rlberry.envs.basewrapper.Wrapper.unwrapped"]], "gym_make() (in module rlberry.envs)": [[73, "rlberry.envs.gym_make"]], "model (class in rlberry.envs.interface)": [[74, "rlberry.envs.interface.Model"]], "close() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.close"]], "get_params() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.get_params"]], "get_wrapper_attr() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.get_wrapper_attr"]], "is_generative() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.is_generative"]], "is_online() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.is_online"]], "np_random (rlberry.envs.interface.model property)": [[74, "rlberry.envs.interface.Model.np_random"]], "render() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.render"]], "reseed() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.reseed"]], "reset() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.reset"]], "rng (rlberry.envs.interface.model property)": [[74, "rlberry.envs.interface.Model.rng"]], "sample() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.sample"]], "step() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.step"]], "unwrapped 
(rlberry.envs.interface.model property)": [[74, "rlberry.envs.interface.Model.unwrapped"]], "adastopcomparator (class in rlberry.manager)": [[75, "rlberry.manager.AdastopComparator"]], "compare() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.compare"]], "compute_mean_diffs() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.compute_mean_diffs"]], "get_results() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.get_results"]], "partial_compare() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.partial_compare"]], "plot_results() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.plot_results"]], "plot_results_sota() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.plot_results_sota"]], "print_results() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.print_results"]], "experimentmanager (class in rlberry.manager)": [[76, "rlberry.manager.ExperimentManager"]], "build_eval_env() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.build_eval_env"]], "clear_handlers() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.clear_handlers"]], "clear_output_dir() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.clear_output_dir"]], "eval_agents() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.eval_agents"]], "fit() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.fit"]], "generate_profile() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.generate_profile"]], "get_agent_instances() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.get_agent_instances"]], "get_writer_data() 
(rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.get_writer_data"]], "load() (rlberry.manager.experimentmanager class method)": [[76, "rlberry.manager.ExperimentManager.load"]], "optimize_hyperparams() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.optimize_hyperparams"]], "save() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.save"]], "set_writer() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.set_writer"]], "multiplemanagers (class in rlberry.manager)": [[77, "rlberry.manager.MultipleManagers"]], "append() (rlberry.manager.multiplemanagers method)": [[77, "rlberry.manager.MultipleManagers.append"]], "run() (rlberry.manager.multiplemanagers method)": [[77, "rlberry.manager.MultipleManagers.run"]], "save() (rlberry.manager.multiplemanagers method)": [[77, "rlberry.manager.MultipleManagers.save"]], "compare_agents() (in module rlberry.manager)": [[78, "rlberry.manager.compare_agents"]], "evaluate_agents() (in module rlberry.manager)": [[79, "rlberry.manager.evaluate_agents"]], "plot_smoothed_curves() (in module rlberry.manager)": [[80, "rlberry.manager.plot_smoothed_curves"]], "plot_synchronized_curves() (in module rlberry.manager)": [[81, "rlberry.manager.plot_synchronized_curves"]], "plot_writer_data() (in module rlberry.manager)": [[82, "rlberry.manager.plot_writer_data"]], "preset_manager() (in module rlberry.manager)": [[83, "rlberry.manager.preset_manager"]], "read_writer_data() (in module rlberry.manager)": [[84, "rlberry.manager.read_writer_data"]], "run_venv_xp() (in module rlberry.manager)": [[85, "rlberry.manager.run_venv_xp"]], "tensorboard_folder_to_dataframe() (in module rlberry.manager)": [[86, "rlberry.manager.tensorboard_folder_to_dataframe"]], "with_venv() (in module rlberry.manager)": [[87, "rlberry.manager.with_venv"]], "safe_reseed() (in module rlberry.seeding)": [[88, 
"rlberry.seeding.safe_reseed"]], "seeder (class in rlberry.seeding.seeder)": [[89, "rlberry.seeding.seeder.Seeder"]], "reseed() (rlberry.seeding.seeder.seeder method)": [[89, "rlberry.seeding.seeder.Seeder.reseed"]], "spawn() (rlberry.seeding.seeder.seeder method)": [[89, "rlberry.seeding.seeder.Seeder.spawn"]], "set_external_seed() (in module rlberry.seeding)": [[90, "rlberry.seeding.set_external_seed"]], "box (class in rlberry.spaces)": [[91, "rlberry.spaces.Box"]], "contains() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.contains"]], "from_jsonable() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.from_jsonable"]], "is_bounded() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.is_bounded"]], "is_np_flattenable (rlberry.spaces.box property)": [[91, "rlberry.spaces.Box.is_np_flattenable"]], "np_random (rlberry.spaces.box property)": [[91, "rlberry.spaces.Box.np_random"]], "reseed() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.reseed"]], "sample() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.sample"]], "seed() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.seed"]], "shape (rlberry.spaces.box property)": [[91, "rlberry.spaces.Box.shape"]], "to_jsonable() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.to_jsonable"]], "dict (class in rlberry.spaces)": [[92, "rlberry.spaces.Dict"]], "contains() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.contains"]], "from_jsonable() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.from_jsonable"]], "get() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.get"]], "is_np_flattenable (rlberry.spaces.dict property)": [[92, "rlberry.spaces.Dict.is_np_flattenable"]], "items() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.items"]], "keys() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.keys"]], "np_random (rlberry.spaces.dict property)": [[92, "rlberry.spaces.Dict.np_random"]], "sample() (rlberry.spaces.dict method)": [[92, 
"rlberry.spaces.Dict.sample"]], "seed() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.seed"]], "shape (rlberry.spaces.dict property)": [[92, "rlberry.spaces.Dict.shape"]], "to_jsonable() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.to_jsonable"]], "values() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.values"]], "discrete (class in rlberry.spaces)": [[93, "rlberry.spaces.Discrete"]], "contains() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.contains"]], "from_jsonable() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.from_jsonable"]], "is_np_flattenable (rlberry.spaces.discrete property)": [[93, "rlberry.spaces.Discrete.is_np_flattenable"]], "np_random (rlberry.spaces.discrete property)": [[93, "rlberry.spaces.Discrete.np_random"]], "reseed() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.reseed"]], "sample() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.sample"]], "seed() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.seed"]], "shape (rlberry.spaces.discrete property)": [[93, "rlberry.spaces.Discrete.shape"]], "to_jsonable() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.to_jsonable"]], "multibinary (class in rlberry.spaces)": [[94, "rlberry.spaces.MultiBinary"]], "contains() (rlberry.spaces.multibinary method)": [[94, "rlberry.spaces.MultiBinary.contains"]], "from_jsonable() (rlberry.spaces.multibinary method)": [[94, "rlberry.spaces.MultiBinary.from_jsonable"]], "is_np_flattenable (rlberry.spaces.multibinary property)": [[94, "rlberry.spaces.MultiBinary.is_np_flattenable"]], "np_random (rlberry.spaces.multibinary property)": [[94, "rlberry.spaces.MultiBinary.np_random"]], "reseed() (rlberry.spaces.multibinary method)": [[94, "rlberry.spaces.MultiBinary.reseed"]], "sample() (rlberry.spaces.multibinary method)": [[94, "rlberry.spaces.MultiBinary.sample"]], "seed() (rlberry.spaces.multibinary method)": [[94, 
"rlberry.spaces.MultiBinary.seed"]], "shape (rlberry.spaces.multibinary property)": [[94, "rlberry.spaces.MultiBinary.shape"]], "to_jsonable() (rlberry.spaces.multibinary method)": [[94, "rlberry.spaces.MultiBinary.to_jsonable"]], "multidiscrete (class in rlberry.spaces)": [[95, "rlberry.spaces.MultiDiscrete"]], "contains() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.contains"]], "from_jsonable() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.from_jsonable"]], "is_np_flattenable (rlberry.spaces.multidiscrete property)": [[95, "rlberry.spaces.MultiDiscrete.is_np_flattenable"]], "np_random (rlberry.spaces.multidiscrete property)": [[95, "rlberry.spaces.MultiDiscrete.np_random"]], "reseed() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.reseed"]], "sample() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.sample"]], "seed() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.seed"]], "shape (rlberry.spaces.multidiscrete property)": [[95, "rlberry.spaces.MultiDiscrete.shape"]], "to_jsonable() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.to_jsonable"]], "tuple (class in rlberry.spaces)": [[96, "rlberry.spaces.Tuple"]], "contains() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.contains"]], "count() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.count"]], "from_jsonable() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.from_jsonable"]], "index() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.index"]], "is_np_flattenable (rlberry.spaces.tuple property)": [[96, "rlberry.spaces.Tuple.is_np_flattenable"]], "np_random (rlberry.spaces.tuple property)": [[96, "rlberry.spaces.Tuple.np_random"]], "sample() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.sample"]], "seed() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.seed"]], "shape 
(rlberry.spaces.tuple property)": [[96, "rlberry.spaces.Tuple.shape"]], "to_jsonable() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.to_jsonable"]], "check_env() (in module rlberry.utils)": [[97, "rlberry.utils.check_env"]], "check_experiment_manager() (in module rlberry.utils)": [[98, "rlberry.utils.check_experiment_manager"]], "check_fit_additive() (in module rlberry.utils)": [[99, "rlberry.utils.check_fit_additive"]], "check_rl_agent() (in module rlberry.utils)": [[100, "rlberry.utils.check_rl_agent"]], "check_save_load() (in module rlberry.utils)": [[101, "rlberry.utils.check_save_load"]], "check_seeding_agent() (in module rlberry.utils)": [[102, "rlberry.utils.check_seeding_agent"]], "set_level() (in module rlberry.utils.logging)": [[103, "rlberry.utils.logging.set_level"]], "defaultwriter (class in rlberry.utils.writers)": [[104, "rlberry.utils.writers.DefaultWriter"]], "add_scalar() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.add_scalar"]], "add_scalars() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.add_scalars"]], "read_first_tag_value() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.read_first_tag_value"]], "read_last_tag_value() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.read_last_tag_value"]], "read_tag_value() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.read_tag_value"]], "reset() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.reset"]], "rescalerewardwrapper (class in rlberry.wrappers)": [[105, "rlberry.wrappers.RescaleRewardWrapper"]], "close() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.close"]], "get_params() (rlberry.wrappers.rescalerewardwrapper method)": [[105, 
"rlberry.wrappers.RescaleRewardWrapper.get_params"]], "get_video() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.get_video"]], "get_wrapper_attr() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.get_wrapper_attr"]], "is_generative() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.is_generative"]], "is_online() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.is_online"]], "np_random (rlberry.wrappers.rescalerewardwrapper property)": [[105, "rlberry.wrappers.RescaleRewardWrapper.np_random"]], "render() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.render"]], "reseed() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.reseed"]], "reset() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.reset"]], "rng (rlberry.wrappers.rescalerewardwrapper property)": [[105, "rlberry.wrappers.RescaleRewardWrapper.rng"]], "sample() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.sample"]], "save_video() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.save_video"]], "step() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.step"]], "unwrapped (rlberry.wrappers.rescalerewardwrapper property)": [[105, "rlberry.wrappers.RescaleRewardWrapper.unwrapped"]], "discretizestatewrapper (class in rlberry.wrappers.discretize_state)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper"]], "close() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.close"]], "get_params() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, 
"rlberry.wrappers.discretize_state.DiscretizeStateWrapper.get_params"]], "get_video() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.get_video"]], "get_wrapper_attr() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.get_wrapper_attr"]], "is_generative() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.is_generative"]], "is_online() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.is_online"]], "np_random (rlberry.wrappers.discretize_state.discretizestatewrapper property)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.np_random"]], "render() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.render"]], "reseed() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.reseed"]], "reset() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.reset"]], "rng (rlberry.wrappers.discretize_state.discretizestatewrapper property)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.rng"]], "sample() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.sample"]], "save_video() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.save_video"]], "step() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.step"]], "unwrapped 
(rlberry.wrappers.discretize_state.discretizestatewrapper property)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.unwrapped"]], "oldgymcompatibilitywrapper (class in rlberry.wrappers.gym_utils)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper"]], "close() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.close"]], "get_params() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.get_params"]], "get_video() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.get_video"]], "get_wrapper_attr() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.get_wrapper_attr"]], "is_generative() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.is_generative"]], "is_online() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.is_online"]], "np_random (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper property)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.np_random"]], "render() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.render"]], "reseed() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.reseed"]], "reset() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.reset"]], "rng (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper property)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.rng"]], "sample() 
(rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.sample"]], "save_video() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.save_video"]], "step() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.step"]], "unwrapped (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper property)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.unwrapped"]]}})
\ No newline at end of file
+Search.setIndex({"docnames": ["about", "api", "auto_examples/adastop_example", "auto_examples/comparison_agents", "auto_examples/demo_agents/demo_SAC", "auto_examples/demo_agents/index", "auto_examples/demo_agents/video_plot_a2c", "auto_examples/demo_agents/video_plot_dqn", "auto_examples/demo_agents/video_plot_mbqvi", "auto_examples/demo_agents/video_plot_mdqn", "auto_examples/demo_agents/video_plot_ppo", "auto_examples/demo_agents/video_plot_rs_kernel_ucbvi", "auto_examples/demo_agents/video_plot_rsucbvi", "auto_examples/demo_agents/video_plot_vi", "auto_examples/demo_bandits/index", "auto_examples/demo_bandits/plot_TS_bandit", "auto_examples/demo_bandits/plot_compare_index_bandits", "auto_examples/demo_bandits/plot_exp3_bandit", "auto_examples/demo_bandits/plot_mirror_bandit", "auto_examples/demo_bandits/plot_ucb_bandit", "auto_examples/demo_bandits/sg_execution_times", "auto_examples/demo_env/example_atari_atlantis_vectorized_ppo", "auto_examples/demo_env/example_atari_breakout_vectorized_ppo", "auto_examples/demo_env/index", "auto_examples/demo_env/video_plot_acrobot", "auto_examples/demo_env/video_plot_apple_gold", "auto_examples/demo_env/video_plot_atari_freeway", "auto_examples/demo_env/video_plot_chain", "auto_examples/demo_env/video_plot_gridworld", "auto_examples/demo_env/video_plot_mountain_car", "auto_examples/demo_env/video_plot_old_gym_compatibility_wrapper_old_acrobot", "auto_examples/demo_env/video_plot_pball", "auto_examples/demo_env/video_plot_rooms", "auto_examples/demo_env/video_plot_springcartpole", "auto_examples/demo_env/video_plot_twinrooms", "auto_examples/example_venv", "auto_examples/index", "auto_examples/plot_agent_manager", "auto_examples/plot_checkpointing", "auto_examples/plot_kernels", "auto_examples/plot_smooth", "auto_examples/plot_writer_wrapper", "auto_examples/sg_execution_times", "basics/DeepRLTutorial/TutorialDeepRL", "basics/comparison", "basics/create_agent", "basics/evaluate_agent", "basics/experiment_setup", 
"basics/multiprocess", "basics/quick_start_rl/quickstart", "basics/rlberry how to", "basics/seeding", "basics/userguide/adastop", "basics/userguide/agent", "basics/userguide/environment", "basics/userguide/experimentManager", "basics/userguide/export_training_data", "basics/userguide/external_lib", "basics/userguide/logging", "basics/userguide/save_load", "basics/userguide/seeding", "basics/userguide/visualization", "beginner_dev_guide", "changelog", "contributing", "contributors", "generated/rlberry.agents.Agent", "generated/rlberry.agents.AgentWithSimplePolicy", "generated/rlberry.agents.stable_baselines.StableBaselinesAgent", "generated/rlberry.agents.utils.replay.ReplayBuffer", "generated/rlberry.envs.PipelineEnv", "generated/rlberry.envs.atari_make", "generated/rlberry.envs.basewrapper.Wrapper", "generated/rlberry.envs.gym_make", "generated/rlberry.envs.interface.Model", "generated/rlberry.manager.AdastopComparator", "generated/rlberry.manager.ExperimentManager", "generated/rlberry.manager.MultipleManagers", "generated/rlberry.manager.compare_agents", "generated/rlberry.manager.evaluate_agents", "generated/rlberry.manager.plot_smoothed_curves", "generated/rlberry.manager.plot_synchronized_curves", "generated/rlberry.manager.plot_writer_data", "generated/rlberry.manager.preset_manager", "generated/rlberry.manager.read_writer_data", "generated/rlberry.manager.run_venv_xp", "generated/rlberry.manager.tensorboard_folder_to_dataframe", "generated/rlberry.manager.with_venv", "generated/rlberry.seeding.safe_reseed", "generated/rlberry.seeding.seeder.Seeder", "generated/rlberry.seeding.set_external_seed", "generated/rlberry.spaces.Box", "generated/rlberry.spaces.Dict", "generated/rlberry.spaces.Discrete", "generated/rlberry.spaces.MultiBinary", "generated/rlberry.spaces.MultiDiscrete", "generated/rlberry.spaces.Tuple", "generated/rlberry.utils.check_env", "generated/rlberry.utils.check_experiment_manager", "generated/rlberry.utils.check_fit_additive", 
"generated/rlberry.utils.check_rl_agent", "generated/rlberry.utils.check_save_load", "generated/rlberry.utils.check_seeding_agent", "generated/rlberry.utils.logging.set_level", "generated/rlberry.utils.writers.DefaultWriter", "generated/rlberry.wrappers.RescaleRewardWrapper", "generated/rlberry.wrappers.discretize_state.DiscretizeStateWrapper", "generated/rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper", "index", "installation", "user_guide", "user_guide2", "versions"], "filenames": ["about.rst", "api.rst", "auto_examples/adastop_example.rst", "auto_examples/comparison_agents.rst", "auto_examples/demo_agents/demo_SAC.rst", "auto_examples/demo_agents/index.rst", "auto_examples/demo_agents/video_plot_a2c.rst", "auto_examples/demo_agents/video_plot_dqn.rst", "auto_examples/demo_agents/video_plot_mbqvi.rst", "auto_examples/demo_agents/video_plot_mdqn.rst", "auto_examples/demo_agents/video_plot_ppo.rst", "auto_examples/demo_agents/video_plot_rs_kernel_ucbvi.rst", "auto_examples/demo_agents/video_plot_rsucbvi.rst", "auto_examples/demo_agents/video_plot_vi.rst", "auto_examples/demo_bandits/index.rst", "auto_examples/demo_bandits/plot_TS_bandit.rst", "auto_examples/demo_bandits/plot_compare_index_bandits.rst", "auto_examples/demo_bandits/plot_exp3_bandit.rst", "auto_examples/demo_bandits/plot_mirror_bandit.rst", "auto_examples/demo_bandits/plot_ucb_bandit.rst", "auto_examples/demo_bandits/sg_execution_times.rst", "auto_examples/demo_env/example_atari_atlantis_vectorized_ppo.rst", "auto_examples/demo_env/example_atari_breakout_vectorized_ppo.rst", "auto_examples/demo_env/index.rst", "auto_examples/demo_env/video_plot_acrobot.rst", "auto_examples/demo_env/video_plot_apple_gold.rst", "auto_examples/demo_env/video_plot_atari_freeway.rst", "auto_examples/demo_env/video_plot_chain.rst", "auto_examples/demo_env/video_plot_gridworld.rst", "auto_examples/demo_env/video_plot_mountain_car.rst", "auto_examples/demo_env/video_plot_old_gym_compatibility_wrapper_old_acrobot.rst", 
"auto_examples/demo_env/video_plot_pball.rst", "auto_examples/demo_env/video_plot_rooms.rst", "auto_examples/demo_env/video_plot_springcartpole.rst", "auto_examples/demo_env/video_plot_twinrooms.rst", "auto_examples/example_venv.rst", "auto_examples/index.rst", "auto_examples/plot_agent_manager.rst", "auto_examples/plot_checkpointing.rst", "auto_examples/plot_kernels.rst", "auto_examples/plot_smooth.rst", "auto_examples/plot_writer_wrapper.rst", "auto_examples/sg_execution_times.rst", "basics/DeepRLTutorial/TutorialDeepRL.md", "basics/comparison.md", "basics/create_agent.rst", "basics/evaluate_agent.rst", "basics/experiment_setup.rst", "basics/multiprocess.rst", "basics/quick_start_rl/quickstart.md", "basics/rlberry how to.rst", "basics/seeding.rst", "basics/userguide/adastop.md", "basics/userguide/agent.md", "basics/userguide/environment.md", "basics/userguide/experimentManager.md", "basics/userguide/export_training_data.md", "basics/userguide/external_lib.md", "basics/userguide/logging.md", "basics/userguide/save_load.md", "basics/userguide/seeding.md", "basics/userguide/visualization.md", "beginner_dev_guide.md", "changelog.rst", "contributing.md", "contributors.rst", "generated/rlberry.agents.Agent.rst", "generated/rlberry.agents.AgentWithSimplePolicy.rst", "generated/rlberry.agents.stable_baselines.StableBaselinesAgent.rst", "generated/rlberry.agents.utils.replay.ReplayBuffer.rst", "generated/rlberry.envs.PipelineEnv.rst", "generated/rlberry.envs.atari_make.rst", "generated/rlberry.envs.basewrapper.Wrapper.rst", "generated/rlberry.envs.gym_make.rst", "generated/rlberry.envs.interface.Model.rst", "generated/rlberry.manager.AdastopComparator.rst", "generated/rlberry.manager.ExperimentManager.rst", "generated/rlberry.manager.MultipleManagers.rst", "generated/rlberry.manager.compare_agents.rst", "generated/rlberry.manager.evaluate_agents.rst", "generated/rlberry.manager.plot_smoothed_curves.rst", "generated/rlberry.manager.plot_synchronized_curves.rst", 
"generated/rlberry.manager.plot_writer_data.rst", "generated/rlberry.manager.preset_manager.rst", "generated/rlberry.manager.read_writer_data.rst", "generated/rlberry.manager.run_venv_xp.rst", "generated/rlberry.manager.tensorboard_folder_to_dataframe.rst", "generated/rlberry.manager.with_venv.rst", "generated/rlberry.seeding.safe_reseed.rst", "generated/rlberry.seeding.seeder.Seeder.rst", "generated/rlberry.seeding.set_external_seed.rst", "generated/rlberry.spaces.Box.rst", "generated/rlberry.spaces.Dict.rst", "generated/rlberry.spaces.Discrete.rst", "generated/rlberry.spaces.MultiBinary.rst", "generated/rlberry.spaces.MultiDiscrete.rst", "generated/rlberry.spaces.Tuple.rst", "generated/rlberry.utils.check_env.rst", "generated/rlberry.utils.check_experiment_manager.rst", "generated/rlberry.utils.check_fit_additive.rst", "generated/rlberry.utils.check_rl_agent.rst", "generated/rlberry.utils.check_save_load.rst", "generated/rlberry.utils.check_seeding_agent.rst", "generated/rlberry.utils.logging.set_level.rst", "generated/rlberry.utils.writers.DefaultWriter.rst", "generated/rlberry.wrappers.RescaleRewardWrapper.rst", "generated/rlberry.wrappers.discretize_state.DiscretizeStateWrapper.rst", "generated/rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.rst", "index.md", "installation.md", "user_guide.md", "user_guide2.rst", "versions.rst"], "titles": ["About us", "rlberry API", "Compare PPO and A2C on Acrobot with AdaStop", "Compare Bandit Algorithms", "SAC Soft Actor-Critic", "Illustration of rlberry agents", "A demo of A2C algorithm in PBall2D environment", "A demo of DQN algorithm in CartPole environment", "A demo of MBQVI algorithm in Gridworld environment", "A demo of M-DQN algorithm in CartPole environment", "A demo of PPO algorithm in PBall2D environment", "A demo of RSKernelUCBVIAgent algorithm in Acrobot environment", "A demo of RSUCBVI algorithm in MountainCar environment", "A demo of ValueIteration algorithm in Chain environment", "Illustration of 
bandits in rlberry", "Comparison of Thompson sampling and UCB on Bernoulli and Gaussian bandits", "Comparison subplots of various index based bandits algorithms", "EXP3 Bandit cumulative regret", "A demo of Bandit BAI on a real dataset to select mirrors", "UCB Bandit cumulative regret", "Computation times", "A demo of ATARI Atlantis environment with vectorized PPOAgent", "A demo of ATARI Breakout environment with vectorized PPOAgent", "Illustration of rlberry environments", "A demo of Acrobot environment with RSUCBVIAgent", "A demo of AppleGold environment", "A demo of ATARI Freeway environment with DQNAgent", "A demo of Chain environment", "A demo of Gridworld environment with ValueIterationAgent", "A demo of MountainCar environment", "A demo of OldGymCompatibilityWrapper with old_Acrobot environment", "A demo of PBALL2D environment", "A demo of rooms environment", "A demo of SpringCartPole environment with DQNAgent", "A demo of twinrooms environment", "Using multiple virtual environments with rlberry", "Gallery of examples", "A demo of Experiment Manager", "Checkpointing", "Plot kernel functions", "Illustration of plotting tools on Bandits", "Record reward during training and then plot it", "Computation times", "Quickstart for Deep Reinforcement Learning in rlberry", "Comparison of Agents", "Create an agent", "Evaluate an agent and optimize its hyperparameters", "Setup and run experiments using yaml config files", "Parallelization in rlberry", "Quick Start for Reinforcement Learning in rlberry", "Libraries", "Seeding &amp; Reproducibility", "Adaptive hypothesis testing for comparison of RL agents with AdaStop", "How to use an Agent", "How to use an environment", "How to use the ExperimentManager", "How to export/import data (rlberry data, tensorboard data, \u2026)?", "How to use the external libraries", "How to log your experiment", "How to save/load an experiment", "How to seed your experiment", "Visualization of policies and plots of training/evaluation metrics 
in rlberry", "How to contribute", "Changelog", "Contributing", "&lt;no title&gt;", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.agents</span></code>.Agent", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.agents</span></code>.AgentWithSimplePolicy", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.agents.stable_baselines</span></code>.StableBaselinesAgent", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.agents.utils.replay</span></code>.ReplayBuffer", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.envs</span></code>.PipelineEnv", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.envs</span></code>.atari_make", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.envs.basewrapper</span></code>.Wrapper", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.envs</span></code>.gym_make", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.envs.interface</span></code>.Model", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.AdastopComparator", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.ExperimentManager", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.MultipleManagers", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.compare_agents", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.evaluate_agents", "<code class=\"xref py py-mod docutils literal notranslate\"><span 
class=\"pre\">rlberry.manager</span></code>.plot_smoothed_curves", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.plot_synchronized_curves", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.plot_writer_data", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.preset_manager", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.read_writer_data", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.run_venv_xp", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.tensorboard_folder_to_dataframe", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.manager</span></code>.with_venv", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.seeding</span></code>.safe_reseed", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.seeding.seeder</span></code>.Seeder", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.seeding</span></code>.set_external_seed", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.Box", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.Dict", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.Discrete", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.MultiBinary", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.MultiDiscrete", "<code class=\"xref py 
py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.spaces</span></code>.Tuple", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_env", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_experiment_manager", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_fit_additive", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_rl_agent", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_save_load", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils</span></code>.check_seeding_agent", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils.logging</span></code>.set_level", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.utils.writers</span></code>.DefaultWriter", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.wrappers</span></code>.RescaleRewardWrapper", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.wrappers.discretize_state</span></code>.DiscretizeStateWrapper", "<code class=\"xref py py-mod docutils literal notranslate\"><span class=\"pre\">rlberry.wrappers.gym_utils</span></code>.OldGymCompatibilityWrapper", "An RL Library for Research and Education", "Installation", "User Guide", "User guide : contents", "Documentation versions"], "terms": {"thi": [0, 2, 3, 4, 15, 16, 17, 18, 19, 21, 22, 26, 28, 33, 35, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 74, 75, 76, 82, 84, 86, 87, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108, 109, 110], "project": [0, 54, 
62], "wa": [0, 15, 16, 18, 37, 38, 41, 43, 54, 55, 56, 58, 62, 63, 69, 72, 74, 76, 88, 104, 105, 106, 107], "initi": [0, 35, 37, 38, 45, 46, 53, 56, 58, 59, 60, 63, 66, 67, 68, 72, 74, 76, 92, 96, 105, 106, 107], "i": [0, 6, 7, 8, 9, 10, 11, 12, 13, 16, 18, 21, 22, 26, 28, 33, 35, 37, 38, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 102, 104, 105, 106, 107, 108, 109, 110], "activ": [0, 21, 22, 26, 62, 63, 109], "maintain": [0, 63], "inria": [0, 63], "scool": [0, 53, 54, 61, 63, 108, 110], "team": [0, 63, 112], "The": [0, 6, 7, 8, 9, 10, 11, 12, 13, 15, 18, 21, 22, 26, 35, 37, 38, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 70, 72, 74, 76, 77, 78, 80, 82, 84, 86, 92, 94, 96, 104, 105, 106, 107, 108, 109, 110, 111], "follow": [0, 7, 9, 41, 43, 44, 51, 55, 56, 57, 60, 61, 62, 64, 75, 86], "peopl": 0, "contribut": [0, 63], "aleshi94": [0, 65], "brahimdriss": [0, 65], "matheu": [0, 65], "m": [0, 3, 5, 15, 16, 17, 19, 36, 40, 43, 62, 65, 73], "centa": [0, 65], "omar": [0, 65], "d": [0, 38, 65, 92], "r\u00e9my": [0, 65], "degenn": [0, 65], "yanni": [0, 65], "flet": [0, 65], "berliac": [0, 65], "hector": [0, 65], "kohler": [0, 65], "edouard": [0, 65], "leurent": [0, 65], "pierr": [0, 65], "m\u00e9nard": [0, 65], "wari": [0, 65], "radji": [0, 65], "sauxpa": [0, 65], "xuedong": [0, 65], "shang": [0, 65], "ju": [0, 65], "t": [0, 3, 15, 16, 17, 19, 38, 40, 44, 49, 50, 53, 58, 59, 60, 63, 65, 66, 67, 69, 72, 74, 75, 105, 106, 107, 108], "timotheemathieu": [0, 52, 65, 75], "riccardo": [0, 65], "della": [0, 65], "vecchia": [0, 65], "yannberthelot": [0, 65], "If": [0, 41, 48, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 87, 88, 89, 91, 93, 94, 95, 104, 105, 106, 107, 108, 109, 110, 111], "you": [0, 41, 43, 
45, 46, 47, 49, 51, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 69, 72, 74, 76, 77, 105, 106, 107, 108, 109, 110, 111], "us": [0, 2, 3, 7, 9, 15, 16, 18, 21, 22, 26, 36, 37, 38, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 56, 58, 59, 61, 62, 63, 64, 69, 72, 74, 77, 80, 81, 89, 91, 92, 94, 95, 96, 98, 99, 100, 102, 104, 106, 108, 109, 110, 111], "scientif": 0, "public": 0, "we": [0, 15, 18, 25, 35, 37, 38, 41, 43, 44, 46, 49, 50, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 75, 76, 78, 80, 81, 82, 84, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 102, 108, 109, 110, 111], "would": [0, 56, 58], "appreci": 0, "citat": 0, "bibtex": 0, "entri": [0, 59, 69, 104], "misc": [0, 63], "author": 0, "domingu": 0, "darwich": 0, "e": [0, 18, 38, 43, 44, 48, 49, 50, 52, 54, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 74, 75, 76, 78, 80, 81, 82, 93, 105, 106, 107, 109], "nard": 0, "valko": 0, "michal": 0, "doi": [0, 78], "10": [0, 8, 9, 15, 16, 18, 21, 27, 28, 32, 34, 37, 38, 39, 41, 42, 43, 46, 49, 50, 52, 53, 54, 55, 56, 60, 61, 62, 63, 67, 68, 76, 78, 99], "5281": 0, "zenodo": 0, "5544540": 0, "month": 0, "titl": [0, 15, 16, 17, 19, 40, 41, 43, 49, 50, 62, 82], "A": [0, 3, 5, 14, 15, 16, 20, 23, 36, 42, 43, 47, 50, 51, 53, 54, 66, 67, 68, 69, 71, 72, 73, 74, 76, 79, 84, 92, 93, 94, 105, 106, 107, 109], "reinforc": [0, 46, 53, 54, 61, 63, 67, 68, 72, 74, 105, 106, 107, 108, 109, 110, 111], "learn": [0, 17, 35, 45, 53, 54, 55, 56, 60, 61, 62, 63, 67, 68, 72, 74, 80, 81, 105, 106, 107, 108, 109, 110, 111], "librari": [0, 35, 43, 48, 51, 52, 59, 61, 63, 66, 67, 75, 76, 80, 87, 90, 91, 92, 93, 94, 95, 96, 109, 110], "research": [0, 54, 60, 63, 109], "educ": 0, "url": [0, 18], "http": [0, 18, 37, 46, 51, 52, 53, 54, 62, 63, 64, 66, 67, 68, 69, 71, 72, 74, 75, 76, 78, 89, 90, 91, 105, 106, 107, 108, 109], "github": [0, 52, 54, 62, 63, 66, 67, 69, 72, 75, 78, 90, 108, 109], "com": [0, 18, 52, 62, 63, 66, 67, 69, 72, 75, 90, 91, 108, 109], "py": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 
17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 47, 54, 59, 62, 63, 64, 69, 72, 91, 108, 109], "year": 0, "2021": [0, 17, 53, 54], "like": [0, 44, 46, 48, 53, 54, 56, 64, 78, 92, 109, 110], "thank": 0, "particip": 0, "phd": 0, "": [0, 7, 9, 12, 18, 21, 22, 26, 35, 37, 43, 44, 45, 48, 53, 54, 55, 57, 59, 60, 61, 62, 63, 64, 67, 68, 71, 72, 74, 76, 78, 82, 84, 86, 92, 96, 98, 99, 100, 102, 103, 105, 106, 107, 108, 110], "made": [0, 62], "happen": [0, 28, 61], "particular": [0, 52, 62, 76, 91, 92, 93, 94, 95, 96], "work": [0, 16, 40, 48, 51, 56, 60, 61, 62, 63, 76, 77, 109], "environ": [0, 4, 5, 17, 18, 19, 37, 40, 42, 45, 46, 47, 51, 52, 53, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 70, 71, 72, 73, 74, 76, 85, 87, 97, 98, 99, 100, 102, 104, 105, 106, 107, 108, 109, 110, 111], "universit\u00e9": 0, "de": 0, "lill": 0, "site": [0, 54], "uln": 0, "anr": 0, "anrt": 0, "renault": 0, "european": 0, "chist": 0, "era": 0, "delta": 0, "la": 0, "r\u00e9gion": 0, "haut": 0, "franc": 0, "mel": 0, "go": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 49, 53, 54, 55, 72, 74, 105, 106, 107], "end": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 44, 49, 50, 59, 69, 72, 74, 105, 106, 107], "download": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41], "full": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 62, 64, 69], "exampl": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 53, 54, 55, 56, 57, 58, 59, 60, 61, 63, 64, 69, 70, 72, 74, 80, 83, 89, 90, 100, 106, 108, 110, 
111], "code": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 44, 48, 49, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 68, 69, 72, 74, 76, 77, 105, 106, 107, 108], "illustr": [2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 38, 42, 56, 61, 66, 67, 76, 82, 110], "adastop_compar": 2, "which": [2, 25, 37, 43, 44, 49, 50, 54, 56, 57, 61, 62, 63, 64, 66, 67, 68, 72, 74, 76, 78, 80, 81, 82, 88, 89, 90, 91, 93, 94, 95, 98, 99, 100, 102, 105, 106, 107], "adapt": [2, 44, 61, 63, 69, 91, 109, 110], "multipl": [2, 3, 36, 42, 52, 63, 76, 77, 78, 80, 81, 82, 85, 87, 92, 96, 110], "test": [2, 3, 18, 21, 22, 26, 30, 37, 43, 50, 53, 56, 59, 60, 62, 63, 75, 78, 98, 99, 100, 102, 108, 109, 110], "assess": [2, 3, 44, 49, 50, 61], "whether": [2, 3, 44, 68, 72, 74, 75, 76, 77, 80, 81, 82, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "train": [2, 3, 4, 7, 9, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 28, 33, 35, 36, 37, 38, 40, 42, 43, 44, 45, 46, 47, 49, 50, 52, 53, 55, 56, 57, 58, 59, 60, 63, 66, 67, 68, 75, 76, 77, 78, 82, 84, 86, 104, 108, 110], "agent": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 47, 48, 51, 54, 56, 57, 58, 60, 62, 63, 64, 72, 74, 75, 76, 77, 78, 79, 82, 83, 84, 89, 98, 99, 100, 101, 102, 104, 105, 106, 107], "ar": [2, 3, 16, 18, 35, 38, 40, 43, 44, 46, 48, 49, 51, 52, 55, 56, 57, 59, 60, 61, 62, 63, 64, 66, 67, 68, 72, 74, 75, 76, 80, 81, 82, 87, 92, 93, 95, 104, 105, 106, 107, 108, 110, 111], "statist": [2, 3, 37, 41, 44, 52, 53, 55, 56, 63, 75, 76, 77, 78], "differ": [2, 3, 38, 43, 44, 49, 51, 52, 54, 57, 58, 60, 61, 62, 66, 67, 68, 72, 74, 75, 76, 80, 81, 82, 84, 92, 96, 105, 106, 107, 108], "remark": [2, 3, 35, 44, 48, 49, 82], "case": [2, 3, 15, 28, 43, 44, 59, 61, 72, 74, 80, 81, 82, 105, 106, 107], "where": [2, 3, 21, 22, 26, 38, 43, 44, 48, 49, 
55, 59, 60, 62, 66, 67, 68, 69, 76, 88, 93, 95, 104], "two": [2, 3, 15, 37, 45, 49, 50, 51, 53, 57, 60, 61, 63, 64, 75, 99, 108], "deem": [2, 3], "can": [2, 3, 7, 9, 37, 38, 41, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 74, 75, 76, 80, 81, 82, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108, 109, 110], "mean": [2, 3, 15, 16, 18, 19, 35, 40, 44, 49, 50, 52, 56, 57, 61, 67, 68, 76, 78, 80, 81, 82, 108], "either": [2, 3, 44, 52, 61, 62], "thei": [2, 3, 60, 61, 62, 64, 80, 82, 88], "effici": [2, 3, 48, 49, 52, 64, 108, 111], "have": [2, 3, 43, 44, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 63, 64, 66, 67, 69, 72, 74, 80, 81, 82, 86, 105, 106, 107, 108], "been": [2, 3, 41, 44, 56, 72, 74, 105, 106, 107, 108], "enough": [2, 3, 44, 54, 108], "fit": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 37, 38, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 53, 55, 56, 57, 58, 59, 60, 61, 63, 66, 67, 68, 75, 76, 77, 78, 79, 82, 83, 84, 99, 104], "variabl": [2, 3, 43, 49, 50, 54, 61, 72, 74, 94, 105, 106, 107], "result": [2, 7, 9, 18, 21, 22, 26, 30, 43, 44, 46, 48, 53, 56, 57, 58, 59, 61, 63, 72, 74, 75, 76, 78, 79, 82, 105, 106, 107, 108, 110], "info": [2, 6, 7, 8, 9, 10, 11, 12, 13, 15, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 37, 38, 41, 43, 45, 49, 50, 53, 54, 55, 56, 58, 59, 60, 61, 64, 67, 69, 72, 74, 82, 84, 103, 105, 106, 107], "13": [2, 50, 53, 54, 55, 56], "35": [2, 43, 50, 53, 54, 55, 56], "finish": [2, 37, 43, 55, 58, 59, 60, 72, 74, 75, 105, 106, 107], "agent1": [2, 40, 44], "v": [2, 21, 22, 37, 43, 44, 50, 52, 53, 54, 75], "agent2": [2, 40, 44], "diff": [2, 44, 62], "std": [2, 15, 19, 40, 44, 61, 80, 81, 82], "1": [2, 4, 8, 11, 12, 15, 17, 18, 19, 21, 22, 24, 26, 27, 30, 31, 34, 35, 37, 38, 39, 40, 41, 43, 44, 46, 47, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 66, 67, 68, 69, 70, 72, 75, 76, 78, 79, 80, 82, 84, 89, 93, 94, 95, 
104, 109], "2": [2, 8, 11, 16, 17, 18, 19, 21, 22, 24, 28, 30, 31, 35, 37, 38, 39, 40, 41, 43, 44, 46, 47, 49, 50, 51, 53, 54, 55, 56, 57, 58, 60, 66, 67, 68, 72, 75, 76, 78, 80], "decis": [2, 44, 52, 75], "0": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 67, 68, 69, 70, 72, 74, 75, 76, 78, 79, 80, 81, 82, 87, 93, 94, 95, 105, 106, 107], "274": 2, "85": [2, 50, 55, 56], "068": [2, 53], "189": 2, "206": [2, 63], "185": 2, "82553": 2, "71784": 2, "smaller": [2, 52, 69, 75], "from": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 78, 79, 80, 82, 83, 84, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 100, 105, 106, 107, 110], "rlberri": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 26, 29, 30, 34, 37, 38, 40, 41, 42, 45, 46, 47, 50, 51, 52, 55, 59, 62, 63, 64, 109, 111], "env": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 66, 67, 68, 69, 76, 79, 82, 83, 84, 89, 97, 98, 99, 100, 101, 102, 105, 106, 107], "import": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 63, 64, 68, 69, 70, 71, 73, 76, 79, 80, 82, 83, 84, 87, 89, 90, 100, 110, 111], "gym_mak": [2, 7, 9, 21, 22, 26, 43, 44, 46, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 63, 68, 69, 70, 71, 76, 79, 82, 84, 89], "stable_baselines3": [2, 35, 43, 44, 52, 53, 55, 56, 57, 58, 60, 61, 68, 71, 79], "stable_baselin": [2, 43, 44, 
52, 53, 55, 56, 57, 58, 60, 61, 63, 79], "stablebaselinesag": [2, 43, 44, 52, 53, 55, 56, 57, 58, 60, 61, 63, 79], "manag": [2, 3, 4, 15, 16, 17, 18, 19, 21, 22, 26, 35, 36, 38, 40, 41, 42, 43, 44, 46, 47, 48, 50, 52, 55, 56, 57, 58, 59, 60, 61, 63, 66, 67, 71, 110, 111], "adastopcompar": [2, 52], "env_ctor": [2, 3, 4, 15, 16, 17, 18, 19, 37, 40, 41, 44, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 68, 70, 71, 73, 76, 83, 98, 99, 100, 102], "env_kwarg": [2, 3, 4, 15, 16, 17, 18, 19, 37, 40, 41, 44, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 68, 70, 71, 73, 76, 83, 98, 99, 100, 102], "dict": [2, 4, 21, 22, 26, 33, 35, 37, 41, 43, 44, 46, 48, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 66, 67, 68, 69, 71, 72, 74, 75, 76, 79, 82, 83, 84, 86, 98, 99, 100, 102, 104, 105, 106, 107], "id": [2, 18, 21, 22, 26, 43, 44, 46, 52, 55, 56, 57, 58, 59, 60, 61, 63, 66, 67, 68, 70, 71, 73, 79, 82, 84], "v1": [2, 7, 9, 35, 44, 46, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 68, 69, 70, 72, 73, 74, 79, 82, 84, 105, 106, 107, 110, 111], "agent_class": [2, 47, 52, 63, 76, 82, 84], "train_env": [2, 3, 47, 52, 55, 76, 83], "fit_budget": [2, 3, 4, 15, 16, 17, 18, 19, 21, 22, 26, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 76, 79, 82, 83, 84], "5e4": [2, 52, 56, 61], "agent_nam": [2, 4, 18, 21, 22, 26, 43, 44, 49, 52, 55, 56, 57, 58, 59, 60, 61, 75, 76, 79], "init_kwarg": [2, 17, 19, 21, 22, 26, 33, 37, 40, 43, 44, 46, 47, 48, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 76, 79, 98, 99, 100, 101, 102], "algo_cl": [2, 43, 44, 52, 55, 56, 57, 58, 60, 61, 68, 79], "polici": [2, 6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 37, 43, 45, 50, 52, 53, 56, 57, 59, 63, 66, 67, 68, 72, 74, 79, 105, 106, 107, 108, 110, 111], "mlppolici": [2, 35, 52, 53, 56, 57, 61, 68, 79], "verbos": [2, 35, 52, 53, 55, 56, 57, 58, 61, 64, 68, 75, 76, 79, 85, 87], "print": [2, 3, 7, 8, 9, 11, 12, 13, 18, 21, 22, 25, 26, 28, 32, 35, 38, 43, 44, 46, 51, 52, 53, 55, 56, 58, 59, 
60, 64, 69, 75, 76, 104], "managers_path": [2, 52, 75], "total": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 72, 74, 76, 80, 105, 106, 107], "run": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 46, 49, 50, 55, 56, 57, 58, 59, 60, 62, 64, 66, 67, 68, 72, 74, 75, 76, 77, 82, 87, 105, 106, 107, 108, 109, 110, 111], "time": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 48, 49, 50, 52, 53, 54, 55, 56, 58, 60, 61, 62, 66, 67, 69, 72, 74, 75, 76, 77, 79, 80, 81, 82, 99, 104, 105, 106, 107, 108, 109], "script": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 47, 48, 62, 63, 64, 85, 87], "minut": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 111], "000": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 42], "second": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 46, 51, 55, 57, 60, 76, 104, 108], "python": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 47, 48, 62, 64, 72, 76, 77, 87, 109], "sourc": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107], "adastop_exampl": [2, 42], "jupyt": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 
22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41], "notebook": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 48, 63, 64, 76, 77], "ipynb": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41], "galleri": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 62, 64, 109, 111], "gener": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 43, 47, 51, 60, 62, 63, 64, 66, 67, 68, 69, 72, 74, 76, 78, 88, 89, 90, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108, 110], "sphinx": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 62, 63, 109], "compare_ag": [3, 44], "function": [3, 21, 22, 35, 36, 37, 41, 42, 43, 44, 50, 51, 54, 56, 57, 58, 59, 60, 61, 62, 63, 64, 67, 72, 74, 76, 78, 80, 82, 84, 85, 86, 87, 105, 106, 107, 109, 111], "numpi": [3, 15, 16, 17, 18, 19, 31, 35, 37, 39, 40, 41, 44, 45, 48, 49, 50, 51, 53, 60, 61, 63, 66, 67, 69, 72, 74, 76, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108], "np": [3, 15, 16, 17, 18, 19, 31, 35, 37, 39, 40, 41, 44, 45, 49, 50, 53, 61, 69, 72, 74, 82, 84, 89, 91, 93, 94, 95, 105, 106, 107], "comparison": [3, 14, 20, 36, 60, 63, 66, 67, 75, 76, 82, 110, 111], "agentmanag": [3, 44, 63], "rlberry_research": [3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 29, 30, 31, 32, 33, 34, 37, 39, 40, 47, 48, 50, 63, 82], "bernoullibandit": [3, 15, 16], "indexag": [3, 15, 16, 19, 40, 63], "makeboundedmossindex": [3, 16], "makeboundednptsindex": [3, 16], "makeboundeducbindex": [3, 15, 16], "makeetcindex": [3, 16], "paramet": [3, 4, 15, 16, 17, 18, 19, 28, 35, 37, 40, 43, 44, 45, 46, 47, 
48, 49, 50, 51, 53, 54, 55, 57, 58, 59, 61, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 91, 93, 94, 95, 97, 98, 99, 100, 102, 103, 104, 105, 106, 107], "problem": [3, 15, 16, 17, 18, 19, 40, 44, 60], "arrai": [3, 15, 16, 17, 18, 19, 31, 40, 50, 69, 72, 74, 75, 80, 82, 105, 106, 107], "6": [3, 8, 16, 17, 18, 31, 43, 49, 50, 53, 54, 55, 56, 60, 75, 83], "9": [3, 15, 16, 19, 25, 32, 35, 37, 40, 41, 43, 46, 47, 49, 50, 53, 54, 55, 56, 58, 60, 63, 80, 81, 82], "arm": [3, 15, 16, 18, 19, 40], "len": [3, 15, 16, 18, 19, 39, 40], "2000": [3, 15, 16, 53, 54, 56], "horizon": [3, 8, 11, 12, 15, 16, 17, 18, 19, 24, 30, 32, 34, 40, 41, 46, 47, 49, 50, 67, 68, 76], "n": [3, 32, 37, 38, 49, 52, 53, 54, 75, 89, 91, 93, 94, 95, 109], "50": [3, 7, 8, 13, 20, 28, 41, 43, 44, 47, 49, 50, 53, 54, 55, 56, 59, 61, 75, 78, 79], "number": [3, 15, 16, 17, 18, 19, 21, 22, 26, 38, 40, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 55, 56, 58, 59, 60, 61, 63, 66, 67, 68, 69, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 86, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108], "construct": [3, 15, 16, 17, 19, 35, 40, 43, 48, 49, 54, 62, 63, 69, 110], "experi": [3, 15, 16, 17, 18, 19, 35, 36, 40, 42, 43, 50, 52, 53, 61, 62, 63, 66, 67, 69, 75, 76, 79, 82, 84, 85, 108], "p": [3, 15, 16, 31, 37, 43, 44, 50, 53, 54, 78], "class": [3, 15, 16, 17, 18, 19, 21, 22, 26, 37, 38, 40, 41, 43, 45, 46, 49, 50, 51, 53, 54, 55, 57, 58, 59, 60, 63, 66, 67, 68, 69, 70, 72, 73, 74, 75, 76, 77, 84, 88, 89, 91, 92, 93, 94, 95, 96, 98, 99, 100, 102, 104, 105, 106, 107, 108, 109], "ucbag": [3, 16, 19, 40], "name": [3, 12, 15, 16, 17, 18, 19, 21, 22, 26, 37, 38, 40, 41, 43, 45, 49, 50, 53, 55, 56, 57, 58, 59, 60, 61, 62, 64, 66, 67, 68, 72, 74, 75, 76, 78, 79, 80, 81, 82, 84, 85, 86, 87, 104, 105, 106, 107], "ucb": [3, 14, 16, 20, 36, 40, 66, 67, 76, 82], "def": [3, 4, 15, 16, 17, 18, 19, 35, 37, 38, 40, 41, 45, 46, 49, 50, 53, 57, 67, 87], "__init__": [3, 15, 16, 17, 18, 19, 
37, 38, 40, 41, 45, 46, 49, 50, 53, 57, 66, 67, 68, 72, 74, 105, 106, 107], "self": [3, 15, 16, 17, 18, 19, 37, 38, 40, 41, 45, 49, 50, 53, 57, 59, 63, 66, 67, 68, 69, 75, 76, 89, 92, 95, 104], "kwarg": [3, 15, 16, 17, 18, 19, 37, 38, 40, 41, 45, 46, 49, 50, 53, 55, 57, 58, 59, 60, 63, 66, 67, 68, 70, 71, 72, 73, 75, 76, 105, 106, 107], "index": [3, 14, 15, 19, 20, 36, 40, 62, 66, 67, 75, 76, 82, 96], "_": [3, 4, 8, 15, 16, 17, 18, 19, 32, 37, 38, 40, 43, 44, 53, 60, 66, 67, 69, 76], "writer_extra": [3, 15, 16, 17, 18, 19, 40, 41, 49, 58, 66, 67, 68], "reward": [3, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 36, 37, 42, 43, 45, 53, 54, 56, 58, 59, 63, 66, 67, 68, 69, 72, 74, 76, 78, 82, 84, 105, 106, 107, 108, 110, 111], "etcag": [3, 16], "etc": [3, 16, 45, 51, 53, 57, 63, 64, 108], "20": [3, 15, 16, 17, 18, 19, 20, 29, 34, 37, 40, 43, 49, 50, 53, 54, 55, 56, 60, 76, 80, 82, 99], "action_and_reward": [3, 16, 18, 66, 67, 68], "mossag": [3, 16, 40], "moss": [3, 16, 40], "nptsagent": [3, 16], "npt": [3, 16], "tracker_param": [3, 16, 17], "agents_class": [3, 16, 17], "parallel": [3, 16, 17, 19, 40, 46, 55, 57, 63, 76, 77, 82, 83, 84, 108, 111], "process": [3, 16, 17, 19, 38, 40, 43, 57, 59, 60, 61, 62, 63, 66, 67, 68, 76, 77, 82, 83, 84], "mp_context": [3, 16, 17, 19, 40, 48, 76, 77, 82, 83, 84], "fork": [3, 16, 17, 19, 40, 63, 76, 77], "n_fit": [3, 4, 15, 16, 17, 18, 19, 21, 22, 26, 37, 38, 40, 41, 43, 44, 46, 48, 49, 50, 55, 56, 57, 58, 59, 60, 61, 76, 79, 82, 83, 84], "eval_funct": [3, 78], "eval_budget": [3, 78], "none": [3, 8, 15, 18, 35, 37, 38, 43, 49, 50, 53, 54, 55, 56, 58, 59, 60, 61, 66, 67, 68, 69, 71, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 87, 89, 91, 92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 104, 105, 106, 107], "agent_id": [3, 44, 76, 78], "df": [3, 56, 59, 80], "get_writer_data": [3, 63, 76], "return": [3, 4, 15, 16, 17, 18, 19, 37, 38, 40, 41, 45, 46, 49, 50, 52, 53, 57, 63, 64, 66, 67, 68, 69, 71, 72, 
74, 75, 76, 78, 79, 82, 84, 86, 88, 89, 91, 92, 93, 94, 95, 96, 97, 100, 104, 105, 106, 107], "max": [3, 15, 16, 17, 19, 37, 40, 53, 54, 55, 77], "sum": [3, 43, 67, 68, 75], "loc": [3, 56], "tag": [3, 15, 16, 17, 19, 38, 40, 41, 43, 46, 49, 56, 59, 61, 63, 69, 82, 84, 86, 104, 109], "valu": [3, 18, 21, 22, 35, 37, 43, 44, 45, 49, 50, 53, 55, 56, 58, 60, 61, 66, 67, 68, 69, 72, 74, 75, 76, 80, 81, 82, 84, 86, 92, 94, 95, 96, 104, 105, 106, 107], "method": [3, 38, 44, 45, 46, 48, 49, 51, 53, 59, 60, 61, 63, 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 84, 88, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 109], "tukey_hsd": [3, 44, 78], "b": [3, 53, 54, 62, 69, 75, 78, 91], "10_000": [3, 78], "comparison_ag": [3, 42], "show": [4, 15, 16, 17, 18, 19, 38, 39, 40, 41, 43, 44, 45, 53, 56, 57, 59, 79, 80, 81, 82, 108, 111], "how": [4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 35, 38, 40, 41, 43, 44, 45, 49, 52, 64, 69, 72, 74, 80, 81, 82, 105, 106, 107, 108, 110], "pendulum": [4, 54], "gymnasium": [4, 6, 7, 9, 21, 22, 26, 33, 35, 53, 55, 63, 66, 67, 68, 71, 72, 73, 74, 91, 92, 93, 94, 95, 96, 97, 105, 106, 107, 108, 110, 111], "gym": [4, 7, 9, 21, 22, 26, 35, 43, 54, 57, 63, 72, 73, 74, 91, 92, 93, 94, 95, 96, 97, 105, 106, 107], "torch": [4, 6, 7, 9, 10, 21, 22, 26, 33, 46, 48, 51, 55, 58, 60, 62, 63, 64, 66, 76, 82, 83, 84, 90, 109, 110, 111], "sacag": 4, "experimentmanag": [4, 15, 16, 17, 18, 19, 21, 22, 26, 37, 38, 40, 41, 43, 46, 47, 48, 49, 50, 51, 56, 57, 58, 59, 61, 63, 66, 67, 68, 75, 77, 78, 79, 82, 83, 84, 98, 100, 104, 110], "wrap_spac": [4, 63, 72, 73], "true": [4, 11, 16, 17, 18, 21, 22, 32, 33, 35, 37, 38, 40, 43, 44, 46, 47, 49, 50, 53, 55, 57, 59, 63, 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 79, 80, 81, 82, 84, 88, 89, 104, 105, 106, 107], "setup": [4, 63], "env_nam": 4, "int": [4, 15, 16, 17, 18, 19, 38, 40, 55, 58, 59, 60, 66, 67, 68, 69, 71, 72, 74, 75, 76, 77, 78, 79, 80, 82, 84, 89, 90, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "2e5": 
4, "f": [4, 7, 9, 35, 38], "wrapper": [4, 6, 7, 9, 11, 21, 22, 24, 26, 29, 30, 33, 34, 53, 54, 55, 57, 58, 61, 63, 70, 71, 74, 108, 109], "timelimit": [4, 6, 33, 72, 74, 105, 106, 107], "max_episode_step": [4, 6, 33, 69], "200": [4, 6, 10, 12, 29, 42, 46, 56, 83], "recordepisodestatist": 4, "creat": [4, 35, 37, 38, 46, 49, 50, 51, 59, 60, 61, 62, 63, 66, 67, 68, 78, 80, 81, 82, 91, 108, 109, 110, 111], "instanc": [4, 45, 46, 47, 49, 51, 53, 55, 57, 58, 59, 60, 61, 62, 63, 66, 67, 68, 72, 74, 75, 76, 77, 79, 82, 84, 104, 105, 106, 107, 108], "xp_manag": [4, 18, 19, 41], "enable_tensorboard": [4, 55, 63, 76], "start": [4, 16, 35, 38, 40, 53, 54, 60, 62, 63, 72, 74, 93, 95, 96, 105, 106, 107, 108, 110], "demo_sac": 4, "demo": [5, 14, 20, 23, 36, 42, 66, 67, 71, 73, 76, 79, 84, 93, 105, 107], "ppo": [5, 21, 22, 36, 42, 43, 44, 53, 55, 56, 58, 60, 61, 63, 68, 73, 75, 79, 83, 110], "algorithm": [5, 14, 17, 19, 20, 25, 35, 36, 37, 40, 42, 43, 44, 49, 50, 52, 53, 56, 57, 58, 60, 61, 63, 64, 66, 67, 68, 72, 73, 74, 76, 78, 80, 81, 82, 105, 106, 107, 108, 109], "pball2d": [5, 23, 36, 48], "valueiter": [5, 25, 36, 50, 53], "chain": [5, 23, 35, 36, 49, 53, 54, 61], "rsucbvi": [5, 24, 36, 47], "mountaincar": [5, 23, 36, 43, 51, 54, 60, 61, 89], "a2c": [5, 35, 36, 42, 44, 56, 57, 61, 63, 68, 73, 75, 79, 110, 111], "sac": [5, 36, 63, 75, 76], "soft": [5, 36, 76], "actor": [5, 36, 43, 76], "critic": [5, 21, 22, 36, 43, 58, 72, 74, 76, 103, 105, 106, 107], "mbqvi": [5, 29, 34, 36], "gridworld": [5, 23, 36, 37, 41, 50, 61, 63, 72, 74, 105, 106, 107], "rskernelucbviag": [5, 36, 105], "acrobot": [5, 23, 30, 36, 42, 52, 54, 61, 68, 70, 73, 75, 79, 83, 105, 110, 111], "dqn": [5, 26, 33, 36, 44, 61, 63, 73, 83], "cartpol": [5, 35, 36, 43, 44, 46, 52, 53, 54, 55, 56, 57, 58, 60, 61, 63, 68, 69, 72, 73, 74, 82, 84, 105, 106, 107], "set": [6, 7, 8, 9, 10, 11, 12, 13, 44, 46, 54, 60, 62, 63, 64, 66, 67, 68, 69, 72, 74, 76, 82, 84, 89, 90, 91, 92, 93, 94, 95, 96, 103, 105, 106, 107, 109, 
111], "up": [6, 7, 8, 9, 10, 11, 12, 13, 62, 72, 74, 82, 105, 106, 107, 111], "an": [6, 8, 10, 12, 13, 16, 17, 19, 21, 22, 35, 37, 38, 40, 41, 43, 44, 47, 48, 50, 52, 55, 56, 57, 58, 60, 61, 63, 64, 67, 68, 69, 70, 72, 74, 76, 80, 81, 82, 83, 92, 93, 94, 95, 96, 100, 104, 105, 106, 107, 109], "chosen": [6, 7, 8, 9, 10, 11, 12, 13, 50, 61, 64, 68, 72, 74, 76, 93, 105, 106, 107], "here": [6, 7, 8, 9, 10, 11, 12, 13, 35, 43, 44, 49, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, 64, 108, 109, 110], "a2cag": [6, 44, 48, 57, 76, 82, 84], "benchmark": [6, 10, 25, 31, 32, 34, 47, 48, 63, 64, 98, 99, 100, 102], "ball_explor": [6, 10, 31, 48], "256": [6, 22, 33, 48, 76], "n_timestep": 6, "50_000": 6, "gamma": [6, 8, 11, 12, 13, 21, 22, 24, 25, 28, 29, 30, 32, 34, 35, 37, 43, 46, 47, 48, 49, 50, 53, 59, 67, 68, 76], "99": [6, 8, 11, 12, 21, 22, 24, 29, 30, 37, 45, 46, 48, 50, 52, 55, 76], "learning_r": [6, 21, 22, 26, 35, 43, 46, 48, 53, 55, 56, 57], "001": [6, 43, 48], "budget": [6, 7, 9, 10, 11, 12, 18, 24, 30, 33, 37, 38, 41, 45, 49, 50, 53, 55, 57, 58, 59, 60, 66, 67, 68, 76, 99], "enable_rend": [6, 8, 10, 11, 12, 13, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 49, 53, 54, 61, 72, 105, 106, 107], "observ": [6, 7, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 37, 43, 45, 49, 50, 53, 54, 59, 61, 67, 68, 69, 72, 74, 105, 106, 107], "reset": [6, 7, 8, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 45, 49, 50, 53, 54, 59, 61, 67, 68, 69, 72, 74, 104, 105, 106, 107], "tt": [6, 8, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 28, 29, 30, 32, 33, 49, 50, 53, 54, 59, 61], "rang": [6, 7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 44, 45, 49, 50, 53, 54, 59, 60, 61, 67, 69, 80, 82, 87, 105], "action": [6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 37, 40, 41, 43, 45, 49, 50, 53, 54, 58, 59, 61, 63, 66, 67, 68, 69, 72, 74, 93, 95, 105, 106, 107], 
"termin": [6, 7, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 43, 45, 49, 50, 53, 54, 59, 61, 63, 67, 69, 72, 74, 105, 106, 107], "truncat": [6, 7, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 45, 49, 50, 53, 54, 59, 61, 67, 69, 72, 74, 105, 106, 107], "step": [6, 7, 8, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 41, 43, 44, 45, 49, 50, 53, 54, 56, 59, 61, 62, 66, 67, 69, 72, 74, 75, 78, 86, 104, 105, 106, 107], "done": [6, 7, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 45, 47, 48, 49, 50, 53, 54, 55, 59, 61, 62, 63, 64, 69, 72, 74, 75, 76, 88, 105, 106, 107], "video": [6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 53, 54, 72, 105, 106, 107, 109, 110], "save_video": [6, 8, 10, 11, 12, 13, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 53, 54, 62, 72, 105, 106, 107], "_video": [6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 62], "video_plot_a2c": 6, "mp4": [6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 53, 54, 55, 62], "As": [7, 9, 44, 48, 49, 50, 55, 58, 59, 72, 74, 91, 92, 93, 94, 95, 96, 105, 106, 107], "computation": [7, 9], "intens": [7, 9], "hard": [7, 9], "tune": [7, 9, 21, 22, 26, 33, 43, 44, 55], "one": [7, 9, 41, 44, 46, 49, 53, 54, 55, 56, 59, 61, 62, 63, 64, 66, 67, 69, 72, 74, 76, 78, 80, 81, 82, 84, 94, 99, 105, 106, 107, 108], "tensorboard": [7, 9, 55, 63, 66, 67, 68, 76, 86, 104, 108, 109, 110, 111], "visual": [7, 9, 43, 49, 56, 57, 75, 109, 110], "command": [7, 9, 62], "logdir": [7, 9, 55], "path": [7, 9, 54, 55, 56, 59, 62, 63, 66, 67, 68, 75, 76, 78, 82, 84, 86], "writer": [7, 9, 18, 38, 41, 46, 57, 59, 61, 63, 66, 67, 68, 76, 82, 84, 110], "log_dir": [7, 9, 63], "parent": [7, 9, 53, 54, 86, 104], "util": [7, 9, 21, 22, 26, 33, 35, 41, 46, 54, 58, 59, 61, 63, 64], "summarywrit": [7, 9, 63, 66, 67, 68, 76, 104], "dqnagent": [7, 23, 36, 44, 71, 76, 82, 83, 84], 
"log": [7, 9, 41, 46, 48, 55, 56, 57, 63, 66, 67, 68, 72, 74, 76, 86, 104, 105, 106, 107, 110], "configure_log": [7, 9], "record_video": [7, 9, 21, 22, 26, 53, 54, 55], "recordvideo": [7, 9, 21, 22, 26, 53, 54, 55, 61], "shutil": [7, 9, 21, 22, 26], "o": [7, 9, 21, 22, 26, 76], "level": [7, 9, 44, 48, 52, 53, 54, 61, 63, 64, 68, 75, 76, 78, 80, 81, 82, 85, 103, 110], "render_mod": [7, 9, 21, 22, 26, 53, 54, 55, 61, 71, 72, 74, 105, 106, 107], "rgb_arrai": [7, 9, 21, 22, 26, 53, 54, 55, 61, 72, 74, 105, 106, 107], "epsilon_decay_interv": [7, 9], "1000": [7, 9, 33, 35, 38, 55, 56, 57, 58, 60, 76, 82], "set_writ": [7, 9, 66, 67, 68, 76], "temp": [7, 9, 15, 18, 21, 22, 26, 37, 38, 41, 43, 55, 56, 58, 59], "episod": [7, 9, 21, 22, 26, 43, 45, 46, 47, 49, 53, 54, 55, 59, 61, 66, 67, 68, 69, 72, 74, 76, 82, 84, 104, 105, 106, 107], "3": [7, 8, 9, 15, 18, 19, 28, 31, 35, 37, 41, 43, 49, 50, 53, 54, 55, 56, 58, 60, 61, 72, 74, 76, 80, 87, 104, 105, 106, 107], "fals": [7, 9, 11, 16, 18, 21, 22, 25, 26, 32, 33, 40, 41, 43, 44, 45, 47, 50, 53, 54, 55, 57, 58, 59, 60, 61, 68, 69, 71, 72, 73, 76, 79, 80, 81, 82, 84, 85, 87, 88, 89, 102, 104], "while": [7, 9, 37, 38, 45, 53, 64, 69], "close": [7, 9, 21, 22, 26, 53, 54, 72, 74, 105, 106, 107], "need": [7, 9, 21, 22, 26, 38, 44, 46, 47, 48, 50, 51, 53, 54, 55, 56, 58, 59, 60, 61, 62, 64, 72, 74, 76, 80, 82, 91, 92, 93, 94, 95, 96, 105, 106, 107, 109, 110], "move": [7, 9, 21, 22, 26, 49, 50, 63, 72, 74, 105, 106, 107], "final": [7, 9, 18, 21, 22, 26, 35, 37, 43, 50, 110, 111], "insid": [7, 9, 21, 22, 26, 53, 55, 58, 59, 60, 63, 64, 84, 91, 96], "folder": [7, 9, 21, 22, 26, 41, 54, 56, 59, 62, 64, 77, 82, 84, 86], "document": [7, 9, 21, 22, 26, 41, 45, 60, 63, 110, 111], "renam": [7, 9, 21, 22, 26, 63], "rl": [7, 9, 21, 22, 26, 35, 44, 57, 61, 63, 64, 110, 111], "video_plot_dqn": 7, "rmtree": [7, 9, 21, 22, 26], "rlberry_scool": [8, 13, 25, 27, 28, 29, 32, 34, 41, 49, 53, 54, 59, 60, 63, 100], "mbqviagent": [8, 29, 34], "finit": [8, 
13, 27, 28, 37, 50, 53, 54], "param": [8, 16, 46, 47, 66, 67, 68, 72, 74, 76, 105, 106, 107], "n_sampl": [8, 29, 34], "100": [8, 18, 39, 41, 46, 47, 49, 50, 53, 54, 55, 56, 58, 60, 61, 67, 69, 76, 83, 87], "sampl": [8, 14, 17, 18, 20, 27, 32, 36, 37, 46, 49, 50, 51, 52, 53, 54, 60, 63, 66, 67, 68, 69, 72, 74, 76, 78, 82, 91, 92, 93, 94, 95, 96, 105, 106, 107], "per": [8, 43, 50, 94], "state": [8, 18, 21, 22, 28, 37, 43, 45, 50, 53, 63, 72, 74, 97, 105, 106, 107], "pair": [8, 72, 74, 104, 105, 106, 107], "7": [8, 17, 18, 21, 28, 41, 49, 50, 53, 54, 55, 56, 60], "wall": [8, 28, 37, 41, 50, 61], "success_prob": [8, 37, 41, 50], "evalu": [8, 21, 22, 26, 35, 37, 43, 44, 45, 47, 49, 50, 53, 55, 57, 58, 60, 66, 67, 68, 70, 75, 76, 78, 79, 80, 82, 108, 110], "determinist": [8, 35, 51, 67, 68], "version": [8, 21, 22, 26, 49, 53, 54, 59, 62, 72, 74, 76, 87, 105, 106, 107, 110], "env_ev": 8, "next_": 8, "video_plot_mbqvi": 8, "munchausendqnag": 9, "munchausen": [9, 63], "5": [9, 15, 17, 18, 19, 25, 27, 31, 35, 37, 39, 40, 41, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 67, 68, 69, 71, 75, 76, 79, 80, 89], "video_plot_mdqn": 9, "ppoagent": [10, 23, 36, 44, 71, 76, 83], "n_step": [10, 21, 22, 35, 43, 48, 55], "3e3": 10, "video_plot_ppo": 10, "rskernelucbvi": 11, "rescalerewardwrapp": [11, 24, 30, 70], "rescak": 11, "300": [11, 24, 30, 55], "bonus_scale_factor": [11, 12, 24, 30, 47], "01": [11, 20, 24, 26, 30, 38, 42, 52, 53, 56, 75, 87], "min_dist": [11, 24, 30, 47], "bandwidth": [11, 80, 82], "05": [11, 44, 55, 56, 78], "beta": [11, 15, 44, 63, 69, 75], "kernel_typ": [11, 39], "gaussian": [11, 14, 20, 36, 39, 61, 63, 66, 67, 76, 78, 80, 81, 82], "500": [11, 12, 21, 22, 26, 33, 38, 41, 42, 43, 44, 46, 52, 55, 56, 59, 61, 69, 76, 79, 82, 83, 84], "time_before_don": 11, "achiev": [11, 72, 74, 105, 106, 107, 108], "goal": [11, 43, 49, 50, 72, 74, 105, 106, 107, 108], "first": [11, 37, 38, 44, 49, 51, 52, 55, 56, 59, 60, 61, 62, 66, 67, 72, 74, 75, 76, 96, 
104, 105, 106, 107, 108, 109], "video_plot_rs_kernel_ucbvi": 11, "rsucbviag": [12, 23, 30, 36, 47, 51, 89, 105], "classic_control": [12, 29, 33], "170": [12, 56], "r": [12, 18, 21, 22, 37, 43, 49, 50, 67, 68, 91], "ucbvi": [12, 49, 63], "video_plot_rsucbvi": 12, "dynprog": [13, 25, 28, 32, 53], "valueiterationag": [13, 23, 25, 32, 36, 37, 50, 53], "95": [13, 21, 22, 28, 46, 50, 53, 55, 56, 59, 63, 80, 81, 82], "break": [13, 21, 22, 25, 26, 28, 32, 37, 53, 59], "video_plot_vi": 13, "cumul": [14, 15, 16, 20, 36, 40, 41, 43, 49, 66, 67, 76, 82, 84], "regret": [14, 15, 16, 20, 36, 40, 49, 66, 67, 76, 82], "exp3": [14, 16, 20, 36, 66, 67, 76, 82], "thompson": [14, 17, 20, 36, 63, 66, 67, 76, 82], "bernoulli": [14, 17, 20, 36, 66, 67, 76, 82], "subplot": [14, 20, 36, 39, 40, 41, 56, 66, 67, 75, 76, 82], "variou": [14, 20, 36, 61, 66, 67, 76, 82], "base": [14, 19, 20, 36, 37, 40, 45, 53, 63, 66, 67, 68, 69, 72, 74, 75, 76, 77, 82, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "bai": [14, 20, 36, 66, 67, 76, 84, 93], "real": [14, 20, 36, 66, 67, 76, 84, 93, 109], "dataset": [14, 20, 35, 36, 66, 67, 76, 84, 93], "select": [14, 16, 20, 36, 43, 49, 53, 58, 66, 67, 76, 84, 93], "mirror": [14, 20, 36, 66, 67, 76, 84, 93], "In": [15, 18, 37, 43, 44, 49, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64, 72, 74, 80, 81, 82, 91, 105, 106, 107, 108, 110, 111], "prior": [15, 17, 63], "compar": [15, 16, 36, 37, 42, 43, 44, 50, 57, 60, 61, 68, 73, 75, 78, 79, 110, 111], "bound": [15, 72, 74, 91, 105, 106, 107], "support": [15, 62, 72, 74, 76, 96, 105, 106, 107, 109], "For": [15, 43, 44, 45, 46, 49, 53, 54, 55, 56, 57, 58, 59, 60, 62, 64, 66, 67, 69, 72, 74, 82, 84, 87, 94, 105, 106, 107, 109, 111, 112], "sub": [15, 16, 50, 62, 69, 92, 96], "12": [15, 18, 37, 38, 41, 43, 50, 53, 54, 55, 56, 63], "45": [15, 43, 50, 54, 55, 56], "save": [15, 18, 21, 22, 24, 26, 28, 30, 33, 37, 38, 41, 43, 47, 53, 55, 56, 57, 58, 62, 63, 66, 67, 68, 72, 76, 77, 80, 81, 82, 84, 105, 106, 107, 108, 110], "pickl": 
[15, 18, 37, 38, 41, 43, 55, 56, 58, 59, 63, 66, 67, 68, 77, 82, 84], "rlberry_data": [15, 18, 37, 38, 41, 43, 55, 56, 58, 59], "manager_data": [15, 18, 37, 38, 41, 43, 55, 56, 58, 76], "agent_2024": [15, 38], "16_12": [15, 18, 37, 38, 41], "04_319cb7e6": 15, "manager_obj": [15, 18, 37, 38, 41, 43, 55, 56, 58, 59, 63], "max_work": [15, 37, 38, 43, 49, 55, 56, 58, 59, 60, 63, 76, 77, 83], "04_279a356f": 15, "15_f7d90f4f": 15, "15_9c0d4b7a": 15, "normalbandit": [15, 19, 40], "tsagent": [15, 17], "makesubgaussianucbindex": [15, 19, 40], "makebetaprior": [15, 17], "makegaussianprior": 15, "plot_writer_data": [15, 16, 17, 19, 38, 40, 41, 43, 46, 49, 56, 58, 61, 63], "definit": [15, 16, 17, 18, 19, 40, 44], "bernoullitsag": [15, 17], "boundeducbag": 15, "8": [15, 18, 22, 35, 43, 44, 46, 49, 50, 53, 54, 55, 56, 59, 60], "mc": [15, 16, 17, 19, 40, 66], "simu": [15, 16, 17, 19, 40], "comput": [15, 16, 17, 19, 37, 40, 44, 49, 52, 59, 61, 62, 63, 64, 66, 67, 72, 74, 75, 76, 80, 81, 82, 105, 106, 107, 108, 109], "plot": [15, 16, 17, 19, 36, 38, 42, 43, 46, 49, 50, 52, 55, 56, 58, 60, 63, 66, 67, 76, 79, 80, 81, 82, 108, 110], "pseudo": [15, 16, 17, 19, 40], "compute_pseudo_regret": [15, 16, 17, 19, 40], "cumsum": [15, 16, 17, 19, 40, 41, 50, 82, 84], "astyp": [15, 16, 19, 40], "output": [15, 16, 17, 19, 21, 22, 26, 37, 40, 41, 49, 50, 53, 54, 57, 58, 59, 60, 63, 66, 67, 68, 76, 77, 86, 108, 110], "preprocess_func": [15, 16, 17, 19, 40, 41, 82, 84], "gaussiantsag": 15, "sigma": [15, 19, 40], "gaussianucbag": 15, "ones": [15, 19, 40], "350": [15, 20, 56, 63], "plot_ts_bandit": [15, 20], "sever": [16, 35, 38, 44, 46, 48, 49, 50, 52, 57, 61, 78, 80, 81, 82, 108], "product": [16, 60, 91], "also": [16, 45, 48, 51, 56, 60, 61, 62, 64, 66, 67, 72, 73, 74, 82, 89, 105, 106, 107, 108, 110, 111], "home": [16, 40], "runner": [16, 40], "main": [16, 40, 47, 48, 49, 52, 62, 63, 64, 72, 76, 104, 111], "616": [16, 40], "userwarn": [16, 40, 54], "No": [16, 40, 44, 50, 54], "artist": [16, 40], 
"label": [16, 40, 64], "found": [16, 40, 52, 76, 108], "put": [16, 21, 22, 26, 37, 40, 43, 45, 53], "legend": [16, 40], "note": [16, 40, 44, 46, 49, 52, 57, 66, 67, 68, 69, 72, 74, 76, 82, 84, 105, 106, 107], "whose": [16, 40, 61, 67, 69, 82], "underscor": [16, 40, 104], "ignor": [16, 40], "when": [16, 18, 38, 40, 43, 44, 48, 51, 52, 57, 59, 60, 61, 62, 63, 64, 66, 67, 72, 74, 75, 76, 78, 97, 104, 105, 106, 107], "call": [16, 35, 38, 40, 44, 46, 48, 49, 52, 55, 58, 59, 60, 61, 63, 64, 66, 67, 69, 72, 74, 76, 77, 79, 80, 81, 82, 88, 105, 106, 107], "argument": [16, 40, 55, 58, 60, 61, 63, 66, 67, 68, 71, 73, 76, 89, 96, 98, 99, 100, 102, 109], "plt": [16, 18, 19, 39, 40, 41, 56, 61, 75, 79, 80, 81, 82], "matplotlib": [16, 18, 19, 39, 40, 41, 56, 61, 75, 79, 80, 81, 82, 109], "pyplot": [16, 18, 19, 39, 40, 41, 56, 61, 79], "randomizedag": [16, 17], "makeboundedimedindex": 16, "makeboundeducbvindex": 16, "makeexp3index": [16, 17], "ucbvag": 16, "ucbv": 16, "imedag": 16, "im": 16, "exp3ag": [16, 17], "prob": [16, 17, 35], "seed": [16, 18, 30, 34, 35, 38, 43, 44, 47, 48, 55, 56, 57, 58, 59, 61, 63, 66, 67, 68, 71, 72, 73, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 86, 91, 92, 93, 94, 95, 96, 97, 100, 105, 106, 107, 110, 111], "42": [16, 30, 50, 53, 55, 56, 58, 60, 61, 75, 79, 82, 83, 84, 96], "should": [16, 17, 19, 21, 22, 26, 40, 51, 59, 60, 61, 62, 63, 64, 66, 67, 72, 74, 80, 81, 82, 84, 105, 106, 107], "give": [16, 17, 18, 19, 35, 40, 50, 55, 56, 58, 59, 60, 61, 62, 108, 109], "even": [16, 17, 19, 38, 40, 72, 74, 89, 105, 106, 107], "compute_regret": 16, "linestyl": [16, 80, 81, 82], "each": [16, 43, 44, 47, 48, 49, 50, 51, 52, 60, 61, 62, 63, 64, 67, 68, 69, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 91, 92, 95, 96, 104, 105, 106, 107, 108], "compute_na": 16, "fig": [16, 19, 39, 40, 41, 75], "ax": [16, 19, 39, 40, 41, 56, 75, 80, 81, 82], "sharei": 16, "figsiz": [16, 19, 39, 40, 75], "ravel": [16, 40], "lambda": [16, 63, 78], "na": [16, 59], "str": [16, 21, 22, 26, 
56, 66, 67, 68, 69, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 84, 85, 87, 91, 92, 98, 99, 100, 102, 103, 104, 105, 106, 107], "tight_layout": [16, 40], "41": [16, 20, 50, 53, 56], "099": [16, 20], "plot_compare_index_bandit": [16, 20], "defin": [17, 19, 37, 40, 43, 44, 50, 54, 63, 70, 72, 73, 74, 76, 77, 89, 104, 105, 106, 107, 109, 110, 111], "random": [17, 35, 37, 43, 44, 49, 50, 51, 53, 60, 61, 66, 67, 68, 69, 72, 74, 75, 76, 78, 79, 88, 89, 90, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108], "adversarialbandit": 17, "3000": [17, 19, 40, 53, 56], "switching_reward": 17, "gap": 17, "rate": [17, 52, 62], "adversari": 17, "switch": [17, 63], "over": [17, 21, 22, 26, 49, 61, 67, 68, 76, 78, 108, 109], "exponenti": [17, 91], "long": [17, 59, 62, 63, 64], "phase": 17, "inspir": 17, "zimmert": 17, "julian": 17, "yevgeni": 17, "seldin": 17, "tsalli": 17, "inf": [17, 18], "optim": [17, 18, 21, 22, 26, 33, 37, 43, 49, 50, 51, 53, 55, 57, 66, 67, 68, 76, 108, 109], "stochast": [17, 21, 22, 26, 43, 49, 52, 60, 67, 108], "j": [17, 35, 43, 44], "mach": 17, "re": [17, 59, 60, 62, 76, 77], "22": [17, 40, 42, 43, 50, 53, 54, 55, 56, 63], "28": [17, 50, 53, 54, 55, 56, 59, 60], "zero": [17, 37, 38, 45, 50, 53], "exp": [17, 39], "high_reward": 17, "floor": [17, 18], "els": [17, 18, 43, 53, 54, 59, 92], "selected_reward": 17, "enumer": [17, 39, 40, 61, 79], "axi": [17, 37, 50, 61, 63, 80, 81, 82], "662": [17, 20], "plot_exp3_bandit": [17, 20], "exempl": 18, "sequenti": [18, 52, 75, 108], "halv": [18, 76], "find": [18, 43, 46, 53, 54, 55, 59, 63, 108, 110], "best": [18, 76], "server": [18, 63], "ubuntu": [18, 53, 54], "among": [18, 49, 66, 67, 68, 76], "choic": [18, 78], "french": 18, "quirck": 18, "applic": 18, "possibl": [18, 43, 44, 48, 49, 52, 61, 63, 64, 72, 74, 75, 80, 82, 93, 95, 104, 105, 106, 107, 108], "timeout": [18, 46, 57, 76], "ping": 18, "handl": [18, 44, 48, 51, 63, 104], "median": 18, "instead": [18, 48, 49, 53, 56, 61, 63, 64, 66, 76, 104], "object": [18, 46, 51, 
60, 66, 67, 68, 69, 72, 74, 76, 77, 79, 88, 89, 90, 92, 104, 105, 106, 107, 108], "three": [18, 43], "part": [18, 59, 60, 62, 108], "46": [18, 50, 53, 55, 56, 60], "worker": [18, 43, 53, 55, 56, 59, 76, 77], "max_global_step": [18, 43, 53, 55, 56, 59], "sh": 18, "625": 18, "458": [18, 55], "11": [18, 50, 53, 54, 55, 56, 59, 63, 87], "306": 18, "61": [18, 50, 56], "27": [18, 50, 53, 55, 56], "301": 18, "36": [18, 20, 43, 50, 53, 54, 55, 56], "309": [18, 56], "4": [18, 19, 21, 22, 26, 37, 39, 41, 43, 46, 48, 49, 50, 53, 54, 55, 56, 57, 58, 60, 76, 78, 80, 83, 86], "303": 18, "56": [18, 50, 53, 54, 56], "66": [18, 50, 53, 55, 56], "73": [18, 50, 53, 55], "302": 18, "83": [18, 43, 44, 50, 55, 56, 63], "93": [18, 50, 53, 54, 55, 56], "sh_2024": 18, "06_13e762a8": 18, "fastest": 18, "read_writer_data": [18, 49, 56, 63], "interfac": [18, 45, 48, 53, 58, 63, 66, 67], "model": [18, 35, 54, 56, 57, 60, 61, 72, 78, 80, 81, 82, 105, 106, 107], "banditwithsimplepolici": [18, 63], "space": [18, 37, 40, 43, 63, 72, 73, 74, 105, 106, 107], "request": [18, 64], "logger": [18, 35, 54, 57, 64, 68, 76, 103, 110], "mirrors_ubuntu": 18, "lafibr": 18, "ikoula": 18, "ovh": 18, "net": 18, "miroir": 18, "univ": 18, "lorrain": 18, "fr": 18, "nant": 18, "ftp": 18, "u": [18, 49, 50, 51, 52, 61, 62, 108], "picardi": 18, "reim": 18, "www": [18, 53, 54], "lip6": 18, "pub": 18, "linux": [18, 48, 53, 54, 62], "distribut": [18, 21, 22, 26, 44, 52, 75, 78, 91], "archiv": [18, 63], "get_tim": 18, "try": [18, 43, 54, 88], "resp": 18, "get": [18, 43, 44, 50, 52, 54, 58, 59, 61, 66, 67, 68, 72, 74, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107], "elaps": [18, 56, 82], "total_second": 18, "except": [18, 48, 72, 74, 100, 105, 106, 107], "mirrorbandit": 18, "respons": 18, "meant": 18, "On": [18, 61], "neg": [18, 72, 74, 91, 105, 106, 107], "wait": 18, "reach": [18, 72, 74, 105, 106, 107], "mirror_ubuntu": 18, "warn": [18, 28, 43, 53, 54, 55, 60, 63, 64, 76, 77, 82, 89, 92, 96, 103, 104, 109], "queri": 18, 
"infinit": 18, "url_id": 18, "list": [18, 35, 47, 55, 56, 58, 61, 62, 70, 72, 74, 75, 76, 78, 79, 82, 84, 87, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "subset": 18, "provid": [18, 37, 43, 51, 54, 56, 57, 63, 66, 67, 72, 74, 76, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108, 109, 110, 111], "all": [18, 35, 36, 44, 46, 49, 50, 51, 54, 55, 56, 57, 59, 60, 61, 62, 64, 66, 67, 68, 69, 75, 76, 80, 81, 82, 86, 92, 93, 95, 96, 108, 110], "mirrorenv": 18, "url_list": 18, "n_arm": 18, "action_spac": [18, 27, 32, 37, 49, 50, 53, 54, 67, 69, 73, 74, 88], "discret": [18, 43, 95, 106], "associ": [18, 62, 69, 104], "exist": [18, 44, 53, 54, 72, 74, 88, 105, 106, 107], "assert": [18, 44, 60, 108], "default": [18, 28, 41, 43, 48, 50, 58, 59, 63, 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 84, 85, 87, 88, 89, 91, 92, 93, 94, 95, 98, 99, 100, 102, 104, 105, 106, 107], "seqhalvag": 18, "active_set": 18, "arang": 18, "logk": 18, "ceil": 18, "log2": 18, "ep": [18, 45, 49, 53, 67], "tr": 18, "k": [18, 52, 75, 92], "reward_est": 18, "estim": [18, 43, 49, 50, 67, 68, 75, 80, 82], "half_len": 18, "argsort": 18, "optimal_act": 18, "add_scalar": [18, 38, 104], "onli": [18, 35, 38, 41, 43, 46, 48, 49, 53, 54, 55, 56, 58, 60, 61, 62, 63, 64, 66, 67, 68, 75, 76, 77, 80, 81, 82, 84, 89, 91, 92, 93, 94, 95, 96, 104, 107, 110], "iter": [18, 37, 43, 49, 53, 55, 56, 58, 61, 66, 67, 75, 104], "faster": [18, 43, 48, 49, 62, 66, 67, 104], "doc": [18, 28, 35, 48, 51, 55, 62, 63, 64, 69, 76, 89, 109], "preprocess_tag": [18, 84], "boxplot": [18, 52, 61], "xlabel": [18, 50], "ylabel": [18, 50], "agent_handl": [18, 59, 76], "308": [18, 20, 63], "plot_mirror_bandit": [18, 20], "subgaussian": [19, 40], "figur": [19, 52, 56, 79, 80, 81, 82], "gca": 19, "902": [19, 20], "plot_ucb_bandit": [19, 20], "320": 20, "execut": [20, 38, 42, 44, 48, 62, 66, 67, 68, 72, 74, 104, 105, 106, 107], "auto_examples_demo_bandit": 20, "file": [20, 42, 56, 57, 59, 62, 63, 64, 66, 67, 68, 72, 76, 77, 82, 
84, 85, 86, 87, 105, 106, 107, 111], "bandit": [20, 42, 61, 63, 66, 67, 76, 78, 82, 84, 93], "00": [20, 42, 53, 54, 56], "mb": [20, 42, 53, 54], "07": [20, 54, 56], "04": [20, 43, 53, 54, 55, 58, 63], "render": [21, 22, 24, 25, 26, 27, 28, 31, 32, 33, 34, 49, 53, 54, 61, 62, 63, 72, 74, 105, 106, 107], "slightli": [21, 22, 26, 33], "just": [21, 22, 26, 33, 38, 44, 57, 59, 62], "purpos": [21, 22, 26, 33, 43, 46, 64, 110], "datetim": [21, 22, 26], "atari_mak": [21, 22, 26, 54], "model_factory_from_env": [21, 22, 26, 33], "initial_tim": [21, 22, 26], "now": [21, 22, 26, 46, 48, 55, 61, 62, 63, 72, 74, 105, 106, 107], "init": [21, 22, 26, 43, 44, 55, 58, 60], "policy_mlp_config": [21, 22], "type": [21, 22, 26, 33, 44, 51, 56, 58, 61, 62, 64, 67, 68, 69, 76, 83, 86, 91, 92, 94, 95, 96], "multilayerperceptron": [21, 22, 26, 33], "network": [21, 22, 26, 61, 63], "architectur": [21, 22, 26], "layer_s": [21, 22, 26, 33], "512": [21, 22, 26], "dimens": [21, 22, 26, 43], "reshap": [21, 22, 26, 33], "is_polici": [21, 22, 26], "critic_mlp_config": [21, 22], "out_siz": [21, 22], "approxim": [21, 22, 75, 78], "policy_config": [21, 22], "convolutionalnetwork": [21, 22, 26], "relu": [21, 22, 26], "in_channel": [21, 22, 26], "in_height": [21, 22, 26], "84": [21, 22, 26, 43, 50, 55, 56], "in_width": [21, 22, 26], "head_mlp_kwarg": [21, 22, 26], "transpose_ob": [21, 22, 26], "critic_config": [21, 22], "tuned_xp": [21, 22, 26, 43], "al": [21, 22, 26, 54, 71, 109], "v5": [21, 22, 26, 54, 71], "solv": [21, 22, 26, 35, 38, 43, 53, 62, 72, 74, 105, 106, 107], "hyperparamet": [21, 22, 26, 37, 43, 51, 53, 55, 57, 66, 67, 68, 76, 108, 109], "batch_siz": [21, 22, 26, 43, 46, 48, 55, 69], "64": [21, 43, 50, 55, 56, 58], "optimizer_typ": [21, 22], "adam": [21, 22], "what": [21, 22, 44, 49, 50, 52, 56, 58, 60, 61, 62, 64, 69, 80, 81], "gradient": [21, 22, 26, 43], "descent": [21, 22, 26, 43], "1e": [21, 26, 37, 43, 45, 46, 50, 53, 57], "size": [21, 22, 26, 52, 53, 54, 75, 82], "policy_net_fn": 
[21, 22], "constructor": [21, 22, 46, 47, 51, 55, 58, 59, 60, 66, 67, 68, 69, 73, 76, 98, 99, 100, 102], "policy_net_kwarg": [21, 22], "architecur": [21, 22], "value_net_fn": [21, 22], "value_net_kwarg": [21, 22], "n_env": [21, 22], "gae_lambda": [21, 22, 43], "clip_ep": [21, 22], "k_epoch": [21, 22], "1024": 21, "10_000_000": [21, 22], "interact": [21, 22, 26, 43, 45, 53, 54, 56, 67], "between": [21, 22, 26, 43, 44, 49, 53, 60, 104, 108], "dure": [21, 22, 26, 36, 42, 43, 55, 57, 58, 59, 61, 66, 67, 72, 74, 76, 82, 105, 106, 107, 109, 110, 111], "eval_kwarg": [21, 22, 26, 37, 43, 44, 46, 47, 49, 50, 55, 57, 58, 60, 61, 76, 79, 82, 83, 84], "eval_horizon": [21, 22, 26, 37, 43, 44, 46, 47, 49, 50, 55, 57, 58, 60, 61, 67, 68, 76, 79, 82, 83, 84], "usual": [21, 22, 26, 43, 72, 74, 105, 106, 107, 108], "good": [21, 22, 26, 43, 46, 53, 60, 61, 62], "do": [21, 22, 26, 43, 44, 49, 50, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64, 75, 76, 79, 80, 81, 82, 84, 88, 90, 108, 110], "more": [21, 22, 26, 28, 38, 41, 43, 47, 49, 53, 54, 55, 56, 58, 59, 61, 62, 63, 64, 69, 75, 76, 109, 110, 111], "than": [21, 22, 26, 43, 44, 48, 56, 59, 61, 64, 69, 80, 82, 91, 94, 95], "becaus": [21, 22, 26, 28, 38, 41, 43, 50, 58, 61], "ppo_tun": [21, 22], "output_dir": [21, 22, 26, 38, 55, 57, 59, 63, 66, 67, 68, 76, 82, 84], "ppo_for_atlanti": 21, "final_train_tim": [21, 22, 26], "metadata": [21, 22, 26, 53, 54, 72, 74, 104, 105, 106, 107], "mode": [21, 22, 26, 56, 72, 74, 105, 106, 107], "bug": [21, 22, 26, 63, 64], "some": [21, 22, 26, 40, 43, 44, 47, 48, 52, 54, 56, 59, 60, 61, 62, 63, 64, 72, 74, 83, 91, 105, 106, 107, 110], "30000": [21, 22, 26], "get_agent_inst": [21, 22, 26, 59, 63, 76], "final_test_tim": [21, 22, 26], "example_plot_atari_atlantis_vectorized_ppo": 21, "begin": [21, 22, 26, 44, 80, 108], "example_atari_atlantis_vectorized_ppo": 21, "5e": 22, "128": [22, 54, 56], "ppo_for_breakout": 22, "example_plot_atari_breakout_vectorized_ppo": 22, "example_atari_breakout_vectorized_ppo": 22, 
"applegold": [23, 36], "twinroom": [23, 36], "oldgymcompatibilitywrapp": [23, 36, 105], "old_acrobot": [23, 36, 105, 107], "room": [23, 36, 47], "springcartpol": [23, 36, 63], "atari": [23, 36, 63, 71, 76, 109, 110], "freewai": [23, 36, 71, 76], "atlanti": [23, 36, 71, 76], "vector": [23, 36, 63, 71, 76], "breakout": [23, 36, 54, 71, 76], "rescal": [24, 105], "n_episod": [24, 30, 45, 53], "25": [24, 30, 31, 37, 50, 52, 53, 54, 55, 56, 59, 60, 72, 74, 105, 106, 107], "video_plot_acrobot": 24, "grid_explor": [25, 32, 47], "apple_gold": 25, "reward_fre": [25, 47], "array_observ": [25, 47], "video_plot_apple_gold": 25, "mlp_config": 26, "cnn_config": 26, "q_net_constructor": [26, 33], "q_net_kwarg": [26, 33], "max_replay_s": [26, 69], "50000": 26, "32": [26, 43, 46, 50, 53, 55, 56, 60, 69], "learning_start": 26, "25000": 26, "gradient_step": 26, "epsilon_fin": 26, "chunk_siz": [26, 69], "90000": 26, "dqn_tune": 26, "dqn_for_freewai": 26, "video_plot_atari_freewai": 26, "video_plot_chain": 27, "ofvalueiter": 28, "never": [28, 62, 72, 74, 91, 94, 95, 105, 106, 107], "present": [28, 43, 62, 96, 111], "see": [28, 38, 41, 43, 44, 46, 47, 48, 49, 51, 55, 58, 59, 60, 61, 62, 63, 66, 67, 69, 72, 74, 75, 76, 80, 81, 89, 105, 106, 107, 108, 111], "inform": [28, 41, 43, 45, 49, 53, 54, 56, 58, 61, 62, 64, 66, 67, 68, 72, 74, 76, 105, 106, 107, 108, 109, 110, 111], "video_plot_gridworld": 28, "framer": [28, 62, 72, 105, 106, 107], "discretizestatewrapp": [29, 34], "_env": [29, 106], "40": [29, 50, 53, 54, 55, 56], "video_plot_montain_car": 29, "video_plot_mountain_car": 29, "old": [30, 59, 63, 104, 107], "old_env": 30, "gym_util": 30, "video_plot_old_gym_acrobot": 30, "video_plot_old_gym_compatibility_wrapper_old_acrobot": 30, "reward_amplitud": 31, "reward_smooth": 31, "reward_cent": 31, "75": [31, 37, 50, 55, 56], "co": 31, "pi": [31, 35, 40, 43, 61, 80, 81, 82], "sin": 31, "action_list": 31, "ii": [31, 34, 39, 44], "video_plot_pbal": 31, "nroom": [32, 47], "remove_wal": 32, 
"room_siz": 32, "initial_state_distribut": 32, "center": [32, 50], "include_trap": 32, "observation_spac": [32, 37, 53, 72, 73, 74, 88, 105, 106, 107], "999": 32, "video_plot_room": 32, "time_limit": 33, "model_config": 33, "obs_tran": 33, "swing_up": 33, "1e5": [33, 43, 44, 48, 61, 79], "video_plot_springcartpol": 33, "discretize_st": 34, "seeder": [34, 51, 59, 60, 61, 66, 67, 68, 72, 74, 76, 88, 90, 91, 93, 94, 95, 105, 106, 107, 108], "123": [34, 38, 47, 51, 57, 59, 60, 61, 89], "n_bin": [34, 106], "rese": [34, 51, 57, 60, 61, 66, 67, 68, 72, 73, 74, 88, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107], "video_plot_twinroom": 34, "with_venv": 35, "decor": [35, 87], "order": [35, 49, 50, 52, 57, 61, 70, 92, 108], "automat": [35, 41, 48, 49, 51, 57, 59, 62, 63, 64, 72, 74, 82, 105, 106, 107, 109], "experiment": [35, 48, 63, 108], "separ": [35, 61, 76], "compil": [35, 62], "via": [35, 48, 56, 61, 63], "run_venv_xp": [35, 87], "run_sb": 35, "run_mushroom": 35, "directli": [35, 50, 57, 61, 63, 80, 82], "text": [35, 44, 52, 62, 72, 74, 105, 106, 107], "import_lib": [35, 87], "want": [35, 41, 48, 49, 50, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 72, 74, 75, 76, 84, 97, 105, 106, 107, 108, 110, 111], "contain": [35, 56, 59, 61, 66, 67, 68, 69, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 87, 91, 92, 93, 94, 95, 96, 105, 106, 107, 109], "mushroom_rl": 35, "taken": [35, 52, 66, 67], "venv_dir_nam": [35, 85, 87], "rlberry_venv": [35, 85, 87], "simpl": [35, 37, 38, 43, 44, 45, 49, 50, 53, 61, 63, 67, 108, 109], "q": [35, 37, 43, 49, 53, 54, 59, 63], "qlearn": [35, 53], "core": [35, 53, 54, 72, 108, 110], "generate_simple_chain": 35, "epsgreedi": 35, "compute_j": 35, "__name__": [35, 38, 48, 76, 79, 82, 84], "results_dir": 35, "strong_lin": 35, "mdp": [35, 43, 63, 72, 74, 105, 106, 107], "state_n": 35, "goal_stat": 35, "rew": 35, "epsilon": [35, 37, 50, 59], "15": [35, 39, 50, 53, 55, 56, 63, 78], "algorithm_param": 35, "10000": [35, 53, 55, 75, 78], "n_steps_per_fit": 35, "stabl": [35, 
43, 51, 69, 71, 89, 108, 109, 110, 111, 112], "baselines3": [35, 71, 109, 110], "python_v": [35, 87], "make": [35, 44, 46, 52, 53, 54, 55, 57, 62, 63, 64, 66, 67, 68, 72, 73, 74, 76, 78, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108, 110], "total_timestep": [35, 38, 43, 53, 55, 56, 58], "1_500": 35, "vec_env": 35, "get_env": 35, "ob": [35, 69], "cum_reward": 35, "_state": 35, "predict": [35, 60, 80, 81, 82], "__main__": [35, 38, 48, 76, 79, 82, 84], "collect": [35, 44, 52, 57, 61, 72, 74, 92, 105, 106, 107], "directori": [35, 55, 56, 63, 64, 66, 67, 68, 76, 82, 84, 85, 87], "tun": 35, "them": [35, 43, 52, 55, 56, 57, 58, 76, 80, 82, 110], "example_venv": [35, 42], "kernel": [36, 42, 82], "adastop": [36, 42, 44, 63, 68, 73, 75, 110], "record": [36, 42, 49, 56, 61, 62, 63, 66, 67, 68, 76, 82, 109], "virtual": [36, 42, 85, 87, 109], "tool": [36, 42, 56, 59, 60, 61, 63, 66, 67, 76, 82, 108, 109, 110], "checkpoint": [36, 42, 59, 66, 76, 82, 108, 111], "auto_examples_python": 36, "zip": [36, 63], "auto_examples_jupyt": 36, "grid": [37, 50, 76], "world": [37, 50, 54], "access": [37, 50, 104], "transit": [37, 43, 50, 69], "next_stat": [37, 50], "Then": [37, 41, 44, 49, 50, 54, 56, 61, 62], "implement": [37, 45, 46, 48, 52, 53, 54, 57, 63, 64, 66, 67, 72, 74, 78, 105, 106, 107, 108, 109], "leftarrow": 37, "sum_": 37, "prime": [37, 43], "left": [37, 44, 49], "right": [37, 44, 49, 53, 54, 72, 74, 105, 106, 107], "baselin": [37, 43, 50, 69, 108, 110, 111], "gather": [37, 49, 52, 53, 55, 63, 76, 108], "about": [37, 53, 55, 56, 58, 64, 66, 67, 68, 72, 74, 76, 104, 105, 106, 107, 108, 110], "44": [37, 38, 41, 50, 54, 55, 56], "valueiterationagent_2024": 37, "47_80b8b7fe": 37, "randomag": [37, 49, 50, 67], "randomagent_2024": 37, "48_7ba13ec9": 37, "nrow": [37, 41, 50], "ncol": [37, 41, 50], "reward_at": [37, 41, 50], "agentwithsimplepolici": [37, 45, 49, 50, 53, 61, 68, 76], "ensur": [37, 45, 51, 53, 57, 60, 72, 73, 74, 105, 106, 107], "compat": [37, 45, 53, 56, 57, 63, 91, 92, 93, 
94, 95, 96, 97, 98, 100, 108], "discount": [37, 43, 67, 68, 76], "factor": [37, 43, 67, 68, 76], "episilon": 37, "precis": [37, 44, 53], "tq": 37, "ss": 37, "aa": 37, "dot": [37, 44], "ab": [37, 52], "argmax": [37, 53], "classmethod": [37, 46, 57, 66, 67, 68, 76], "sample_paramet": [37, 46, 57, 66, 67, 68], "cl": [37, 46, 57], "trial": [37, 46, 53, 57, 66, 67, 68, 76], "hyperparam": [37, 46, 57, 66, 67, 68, 76], "optuna": [37, 46, 51, 66, 67, 68, 76, 109], "org": [37, 46, 51, 52, 53, 54, 62, 64, 66, 67, 68, 69, 76, 78, 89], "suggest_categor": [37, 46, 57], "pass": [37, 45, 50, 53, 57, 62, 64, 68, 72, 73, 74, 76, 105, 106, 107], "evaluate_ag": [37, 43, 44, 49, 50, 55, 57, 58, 60, 61], "vi_param": [37, 50], "job": [37, 50], "vi_stat": [37, 50], "baseline_stat": [37, 49, 50], "mont": [37, 45, 49, 50, 53, 66, 67, 68, 76], "carlo": [37, 45, 49, 50, 53, 66, 67, 68, 76], "simul": [37, 38, 43, 49, 50, 54, 55, 58, 60, 67, 68, 72, 74, 76, 80, 81, 82, 105, 106, 107], "n_simul": [37, 43, 44, 49, 50, 55, 58, 60, 61, 67, 68, 76, 78, 79], "153": [37, 42], "plot_agent_manag": [37, 42], "minim": [38, 64, 72, 74, 105, 106, 107, 109], "your": [38, 45, 46, 49, 51, 56, 59, 62, 64, 72, 74, 76, 105, 106, 107, 108, 109, 110], "restor": 38, "previou": [38, 43, 49, 55, 56, 58, 64, 76, 108, 110], "my": 38, "49_e91ad515": 38, "output_0": 38, "timestep": [38, 50, 66, 67, 68, 72, 74, 105, 106, 107], "output_1": 38, "load": [38, 41, 50, 56, 57, 63, 66, 67, 68, 75, 76, 78, 82, 84, 104, 108, 110], "myagent": [38, 45], "1500": [38, 56], "data": [38, 41, 43, 44, 46, 49, 52, 53, 54, 55, 58, 59, 60, 63, 66, 67, 68, 69, 72, 76, 78, 79, 80, 81, 82, 84, 86, 91, 92, 94, 95, 96, 104, 105, 106, 107, 108, 109, 110], "checkpoint_fil": 38, "equat": [38, 44], "del": [38, 50], "check": [38, 43, 49, 51, 55, 60, 62, 63, 64, 69, 75, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 102, 108, 110, 111], "loaded_checkpoint": 38, "get_param": [38, 66, 67, 68, 72, 74, 105, 106, 107], "__dict__": 38, "updat": [38, 43, 53, 
58, 63, 69, 72, 74, 76, 105, 106, 107], "loop": [38, 53, 54], "yt": 38, "rng": [38, 51, 59, 60, 66, 67, 68, 69, 72, 74, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107], "normal": [38, 43, 51, 60, 91], "append": [38, 47, 57, 60, 66, 67, 69, 77], "y": [38, 44, 53, 54, 56, 59, 72, 74, 80, 81, 82, 86, 105, 106, 107], "everi": [38, 49, 54, 60, 62, 64, 84, 108], "eval": [38, 45, 49, 50, 53, 55, 58, 60, 63, 66, 67, 68, 76, 78, 79], "befor": [38, 59, 61, 62, 66, 67, 72, 74, 75, 82, 84, 105, 106, 107], "so": [38, 44, 48, 50, 59, 60, 63, 76, 104, 108], "why": [38, 110], "interrupt": 38, "continu": [38, 63, 72, 74, 75, 76, 105, 106, 107, 108], "last": [38, 41, 62, 84, 104], "But": [38, 56, 58, 59, 60, 108], "small": [38, 52], "instanti": [38, 66, 67, 76], "itself": 38, "after": [38, 46, 49, 55, 58, 59, 61, 62, 63, 72, 74, 76, 77, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108], "manager_fil": 38, "delet": [38, 76], "situat": 38, "couldn": 38, "g": [38, 60, 61, 62, 63, 64, 66, 67, 68, 69, 72, 74, 75, 76, 80, 81, 82, 105, 106, 107, 109], "loaded_manag": 38, "plot_checkpoint": [38, 42], "requir": [39, 45, 53, 55, 57, 58, 60, 63, 64, 66, 67, 68, 69, 76, 85, 87, 98, 99, 100, 102, 109, 110, 111], "kernel_bas": [39, 47], "kernel_func": 39, "uniform": [39, 69, 91], "triangular": 39, "epanechnikov": 39, "quartic": 39, "triweight": 39, "tricub": 39, "cosin": 39, "z": [39, 49, 75], "linspac": 39, "k_type": 39, "kernel_v": 39, "set_titl": 39, "663": [39, 42], "plot_kernel": [39, 42], "465": 40, "makesubgaussianmossindex": 40, "pad": 40, "suptitl": 40, "error": [40, 44, 48, 52, 53, 64, 72, 74, 78, 100, 103, 105, 106, 107], "raw_curv": [40, 61, 80, 81, 82], "ci": [40, 61, 80, 81, 82], "smooth": [40, 49, 61, 63, 80, 81, 82, 108], "error_represent": [40, 61, 80, 81, 82], "cb": [40, 61, 80, 82], "489": [40, 42], "plot_smooth": [40, 42], "modifi": [41, 43, 54, 62], "easili": [41, 53, 63, 108], "alreadi": [41, 44, 62, 72, 74, 105, 106, 107], "ran": 41, "onc": [41, 46, 49, 59, 61, 62], "ha": [41, 
43, 48, 49, 50, 51, 52, 53, 56, 59, 60, 62, 63, 72, 74, 76, 91, 94, 95, 105, 106, 107, 108], "comment": [41, 62], "out": [41, 56, 72, 74, 76, 86, 105, 106, 107, 108, 110, 111], "line": [41, 54, 57, 58, 62, 72, 74, 76, 105, 106, 107, 108], "avoid": [41, 62, 89], "ucbviag": [41, 49, 60, 100], "ucbviagent_2024": 41, "21_f34c3498": 41, "wrape": 41, "writerwrapp": 41, "viagent": 41, "abov": [41, 59, 72, 74, 91, 104, 105, 106, 107], "preprocess": [41, 84], "compute_reward": [41, 72, 105, 106, 107], "global": [41, 47, 53, 54, 104], "necessari": [41, 51, 52, 59, 60, 61, 62, 72, 74, 105, 106, 107, 109], "custom": [41, 53, 54, 63, 68, 72, 74, 76, 80, 81, 82, 105, 106, 107, 110], "set_xlim": 41, "relim": 41, "set_xscal": 41, "set_yscal": 41, "395": [41, 42], "plot_writer_wrapp": [41, 42], "39": [42, 50, 54, 55, 56], "auto_exampl": 42, "03": 42, "tutori": [43, 49, 63, 64], "focu": 43, "advantag": 43, "consid": [43, 44, 66, 67], "mathcal": 43, "x": [43, 44, 56, 59, 61, 63, 72, 74, 78, 80, 81, 82, 86, 91, 92, 93, 94, 95, 96, 105, 106, 107], "mid": 43, "probabl": [43, 44, 49, 50, 52, 61, 80, 82], "map": [43, 53, 54, 66, 67, 68, 72, 74, 76, 105, 106, 107], "overal": [43, 44], "expect": [43, 62, 64, 72, 74, 92, 93, 94, 95, 96, 105, 106, 107, 110, 111], "mathbb": [43, 44], "tau": 43, "sim": 43, "big": 43, "s_0": 43, "a_0": 43, "r_0": 43, "s_1": 43, "a_1": [43, 91], "r_1": 43, "s_2": 43, "s_t": 43, "a_t": 43, "r_t": 43, "drawn": [43, 94], "maxim": [43, 76], "previous": [43, 59], "openai": [43, 69, 72, 74, 91, 105, 106, 107], "larg": [43, 61], "although": 43, "other": [43, 49, 51, 53, 54, 56, 60, 61, 62, 64, 66, 67, 68, 71, 75, 110], "v0": [43, 51, 54, 60, 61, 72, 74, 89, 105, 106, 107, 109], "tabl": [43, 52, 53, 59], "basic": [43, 45, 53, 63, 66, 107, 108, 110], "compon": [43, 59], "occur": [43, 54, 72, 74, 105, 106, 107], "box": [43, 44, 92, 93, 94, 95, 96], "depend": [43, 45, 53, 56, 62, 63, 67, 72, 74, 87, 92, 96, 105, 106, 107], "mai": [43, 44, 48, 49, 54, 55, 61, 62, 64, 69, 72, 
74, 76, 104, 105, 106, 107], "next": [43, 53, 62, 72, 74, 105, 106, 107], "compact": [43, 55], "wai": [43, 55, 56, 57, 61, 62, 63, 72, 74, 105, 106, 107, 108, 110], "deeprl": [43, 109], "default_xp": 43, "rollout": [43, 53, 55, 56, 58], "ep_rew_mean": [43, 53, 55, 56, 58], "09": [43, 55, 58], "31": [43, 50, 53, 54, 55, 56], "4096": [43, 53, 56], "ep_len_mean": [43, 53, 55, 56, 58], "fp": [43, 53, 54, 55, 56, 58], "791": 43, "time_elaps": [43, 53, 55, 56, 58], "2048": [43, 53, 55, 56, 58], "0003": [43, 53, 55, 56], "741": 43, "751": 43, "6144": [43, 53, 56], "617": 43, "entropy_loss": [43, 53, 55, 56, 61], "0967000976204873": 43, "policy_gradient_loss": [43, 53, 55, 56], "0017652213326073251": 43, "value_loss": [43, 53, 55, 56], "139": [43, 55], "4249062538147": 43, "approx_kl": [43, 53, 55, 56], "004285778850317001": 43, "clip_fract": [43, 53, 55, 56], "0044921875": 43, "loss": [43, 53, 55, 56, 61], "16": [43, 46, 50, 53, 54, 55, 56, 60], "845857620239258": 43, "explained_vari": [43, 53, 55, 56], "0011605024337768555": 43, "n_updat": [43, 53, 55, 56], "clip_rang": [43, 53, 55, 56], "100352": 43, "48": [43, 50, 53, 54, 55, 56], "89": [43, 50, 55, 58], "81": [43, 50, 55, 56], "90": [43, 50, 56, 63], "486": 43, "202": 43, "98304": 43, "19921453138813378": 43, "002730156043253373": 43, "21": [43, 50, 53, 54, 55, 56, 60, 107], "20977843105793": 43, "0014179411809891462": 43, "017626953125": 43, "601455688476562": 43, "8966712430119514": 43, "470": [43, 55, 63], "14615743807516993": 43, "002418491238495335": 43, "7100858271122": 43, "0006727844011038542": 43, "010546875": 43, "74121379852295": 43, "8884317129850388": 43, "default_2024": 43, "24_09": 43, "51_be15b329": 43, "let": [43, 49, 57], "chang": [43, 54, 61, 62, 63, 72, 74, 76, 105, 106, 107, 108], "aim": [43, 44, 63, 108], "section": [43, 44, 49, 52, 53, 61, 64, 78], "effect": [43, 72, 74, 105, 106, 107], "demonstr": 43, "pedagog": 43, "sinc": [43, 56, 66, 67, 91, 92, 93, 94, 95, 96], "ll": [43, 47, 53, 59], 
"wrong": 43, "decreas": 43, "obvious": 43, "practic": [43, 48, 60, 62], "improv": [43, 63, 64], "ent_coef": [43, 57], "much": [43, 63, 69], "forc": [43, 54], "explor": [43, 49, 53, 72, 74, 105, 106, 107], "normalize_advantag": [43, 57], "trade": 43, "off": 43, "bia": [43, 48], "varianc": 43, "n_epoch": 43, "epoch": [43, 104], "surrog": 43, "incorrectli": 43, "37": [43, 50, 53, 54, 55, 56], "832": [43, 55], "260": [43, 44, 55], "768": 43, "9725531369447709": 43, "175539326667786": 43, "17": [43, 50, 55, 56], "705344581604002": 43, "028903376311063766": 43, "33828125": 43, "651824951171875": 43, "03754150867462158": 43, "220": [43, 54, 56], "251": 43, "252": [43, 56], "0311604633927345": 43, "122353088855744": 43, "18": [43, 50, 53, 55, 56, 58, 60], "54480469226837": 43, "02180374786257744": 43, "359375": 43, "690193176269531": 43, "00020706653594970703": 43, "tuned_2024": 43, "32_33d1646b": 43, "wors": [43, 44], "lower": [43, 48], "47": [43, 50, 54, 55, 56, 60], "perform": [44, 48, 49, 50, 54, 60, 61, 64, 67, 68, 72, 74, 76, 80, 81, 82, 105, 106, 107], "deep": [44, 57, 61, 63, 64, 66, 67, 68, 72, 74, 105, 106, 107, 108, 111], "independ": [44, 51, 60, 92, 94, 96, 108], "abl": [44, 49, 52, 53, 61], "sai": [44, 49, 50, 52], "inde": [44, 60, 108], "perceiv": 44, "its": [44, 49, 51, 54, 55, 58, 59, 62, 63, 76, 82, 84, 111], "most": [44, 48, 59, 72, 74, 80, 82, 84, 105, 106, 107, 108], "form": [44, 64, 91], "decid": 44, "given": [44, 59, 63, 68, 69, 72, 76, 82, 84, 97, 104], "x_1": 44, "x_n": 44, "adher": [44, 61], "h_0": 44, "null": 44, "better": [44, 48, 54, 55, 62, 63], "altern": [44, 47, 48, 49, 62], "h_1": 44, "y_1": 44, "y_n": 44, "x_i": 44, "equal": [44, 50, 52, 63, 75, 82, 84], "y_i": 44, "quad": 44, "neq": 44, "both": [44, 49, 63, 87, 91], "accept": [44, 45, 53, 62, 64, 72, 74, 75, 105, 106, 107], "reject": 44, "answer": 44, "ground": 44, "truth": 44, "howev": [44, 48, 72, 74, 105, 106, 107], "often": [44, 48, 62, 72, 74, 82, 105, 106, 107], "control": [44, 52, 
54, 62, 63, 72, 74, 78, 105, 106, 107], "decompos": 44, "denot": 44, "alpha": [44, 52, 59, 69, 75, 78], "respect": [44, 54, 56], "symmetr": 44, "fail": [44, 59, 66, 67, 100], "doe": [44, 48, 49, 60, 62, 63, 64, 66, 67, 72, 74, 78, 88, 92, 96, 100, 105, 106, 107], "It": [44, 45, 48, 51, 53, 55, 56, 59, 60, 62, 66, 67, 72, 74, 76, 105, 106, 107, 108, 110], "simultan": [44, 61, 80, 81, 82], "must": [44, 48, 49, 56, 57, 61, 62, 64, 66, 67, 69, 76, 78, 80, 81, 82, 84, 86], "care": [44, 55, 56, 59], "accumul": 44, "cautiou": 44, "becom": [44, 52, 61], "non": [44, 51, 60, 61, 63, 72, 74, 78, 80, 81, 82, 105, 106, 107], "neglig": 44, "consequ": [44, 48], "strategi": [44, 67], "develop": [44, 53, 54, 60, 62, 64, 108, 112], "deal": 44, "To": [44, 46, 47, 48, 49, 50, 53, 54, 55, 56, 58, 59, 61, 62, 64, 69, 86, 90, 109, 110], "There": [44, 57, 61], "famili": [44, 52, 78], "wise": [44, 52, 78], "least": 44, "mathrm": 44, "fwe": 44, "h_j": 44, "textbf": 44, "indic": [44, 50, 62, 64, 69, 72, 74, 75, 105, 106, 107], "hypothes": [44, 78], "actual": 44, "c": [44, 49, 53, 54], "rbagent": 44, "3e4": 44, "eval_ag": [44, 76], "idx": [44, 76], "obtain": [44, 49, 50, 52, 53, 67, 68, 76, 82, 84], "These": [44, 64, 110], "through": [44, 48, 53, 54, 59, 61, 67, 68, 72, 74, 104, 105, 106, 107, 110], "val": 44, "signific": [44, 52], "416": 44, "9975": 44, "00000": 44, "00250": 44, "147": [44, 55], "338488": 44, "266444": 44, "38375": 44, "156": 44, "61375": 44, "179": 44, "503659": 44, "017001": 44, "0000": 44, "239": 44, "61625": 44, "80": [44, 50, 55, 56], "271521": 44, "000410": 44, "our": [44, 46, 49, 50, 54, 55, 56, 58, 59, 60, 61, 62, 64, 108, 110], "necessarili": [44, 66, 67], "same": [44, 49, 51, 52, 53, 55, 57, 59, 60, 61, 62, 69, 73, 76, 77, 82, 92, 96, 97, 99], "black": [44, 62], "sens": [44, 91, 97], "don": [44, 50, 53, 58, 59, 60, 108], "were": [44, 108], "mani": [44, 49, 60, 84], "suppos": [44, 60, 61, 78], "user": [44, 48, 53, 54, 56, 61, 62, 63, 72, 74, 76, 105, 106, 107, 108, 
109], "adequ": 44, "fair": [44, 60, 94], "further": [44, 72, 74, 105, 106, 107], "littl": [44, 64], "look": [44, 46, 62, 64], "veri": [45, 46, 47, 49, 52, 53, 61], "write": [45, 51, 53, 54, 55, 60, 62, 108], "below": [45, 46, 47, 52, 53, 57, 91, 111], "param1": 45, "param2": 45, "eval_env": [45, 47, 50, 55, 63, 66, 67, 68, 70, 76], "repres": [45, 52, 53, 61, 67, 68, 69, 72, 74, 80, 81, 82, 91, 93, 105, 106, 107], "episode_reward": [45, 46, 53, 59, 82, 84], "correspond": [45, 53, 61, 66, 67, 68, 82, 84, 104, 109], "option": [45, 47, 49, 53, 54, 57, 58, 63, 66, 67, 68, 69, 71, 72, 74, 76, 80, 81, 82, 92, 93, 94, 95, 96, 104, 105, 106, 107], "With": [46, 49, 54, 61], "easi": [46, 56, 57, 108], "analyz": 46, "shown": 46, "reinforceag": 46, "stat": [46, 57, 78], "thread": [46, 51, 53, 54, 55, 63, 66, 67, 68, 76, 77, 108], "defaultwrit": [46, 58, 63, 66, 67, 68, 76], "those": 46, "sample_paratem": 46, "sent": [46, 66, 67, 68], "sure": [46, 62, 66, 67, 68, 72, 74, 76, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108], "includ": [46, 53, 54, 57, 62, 64, 66, 67, 68, 69, 72, 74, 105, 106, 107], "optimiz": [46, 66, 67, 68], "suggest_float": [46, 57], "entr_coef": 46, "optimize_hyperparam": [46, 51, 57, 76], "n_trial": [46, 76], "stop": [46, 52, 53, 66, 67, 75, 76, 96, 108], "sampler_method": [46, 76], "optuna_default": [46, 76], "best_hyperparam": 46, "again": [46, 58, 72, 74, 105, 106, 107], "describ": [47, 62, 72, 74, 105, 106, 107], "read": [47, 64, 84, 104, 108], "succinctli": 47, "descript": [47, 61, 62, 64], "demo_experi": 47, "rsucbvi_altern": 47, "rs_ucbvi": 47, "lp_metric": 47, "max_repr": 47, "800": 47, "fit_kwarg": [47, 57, 76], "base_config": 47, "h": [47, 53, 54], "experiment_gener": 47, "multiple_manag": 47, "multiplemanag": [47, 48, 57], "multimanag": [47, 57], "experiment_manag": [47, 59, 76, 77], "standard": 48, "multiprocess": [48, 76, 77, 110], "cpu": [48, 53, 54, 55, 58], "third": [48, 60], "parti": [48, 60], "joblib": 48, "awar": 48, "nativ": [48, 53, 54], 
"scheme": [48, 108], "higher": 48, "top": [48, 52, 58, 62], "_thread": 48, "modul": [48, 50, 76, 77, 98, 99, 100, 102, 104], "websit": [48, 64], "without": [48, 60, 63, 64], "gil": 48, "cython": 48, "impli": 48, "new": [48, 49, 54, 60, 62, 63, 64, 66, 67, 69, 72, 74, 77, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 109, 111], "launch": [48, 64, 85], "advis": 48, "drawback": 48, "encapsul": 48, "direct": [48, 49, 50, 53, 54], "sometim": [48, 61], "avail": [48, 51, 53, 54, 60, 61, 62, 76, 88], "unix": [48, 76], "system": [48, 64], "maco": 48, "could": [48, 49, 72, 74, 104, 105, 106, 107, 108], "hang": 48, "usag": [48, 109], "still": 48, "unstabl": [48, 61], "cc": 49, "bf": 49, "qq": 49, "rr": 49, "zz": 49, "nn": 49, "panda": [49, 50, 56, 58, 61, 76, 78, 80, 81, 82, 84, 86, 104], "pd": [49, 50, 59, 80], "l": [49, 54, 61, 78], "fail_prob": [49, 61], "length": [49, 61, 67, 68, 69, 76, 84], "proba": [49, 61], "take": [49, 50, 53, 56, 57, 61, 63, 64, 67, 72, 74, 76, 80, 82, 87, 105, 106, 107, 110], "might": [49, 62, 66, 67, 72, 74, 105, 106, 107], "opposit": 49, "accord": [49, 59, 80, 81, 91], "failur": 49, "graphic": 49, "represent": [49, 72, 74, 75, 105, 106, 107], "save_gif": [49, 61, 72, 105, 106, 107], "gif_chain": [49, 61], "gif": [49, 63, 110], "clear": [49, 61, 69, 104], "clear_render_buff": [49, 61], "disable_rend": [49, 61, 72, 105, 106, 107], "design": [49, 61, 108], "One": [49, 60, 91], "featur": [49, 62, 63, 109, 110], "diagram": 49, "explain": [49, 52, 62], "briefli": 49, "few": [49, 57, 61, 62, 64, 108, 111], "word": 49, "spawn": [49, 51, 60, 63, 66, 67, 72, 74, 76, 77, 82, 83, 84, 89, 91, 93, 94, 95, 105, 106, 107], "well": 49, "arbitrari": 49, "specif": [49, 54, 55, 59, 60, 66, 67, 72, 74, 76, 105, 106, 107], "thing": [49, 56, 108], "desir": [49, 54, 64, 105], "summar": 49, "ucbvi_param": 49, "ucbvi_stat": 49, "depth": [49, 62], "methodologi": 49, "cannot": [49, 54, 63, 66, 67, 76], "simpli": [49, 57], "random_param": 49, "ucbi": 49, "ucbviagent2": 49, 
"5000": [49, 56], "randomagent2": [49, 50], "optimalag": 49, "allow": [49, 51, 52, 53, 55, 56, 60, 61, 63, 66, 67, 69, 76, 82, 92, 96, 107, 108, 109], "recov": 49, "henc": [49, 64, 82], "raw": [49, 80, 81, 82, 91], "instal": [49, 61, 63, 64, 87, 108, 110], "extra": [49, 53, 54, 62, 64, 66, 67, 68, 76, 109, 110], "packag": [49, 54, 62, 64, 109], "scikit": [49, 61, 62, 63, 80, 109], "fda": [49, 61, 63, 80, 109], "page": [49, 56, 63, 64, 108], "opengl_acceler": 50, "And": 50, "row": 50, "column": [50, 56, 59, 61, 78, 80, 81, 82, 84, 86], "posit": [50, 72, 74, 95, 105, 106, 107], "plai": 50, "idea": 50, "n_simimul": 50, "redefin": 50, "tell": 50, "retreiv": 50, "super": [50, 53, 57, 72, 74, 105, 106, 107], "unus": [50, 63], "episode_regret": 50, "valueiterationagent2": 50, "addit": [50, 62, 64, 72, 73, 74, 105, 106, 107], "won": [50, 63, 72, 74, 105, 106, 107], "14": [50, 53, 55, 56, 60, 63], "19": [50, 54, 55, 56, 60], "23": [50, 53, 54, 55, 56, 58, 60], "24": [50, 53, 54, 55, 56, 60], "26": [50, 55, 56, 60, 72, 74, 105, 106, 107], "29": [50, 53, 54, 55, 56], "30": [50, 53, 55, 56], "33": [50, 53, 54, 55, 56], "34": [50, 55, 56], "38": [50, 53, 54, 56], "43": [50, 55, 56], "49": [50, 55, 56], "51": [50, 53, 54, 55, 56, 63], "52": [50, 54, 55, 56], "53": [50, 53, 56], "54": [50, 96], "55": [50, 53, 54, 55, 56], "57": [50, 53, 55, 56], "58": [50, 53, 54, 55, 56], "59": [50, 53, 55, 56], "60": [50, 53, 54, 55, 56, 76], "62": [50, 55, 56], "63": [50, 55, 56], "65": [50, 54, 56], "67": [50, 55, 56], "68": [50, 56], "69": [50, 53, 54, 56], "70": [50, 53, 54, 55, 56], "71": 50, "72": [50, 53, 54, 56], "74": [50, 56], "76": [50, 53, 54, 55, 56], "77": [50, 55], "78": [50, 53, 56], "79": [50, 55, 56], "82": [50, 55, 56, 58], "86": [50, 55], "87": [50, 53, 54, 55, 56], "88": [50, 56], "91": [50, 53, 54, 55, 56], "92": [50, 53, 55], "94": [50, 54, 55, 56], "96": [50, 53, 55, 56], "97": [50, 55, 63], "98": [50, 53], "datafram": [50, 56, 58, 59, 61, 75, 76, 78, 79, 80, 81, 82, 84, 
86, 108], "tolist": 50, "axessubplot": 50, "linear": 50, "seem": 50, "around": [50, 54, 61, 80, 81, 82, 109], "intend": [50, 64], "target": 50, "rlberry_": [51, 57], "conveni": [51, 60, 108], "wrap": [51, 53, 55, 57, 58, 60, 61, 63, 68, 70, 72, 73, 74, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108], "seedsequ": [51, 60, 66, 67, 72, 74, 76, 89, 91, 93, 94, 95, 105, 106, 107, 108], "singl": [51, 54, 60, 61, 72, 74, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107, 108], "refer": [51, 62, 63, 66, 67, 68, 69, 72, 74, 78, 89, 105, 106, 107], "html": [51, 53, 54, 64, 69, 71, 76, 78, 89, 108], "integ": [51, 56, 59, 60, 72, 74, 89, 90, 92, 93, 95, 96, 105, 106, 107], "own": [51, 56, 60, 108, 110], "inherit": [51, 54, 60, 66, 67, 91, 92, 93, 94, 95, 96], "whenev": [51, 60], "appli": [51, 60, 70, 72, 74, 82, 84, 104, 105, 106, 107], "seeder1": [51, 60], "seeder2": [51, 60], "extern": [51, 90, 110], "set_external_se": [51, 60], "inher": 51, "faq": 51, "reliabl": [52, 108, 109], "introduc": [52, 63], "arxiv": 52, "2306": 52, "10882": 52, "group": 52, "permut": [52, 75, 78], "especi": 52, "easier": [52, 54, 61, 108, 110], "bind": [52, 109], "choos": [52, 53, 58, 67, 80, 82, 110, 111], "rational": 52, "scienc": 52, "viabl": 52, "rank": 52, "theoret": 52, "repetit": 52, "soon": 52, "pleas": [52, 62, 64, 72, 74, 105, 106, 107], "realli": 52, "certain": [52, 72, 74, 105, 106, 107], "limit": [52, 104], "maximum": [52, 63, 67, 68, 69, 74, 75, 76], "batch": [52, 69, 91, 92, 94, 95, 96], "ask": [52, 61], "wrongli": 52, "format": [52, 61, 62, 80, 81, 86], "print_result": [52, 75], "score": [52, 75, 78], "271": 52, "17600000000004": 52, "plot_result": [52, 75], "larger": [52, 61, 75, 80, 82], "entiti": 53, "api": [53, 54, 55, 62, 72, 74, 105, 106, 107, 108, 110], "guid": [53, 56, 59, 62, 63, 64, 108], "renderinterfac": [53, 54, 72], "_agent_page_chain1": 53, "cross": [53, 62, 109], "creation": [53, 63], "_agent_page_chain2": 53, "n_iter": [53, 75], "269": [53, 56, 63], "06": [53, 55, 
56], "pg": 53, "displai": [53, 56, 72, 74, 105, 106, 107], "set_mod": 53, "doublebuf": 53, "opengl": [53, 54, 63], "ffmpeg": [53, 54, 61, 109], "0ubuntu0": [53, 54], "copyright": [53, 54], "built": [53, 54], "gcc": [53, 54], "19ubuntu1": [53, 54], "configur": [53, 54, 56, 57, 62, 69, 71], "prefix": [53, 54, 62], "usr": [53, 54], "toolchain": [53, 54], "harden": [53, 54], "libdir": [53, 54], "lib": [53, 54, 66], "x86_64": [53, 54], "gnu": [53, 54], "incdir": [53, 54], "arch": [53, 54], "amd64": [53, 54], "enabl": [53, 54, 55, 69, 76], "gpl": [53, 54], "disabl": [53, 54, 76], "strip": [53, 54], "gnutl": [53, 54], "ladspa": [53, 54], "libaom": [53, 54], "libass": [53, 54], "libblurai": [53, 54], "libbs2b": [53, 54], "libcaca": [53, 54], "libcdio": [53, 54], "libcodec2": [53, 54], "libdav1d": [53, 54], "libflit": [53, 54], "libfontconfig": [53, 54], "libfreetyp": [53, 54], "libfribidi": [53, 54], "libgm": [53, 54], "libgsm": [53, 54], "libjack": [53, 54], "libmp3lam": [53, 54], "libmysofa": [53, 54], "libopenjpeg": [53, 54], "libopenmpt": [53, 54], "libopu": [53, 54], "libpuls": [53, 54], "librabbitmq": [53, 54], "librubberband": [53, 54], "libshin": [53, 54], "libsnappi": [53, 54], "libsoxr": [53, 54], "libspeex": [53, 54], "libsrt": [53, 54], "libssh": [53, 54], "libtheora": [53, 54], "libtwolam": [53, 54], "libvidstab": [53, 54], "libvorbi": [53, 54], "libvpx": [53, 54], "libwebp": [53, 54], "libx265": [53, 54], "libxml2": [53, 54], "libxvid": [53, 54], "libzimg": [53, 54], "libzmq": [53, 54], "libzvbi": [53, 54], "lv2": [53, 54], "omx": [53, 54], "open": [53, 54, 64], "opencl": [53, 54], "sdl2": [53, 54], "pocketsphinx": [53, 54], "librsvg": [53, 54], "libmfx": [53, 54], "libdc1394": [53, 54], "libdrm": [53, 54], "libiec61883": [53, 54], "chromaprint": [53, 54], "frei0r": [53, 54], "libx264": [53, 54], "share": [53, 54, 66, 67, 68, 76], "libavutil": [53, 54], "libavcodec": [53, 54], "134": [53, 54], "libavformat": [53, 54], "libavdevic": [53, 54], "libavfilt": [53, 
54], "110": [53, 54, 55, 56], "libswscal": [53, 54], "libswresampl": [53, 54], "libpostproc": [53, 54], "input": [53, 54, 56, 61, 66, 67, 72, 76, 78, 82, 86, 89, 108], "rawvideo": [53, 54], "pipe": [53, 54], "durat": [53, 54], "000000": [53, 54], "bitrat": [53, 54], "38400": [53, 54], "kb": [53, 54], "stream": [53, 54, 109], "rgb": [53, 54, 72, 74, 105, 106, 107], "0x18424752": [53, 54], "rgb24": [53, 54], "800x80": [53, 54], "tbr": [53, 54], "tbn": [53, 54], "tbc": [53, 54], "h264": [53, 54], "0x5570932967c0": 53, "capabl": [53, 54], "mmx2": [53, 54], "sse2fast": [53, 54], "ssse3": [53, 54], "sse4": [53, 54], "avx": [53, 54], "fma3": [53, 54], "bmi2": [53, 54], "avx2": [53, 54], "avx512": [53, 54], "profil": [53, 54, 63, 76], "high": [53, 54, 91], "bit": [53, 54], "264": [53, 54, 63], "163": [53, 54, 56], "r3060": [53, 54], "5db6aa6": [53, 54], "mpeg": [53, 54], "avc": [53, 54], "codec": [53, 54, 61], "copyleft": [53, 54], "2003": [53, 54], "videolan": [53, 54], "x264": [53, 54], "cabac": [53, 54], "ref": [53, 54], "deblock": [53, 54], "analys": [53, 54], "0x3": [53, 54], "0x113": [53, 54], "me": [53, 54], "hex": [53, 54], "subm": [53, 54], "psy": [53, 54], "psy_rd": [53, 54], "mixed_ref": [53, 54], "me_rang": [53, 54], "chroma_m": [53, 54], "trelli": [53, 54], "8x8dct": [53, 54], "cqm": [53, 54], "deadzon": [53, 54], "fast_pskip": [53, 54], "chroma_qp_offset": [53, 54], "lookahead_thread": [53, 54], "sliced_thread": [53, 54], "nr": [53, 54], "decim": [53, 54], "interlac": [53, 54], "bluray_compat": [53, 54], "constrained_intra": [53, 54], "bframe": [53, 54], "b_pyramid": [53, 54], "b_adapt": [53, 54], "b_bia": [53, 54], "weightb": [53, 54], "open_gop": [53, 54], "weightp": [53, 54], "keyint": [53, 54], "250": [53, 54, 55, 63], "keyint_min": [53, 54], "scenecut": [53, 54], "intra_refresh": [53, 54], "rc_lookahead": [53, 54], "rc": [53, 54], "crf": [53, 54], "mbtree": [53, 54], "qcomp": [53, 54], "qpmin": [53, 54], "qpmax": [53, 54], "qpstep": [53, 54], "ip_ratio": 
[53, 54], "aq": [53, 54], "_agent_page_chain": 53, "encod": [53, 54], "lavf58": [53, 54], "avc1": [53, 54], "0x31637661": [53, 54], "yuv420p": [53, 54], "tv": [53, 54], "progress": [53, 54, 62, 76], "12800": [53, 54], "lavc58": [53, 54], "side": [53, 54], "cpb": [53, 54], "min": [53, 54], "avg": [53, 54], "buffer": [53, 54, 63, 69], "vbv_delai": [53, 54], "frame": [53, 54, 61, 62, 72, 74, 105, 106, 107], "lsize": [53, 54], "12kb": 53, "9kbit": 53, "speed": [53, 54], "8x": 53, "11kb": [53, 54], "audio": [53, 54, 109], "0kb": [53, 54], "subtitl": [53, 54], "header": [53, 54], "mux": [53, 54], "overhead": [53, 54], "817029": 53, "qp": [53, 54], "6089": 53, "172": 53, "consecut": [53, 54, 104], "i16": [53, 54], "p16": [53, 54], "skip": [53, 54, 62], "b16": [53, 54], "l0": [53, 54], "l1": [53, 54], "bi": [53, 54], "8x8": [53, 54], "transform": [53, 54], "intra": [53, 54], "inter": [53, 54], "uvdc": [53, 54], "uvac": [53, 54], "dc": [53, 54], "i8": [53, 54], "ddl": [53, 54], "ddr": [53, 54], "vr": [53, 54], "hd": [53, 54], "vl": [53, 54], "hu": [53, 54], "i4": [53, 54], "i8c": [53, 54], "weight": [53, 54, 60, 69], "uv": [53, 54], "stablebaseline3": 53, "video_fold": [53, 54, 55], "name_prefix": [53, 54, 55], "devic": [53, 55, 58], "monitor": [53, 55, 58], "dummyvecenv": [53, 55, 58], "2490": 53, "1842": 53, "009214947": 53, "102": [53, 55], "686": 53, "00179": 53, "0158": 53, "1708": 53, "009872524": 53, "0705": 53, "666": 53, "119": [53, 55, 63], "0195": 53, "6860913151875139": 53, "015838009686558508": 53, "528612112998964": 53, "009214947000145912": 53, "10205078125": 53, "420166969299316": 53, "001785874366760254": 53, "1674": 53, "8192": [53, 56], "0076105352": 53, "634": 53, "246": [53, 56], "0151": 53, "1655": 53, "10240": [53, 56], "006019583": 53, "0597": 53, "606": 53, "238": 53, "0147": 53, "moviepi": [53, 54, 55], "build": [53, 54, 55], "yourpath": [53, 55], "readi": [53, 54, 55, 64], "advanc": [53, 54], "myagentqlearn": 53, "exploration_r": [53, 59], 
"discount_factor": 53, "state_space_s": 53, "action_space_s": 53, "percentag": 53, "q_tabl": 53, "store": [53, 55, 58, 59, 61, 66, 67, 68, 69, 76, 104], "next_step": 53, "explo": 53, "rand": 53, "exploit": 53, "frozenlak": [53, 59], "is_slipperi": [53, 59], "remov": [53, 54, 63, 72, 74, 105, 106, 107], "slipperi": 53, "100000": [53, 67, 68], "content": [53, 59], "frozenlake_no_slipperi": 53, "73509189": [53, 59], "77378094": [53, 59], "81450625": [53, 59], "857375": [53, 59], "9025": [53, 59], "element": [53, 55, 72, 74, 95, 105, 106, 107, 108], "rule": 54, "_env_page_chain": 54, "0x5644b31f07c0": 54, "4kbit": 54, "6x": 54, "10kb": 54, "128633": 54, "6175": 54, "124": [54, 55], "common": [54, 68, 71], "mountain": 54, "car": 54, "mujoco": 54, "classic": [54, 108], "add": [54, 55, 58, 59, 60, 62, 63, 66, 67, 68, 91, 92, 93, 94, 95, 96, 109], "conda": [54, 109], "python3": [54, 62, 63, 64], "overwrit": 54, "specifi": [54, 55, 59, 72, 74, 76, 91, 92, 93, 94, 95, 96, 105, 106, 107, 108], "311": [54, 56], "is_vector_env": 54, "deprec": [54, 59, 72, 74, 105, 106, 107], "unwrap": [54, 72, 74, 105, 106, 107], "get_wrapper_attr": [54, 72, 74, 105, 106, 107], "search": [54, 66, 67, 76, 80, 82, 104], "remind": [54, 110, 111], "2600": [54, 109], "stella": 54, "arcad": [54, 109], "game": [54, 63, 109], "life": [54, 108], "53f58b7": 54, "power": [54, 75], "passive_env_check": 54, "335": [54, 56, 63], "declar": 54, "render_fp": 54, "inconsist": 54, "child": [54, 60], "templat": [54, 62], "solut": [55, 109], "recommend": [55, 59, 66, 89, 96], "stablebaselines3": [55, 56, 57, 58, 61, 63, 68, 110], "env_id": [55, 58, 59, 60], "first_experi": [55, 58, 60], "tupl": [55, 58, 59, 60, 63, 66, 67, 68, 70, 74, 75, 76, 91, 92, 93, 94, 95, 98, 99, 100, 102, 105], "ppo_first_experi": [55, 58, 60], "ppo_first_experimentcartpol": [55, 58, 60], "2977": [55, 58], "v1_2024": [55, 58], "12_09": [55, 58], "10_3a9fa8ad": [55, 58], "121": [55, 56, 58], "bigger": 55, "fine": 55, "second_experi": 55, 
"ppo_second_experi": 55, "ppo_second_experimentcartpol": 55, "2688": 55, "044444444444444": 55, "888": 55, "2592": 55, "6261792600154876": 55, "001418954369607306": 55, "49215440750122": 55, "0018317258218303323": 55, "3124942779541": 55, "33643925189971924": 55, "5568": 55, "19354838709677": 55, "916": 55, "5472": 55, "617610102891922": 55, "0007477130696315725": 55, "27523021697998": 55, "8932236343971454e": 55, "402034759521484": 55, "46521711349487305": 55, "560": 55, "8640": 55, "107": [55, 56], "29113924050633": 55, "946": 55, "8544": 55, "5820738852024079": 55, "008271816929482156": 55, "279": [55, 56], "90625591278075": 55, "005026700906455517": 55, "03750000102445483": 55, "192": [55, 56], "93894958496094": 55, "00014603137969970703": 55, "880": 55, "45_77245043": 55, "108": [55, 56, 59], "130": [55, 56], "166": 55, "imag": [55, 72, 74, 105, 106, 107], "succe": 55, "pipelineenv": [55, 63], "eval_env_ctor": 55, "eval_env_kwarg": 55, "third_experi": 55, "ppo_third_experi": 55, "output3": 55, "ppo_third_experimentcartpol": 55, "1920": 55, "146341463414636": 55, "687": 55, "1824": 55, "612512381374836": 55, "004653797230503187": 55, "76153821945191": 55, "008641918189823627": 55, "03333333339542151": 55, "162071228027344": 55, "3032127618789673": 55, "180": [55, 56, 63], "4704": 55, "20689655172414": 55, "804": 55, "4608": 55, "5940127298235893": 55, "016441003710982238": 55, "154": 55, "39369611740113": 55, "010226544924080372": 55, "07500000102445484": 55, "81913375854492": 55, "005669653415679932": 55, "7392": 55, "08108108108108": 55, "826": 55, "7296": 55, "5620817124843598": 55, "0007149307257350301": 55, "1684087753296": 55, "00030671278364025056": 55, "46017837524414": 55, "4496734142303467": 55, "750": 55, "9984": 55, "103": 55, "113": 55, "64285714285714": 55, "9888": 55, "5782853797078132": 55, "012480927801546693": 55, "679842436313628": 55, "013762158341705799": 55, "04479166660457849": 55, "8429009914398193": 55, "32027459144592285": 55, "1020": 
55, "09_da4411b3": 55, "fourth_experi": 55, "15000": 55, "ppo_fourth_experi": 55, "fourth_experiment_result": 55, "1440": 55, "86046511627907": 55, "497": 55, "1344": 55, "6368376821279526": 55, "0030540588200588916": 55, "8653003692627": 55, "0012531293323263526": 55, "012786865234375": 55, "270730197429657": 55, "1536": 55, "22857142857143": 55, "502": 55, "618910662829876": 55, "007122196507649825": 55, "85383853912353": 55, "004074861295521259": 55, "009375000186264516": 55, "4535026550293": 55, "02206575870513916": 55, "140": [55, 56], "2976": 55, "49090909090909": 55, "2880": 55, "5889034822583199": 55, "010608512769977096": 55, "22348279953003": 55, "004636458586901426": 55, "06354166707023978": 55, "387840270996094": 55, "16999149322509766": 55, "290": 55, "3072": 55, "510": 55, "5197424411773681": 55, "00876332552181035": 55, "44287853240967": 55, "0023070008028298616": 55, "723819732666016": 55, "12456077337265015": 55, "4416": 55, "71428571428571": 55, "490": 55, "4320": 55, "6150185167789459": 55, "011918687870623534": 55, "91612710952759": 55, "012545783072710037": 55, "07500000055879355": 55, "261075973510742": 55, "5195063650608063": 55, "440": 55, "05357142857143": 55, "484": 55, "5382911033928395": 55, "01824581954351743": 55, "797289085388186": 55, "009921143762767315": 55, "11458333358168601": 55, "537925720214844": 55, "77537801861763": 55, "5760": 55, "475": 55, "5664": 55, "6097852572798729": 55, "005027322360729158": 55, "29339656829834": 55, "0017487265868112445": 55, "345821380615234": 55, "14309996366500854": 55, "580": 55, "5856": 55, "72058823529412": 55, "473": 55, "5608772337436676": 55, "000609715585354298": 55, "05806636810303": 55, "0004261930880602449": 55, "013322830200195": 55, "13966631889343262": 55, "590": 55, "27631578947368": 55, "479": 55, "7200": 55, "5558830961585045": 55, "0035663537412043756": 55, "799719190597536": 55, "010704685002565384": 55, "026041666883975266": 55, "911317825317383": 55, "7546885907649994": 55, 
"740": 55, "7488": 55, "78378378378379": 55, "481": 55, "5342976003885269": 55, "003940091139186919": 55, "248546028137206": 55, "0034270065370947123": 55, "006250000186264515": 55, "23060417175293": 55, "000810086727142334": 55, "760": 55, "8832": 55, "8021978021978": 55, "483": 55, "8736": 55, "5868215769529342": 55, "003875821301941573": 55, "918508291244507": 55, "003322127740830183": 55, "01250000037252903": 55, "372678279876709": 55, "9379729703068733": 55, "900": 55, "8928": 55, "62025316455696": 55, "477": 55, "5886116042733193": 55, "0061642722316454625": 55, "651307249069214": 55, "00532135833054781": 55, "02083333395421505": 55, "704312801361084": 55, "9291621446609497": 55, "910": 55, "10272": 55, "106": 55, "26530612244898": 55, "10176": 55, "5608879745006561": 55, "00618959182217318": 55, "341": [55, 56], "5462481498718": 55, "002049457747489214": 55, "115": [55, 56], "31817626953125": 55, "03178894519805908": 55, "1050": 55, "10464": 55, "120": [55, 56], "68235294117648": 55, "480": 55, "10368": 55, "5731720849871635": 55, "008771563362703683": 55, "809": 55, "6955997467041": 55, "004173839930444956": 55, "018750000465661287": 55, "382": [55, 63], "14801025390625": 55, "0032292604446411133": 55, "1070": 55, "11616": 55, "474": [55, 63], "11520": 55, "5582666076719761": 55, "006921725869559215": 55, "403": [55, 56], "52278537750243": 55, "002198959467932582": 55, "240": 55, "1759490966797": 55, "19455385208129883": 55, "1190": 55, "11808": 55, "122": [55, 56], "129": [55, 56], "01111111111112": 55, "11712": 55, "5791081696748733": 55, "0027768491202399214": 55, "579": 55, "2247428894043": 55, "0006394482916221023": 55, "69767761230469": 55, "7034773826599121": 55, "1210": 55, "12864": 55, "133": 55, "467": [55, 63], "12768": 55, "585125806927681": 55, "0028700591409382527": 55, "194953203201294": 55, "007809734903275967": 55, "015625000279396773": 55, "225722789764404": 55, "04570794105529785": 55, "1320": 55, "13152": 55, "136": [55, 56], "137": 55, 
"4516129032258": 55, "13056": 55, "566350145637989": 55, "004751363794999452": 55, "131759536266326": 55, "0031535024754703045": 55, "002083333395421505": 55, "930537462234497": 55, "9139308258891106": 55, "1350": 55, "14112": 55, "146": [55, 56], "14016": 55, "5963118925690651": 55, "007955966270916815": 55, "367142927646636": 55, "010106074623763561": 55, "07291666744276881": 55, "582631826400757": 55, "14140963554382324": 55, "1450": 55, "14304": 55, "148": [55, 63], "145": 55, "38144329896906": 55, "457": [55, 63], "14208": 55, "5282964281737804": 55, "0037513579605426006": 55, "7543770960532129": 55, "003314702305942774": 55, "04687500018626452": 55, "028775174170732498": 55, "9980020457878709": 55, "1470": 55, "974358974358974": 55, "429": [55, 56], "1248": 55, "6186478942632675": 55, "01541837720311987": 55, "90045881271362": 55, "008347732946276665": 55, "05625000074505806": 55, "35782241821289": 55, "03064143657684326": 55, "27777777777778": 55, "420": 55, "6108133271336555": 55, "005322299412182474": 55, "101": [55, 56], "77589092254638": 55, "0019470953848212957": 55, "0010416666977107526": 55, "62103271484375": 55, "07671612501144409": 55, "791666666666664": 55, "421": 55, "2496": 55, "6129413187503815": 55, "0026073096491330714": 55, "97837677001954": 55, "0008606038172729313": 55, "147621154785156": 55, "07078427076339722": 55, "2784": 55, "433": 55, "585808028280735": 55, "000618002787662908": 55, "70521297454835": 55, "0011669064406305552": 55, "0313491821289": 55, "03644925355911255": 55, "270": 55, "3936": 55, "172413793103445": 55, "426": [55, 56], "3840": 55, "5633251592516899": 55, "009321122000134574": 55, "42370948791503": 55, "004084523767232895": 55, "025000000558793544": 55, "39215087890625": 55, "008745789527893066": 55, "390": [55, 63], "4224": 55, "49230769230769": 55, "442": 55, "4128": 55, "5803879588842392": 55, "014389420735763759": 55, "104": 55, "66002407073975": 55, "004097627475857735": 55, "0406250006519258": 55, "91357421875": 
55, "06607359647750854": 55, "5376": 55, "64179104477611": 55, "436": [55, 63], "5280": 55, "5912896677851677": 55, "005140897812877121": 55, "03294086456299": 55, "0011872043833136559": 55, "535093307495117": 55, "058519959449768066": 55, "540": 55, "6103896103896": 55, "5753983780741692": 55, "007284667211085139": 55, "77001705169678": 55, "006244419142603874": 55, "018750000558793545": 55, "275583267211914": 55, "08379793167114258": 55, "6816": 55, "43835616438356": 55, "444": [55, 63], "6720": 55, "5323616154491901": 55, "007125963812965574": 55, "626363372802736": 55, "006300304085016251": 55, "037500000651925804": 55, "018963813781738": 55, "9616018049418926": 55, "690": 55, "7104": 55, "64044943820225": 55, "452": 55, "7008": 55, "6117519900202751": 55, "00272596117890016": 55, "280": [55, 56], "87690296173093": 55, "0006742061232216656": 55, "177": 55, "05584716796875": 55, "23516911268234253": 55, "720": 55, "8160": 55, "7710843373494": 55, "439": 55, "8064": 55, "57562275826931": 55, "007087657243634205": 55, "132426935434342": 55, "006503245793282986": 55, "013541666883975267": 55, "0611423254013062": 55, "9250432252883911": 55, "830": 55, "8352": 55, "17171717171718": 55, "441": 55, "8256": 55, "5499906323850154": 55, "006553472934062299": 55, "234399175643922": 55, "0049788737669587135": 55, "01979166707023978": 55, "909204483032227": 55, "6594350934028625": 55, "850": 55, "9600": 55, "9504": 55, "5261909484863281": 55, "0017483091195268584": 55, "570764398574829": 55, "003514515236020088": 55, "586752414703369": 55, "13919365406036377": 55, "980": 55, "9792": 55, "447": [55, 63], "9696": 55, "5318385265767575": 55, "011880275755174807": 55, "452494937181473": 55, "005105054937303066": 55, "026041667256504298": 55, "5928417444229126": 55, "9581267684698105": 55, "11136": 55, "93406593406593": 55, "450": 55, "11040": 55, "597024767100811": 55, "0026963132240780396": 55, "00028538703918": 55, "00123556365724653": 55, "44751739501953": 55, 
"7172763645648956": 55, "1140": 55, "11232": 55, "116": 55, "449": 55, "5200830087065696": 55, "007722906641962868": 55, "537515223026276": 55, "007102598436176777": 55, "09479166679084301": 55, "2106761932373047": 55, "3601408004760742": 55, "1150": 55, "12672": 55, "131": 55, "17021276595744": 55, "454": [55, 63], "12576": 55, "4788337767124176": 55, "038107051831805926": 55, "19971267301589252": 55, "03344881534576416": 55, "4125000021420419": 55, "027751892805099487": 55, "9155157506465912": 55, "1300": 55, "451": [55, 63], "43379202783107756": 55, "0009571537776840389": 55, "2258590620011092": 55, "00857666414231062": 55, "02291666679084301": 55, "1831377148628235": 55, "9741729144006968": 55, "13824": 55, "143": [55, 56], "138": 55, "340206185567": 55, "448": [55, 63], "13728": 55, "644160869717598": 55, "008214838248265188": 55, "01987018278450705": 55, "013853602111339569": 55, "051041666977107526": 55, "009712214581668377": 55, "004491209983825684": 55, "1420": 55, "446": 55, "49214922785758974": 55, "0010527112196238697": 55, "321832603216171": 55, "0050249057821929455": 55, "05833333460614085": 55, "8884248733520508": 55, "7490366697311401": 55, "14976": 55, "155": 55, "14880": 55, "564744371175766": 55, "00030555504467870697": 55, "583": 55, "7946674346924": 55, "618979685095837e": 55, "178": [55, 56], "1756591796875": 55, "13469618558883667": 55, "1540": 55, "48720408231019974": 55, "004510738217747256": 55, "15749059994705022": 55, "00864747166633606": 55, "051041667256504296": 55, "0023154467344284058": 55, "05674338340759277": 55, "19_3d4e7443": 55, "ppo_fourth_experimentcartpol": 55, "Be": [55, 56, 59], "visibl": 56, "mayb": [56, 60, 72, 74, 105, 106, 107], "favorit": 56, "569767441860463": 56, "591": 56, "903225806451612": 56, "567": 56, "49425287356322": 56, "557": 56, "437": 56, "6862980721518397": 56, "016145382329705173": 56, "95402302145958": 56, "009136519394814968": 56, "1068359375": 56, "268213748931885": 56, "00011879205703735352": 56, 
"6861314654350281": 56, "016842093877494337": 56, "17323541939258": 56, "007978597655892372": 56, "1025390625": 56, "8147406578063965": 56, "0003063678741455078": 56, "6855484075844288": 56, "015410382760455832": 56, "32087602615356": 56, "008056383579969406": 56, "105224609375": 56, "251166343688965": 56, "012730419635772705": 56, "409": 56, "6685062969103456": 56, "014946110408345703": 56, "33342697024345": 56, "008881180547177792": 56, "060693359375": 56, "630510330200195": 56, "1108359694480896": 56, "6661150485277176": 56, "013149463082663715": 56, "683698976039885": 56, "007977155968546867": 56, "043798828125": 56, "9081449508667": 56, "05941134691238403": 56, "402": 56, "6675648905336857": 56, "01585175626023556": 56, "83039126396179": 56, "008422331884503365": 56, "05068359375": 56, "283363342285156": 56, "06431382894515991": 56, "397": [56, 63], "6372709095478057": 56, "021793167035502846": 56, "082052528858185": 56, "008312474004924297": 56, "09052734375": 56, "487403869628906": 56, "29079967737197876": 56, "392": 56, "6271074561402201": 56, "021605250079301187": 56, "17835917472839": 56, "01045585609972477": 56, "107275390625": 56, "300893783569336": 56, "24486440420150757": 56, "389": 56, "641490114107728": 56, "01604906824504724": 56, "91851507425308": 56, "007528345100581646": 56, "0734375": 56, "153453826904297": 56, "22841238975524902": 56, "12288": 56, "374": 56, "6044564859941601": 56, "016754490803577937": 56, "31612868309021": 56, "009068363346159458": 56, "078857421875": 56, "16673469543457": 56, "30177778005599976": 56, "371": 56, "6121436970308423": 56, "014887585233373102": 56, "94282633662224": 56, "005902732722461224": 56, "049267578125": 56, "8435115814209": 56, "21425354480743408": 56, "368": 56, "621853212080896": 56, "01637536641501356": 56, "13811606168747": 56, "008492568507790565": 56, "06396484375": 56, "353282928466797": 56, "31684231758117676": 56, "14336": 56, "367": 56, "5713022822514177": 56, "01559052456432255": 56, 
"737575674057005": 56, "00888746790587902": 56, "071826171875": 56, "2188663482666": 56, "43151962757110596": 56, "365": [56, 63], "5959413398057223": 56, "01293433145910967": 56, "95801417827606": 56, "007563581224530935": 56, "06982421875": 56, "49068832397461": 56, "40706634521484375": 56, "362": 56, "6087406625971198": 56, "011938219325384126": 56, "20582329630852": 56, "005129554774612188": 56, "04287109375": 56, "536352157592773": 56, "3696613907814026": 56, "16384": 56, "363": 56, "5794724302366376": 56, "004287737552658655": 56, "43672263324261": 56, "0037438003346323967": 56, "014404296875": 56, "200799465179443": 56, "6620278060436249": 56, "360": 56, "5922138599678874": 56, "012010189255670411": 56, "09716731309891": 56, "007144401781260967": 56, "075146484375": 56, "848328590393066": 56, "5530484616756439": 56, "357": 56, "6040949983522296": 56, "009169524490425828": 56, "84913797974586": 56, "007860680110752583": 56, "072705078125": 56, "374231338500977": 56, "7407508194446564": 56, "18432": 56, "111": 56, "5607249280437827": 56, "002986471042095218": 56, "21346059292555": 56, "003013045061379671": 56, "011279296875": 56, "299112319946289": 56, "8296276032924652": 56, "112": 56, "358": 56, "5921528477221727": 56, "01051775121013634": 56, "23670785278082": 56, "005722516216337681": 56, "06689453125": 56, "937105655670166": 56, "7620555758476257": 56, "109": 56, "355": 56, "5929792949929833": 56, "005616791581269353": 56, "9369278550148": 56, "0032515935599803925": 56, "02109375": 56, "186660766601562": 56, "6016848087310791": 56, "20480": 56, "125": 56, "5653722988441586": 56, "008493624679249478": 56, "953543305397034": 56, "005177437327802181": 56, "07109375": 56, "79820442199707": 56, "7749437093734741": 56, "353": 56, "5738503985106945": 56, "005740263756888453": 56, "06800128221512": 56, "006686339154839516": 56, "03720703125": 56, "82557487487793": 56, "33640867471694946": 56, "351": 56, "5821122424677014": 56, "0035777818571659735": 56, 
"145361164212225": 56, "004387532360851765": 56, "018701171875": 56, "294953346252441": 56, "6190232038497925": 56, "22528": 56, "141": 56, "354": 56, "5748784447088837": 56, "008402446379477624": 56, "196025171130895": 56, "005493971519172192": 56, "05244140625": 56, "0958304405212402": 56, "9052915200591087": 56, "352": 56, "55838915547356": 56, "008732947133103153": 56, "26576453149319": 56, "005845913663506508": 56, "065673828125": 56, "673324584960938": 56, "7672396898269653": 56, "5832941999658943": 56, "010998867846501526": 56, "56470604687929": 56, "006126352585852146": 56, "073388671875": 56, "2158937454223633": 56, "8782470673322678": 56, "24576": 56, "157": 56, "5647322304546833": 56, "007764048119133804": 56, "263426271080974": 56, "007562276907265186": 56, "090771484375": 56, "511579513549805": 56, "777026578783989": 56, "164": 56, "5593959849327803": 56, "0112069135720958": 56, "52521513402462": 56, "012146038934588432": 56, "162939453125": 56, "99325180053711": 56, "779657244682312": 56, "5535886317491532": 56, "003764605871401727": 56, "87041089832783": 56, "007615496404469013": 56, "03154296875": 56, "99373245239258": 56, "35959136486053467": 56, "26624": 56, "175": 56, "5552531754598021": 56, "005408551605069078": 56, "160164260864256": 56, "005178069695830345": 56, "025634765625": 56, "7170295715332": 56, "7826626151800156": 56, "182": 56, "5358171337284148": 56, "00488179410531302": 56, "18989806524478": 56, "0034808891359716654": 56, "06181640625": 56, "12967732548713684": 56, "16228169202804565": 56, "5562285710126161": 56, "004001504971529357": 56, "59976389706135": 56, "002194597851485014": 56, "021923828125": 56, "274383068084717": 56, "8627262711524963": 56, "28672": 56, "5598000731319189": 56, "004672619019402191": 56, "96599825024605": 56, "0037293194327503443": 56, "034814453125": 56, "138860702514648": 56, "9212513640522957": 56, "201": 56, "5327114884741604": 56, "002263979368581204": 56, "354254606366155": 56, "0018954614643007517": 
56, "00458984375": 56, "1828498840332": 56, "01690804958343506": 56, "194": 56, "5350399187766015": 56, "010988622946024406": 56, "65103582441807": 56, "012016495689749718": 56, "100732421875": 56, "403335094451904": 56, "8912321701645851": 56, "30720": 56, "208": 56, "5623490344733": 56, "007931908047612523": 56, "936047033965586": 56, "004620042629539967": 56, "0498046875": 56, "1860785484313965": 56, "8344163149595261": 56, "218": 56, "349": 56, "514452669210732": 56, "0014171435825119261": 56, "242323934612796": 56, "006443873047828674": 56, "032568359375": 56, "14281976222991943": 56, "007233858108520508": 56, "211": 56, "348": 56, "5367143749259412": 56, "01454816997575108": 56, "89514188542962": 56, "009338829666376114": 56, "122314453125": 56, "663129806518555": 56, "9458933025598526": 56, "32768": 56, "225": 56, "539276737626642": 56, "0037407161165901926": 56, "433760127052665": 56, "013978826813399792": 56, "064990234375": 56, "37698429822921753": 56, "035490989685058594": 56, "235": [56, 63], "4973093102686107": 56, "012424326899053994": 56, "345036637177691": 56, "008750807493925095": 56, "106884765625": 56, "18800251185894012": 56, "7767911404371262": 56, "230": 56, "346": 56, "5259521684609354": 56, "02139304491574876": 56, "581413919106126": 56, "012810716405510902": 56, "201708984375": 56, "7710778713226318": 56, "8799830973148346": 56, "34816": 56, "243": 56, "5016440353356302": 56, "005388573392338003": 56, "6560175356687978": 56, "0062754955142736435": 56, "066552734375": 56, "09270089864730835": 56, "12096387147903442": 56, "150": 56, "347": 56, "4738074015825987": 56, "0019494367443257943": 56, "4576879689877387": 56, "005790143273770809": 56, "037353515625": 56, "18722578883171082": 56, "35352087020874023": 56, "345": 56, "5140718438662588": 56, "0004109115216124337": 56, "4875038336322177": 56, "0043577756732702255": 56, "02763671875": 56, "17618514597415924": 56, "08825933933258057": 56, "36864": 56, "257": 56, "505179504211992": 56, 
"0033724807828548363": 56, "052925960079301": 56, "01005391776561737": 56, "107958984375": 56, "09074155241250992": 56, "022495508193969727": 56, "160": 56, "343": 56, "48579485388472676": 56, "9661558296065775e": 56, "9125513993494678": 56, "005249223671853542": 56, "029833984375": 56, "011615638621151447": 56, "20920252799987793": 56, "262": [56, 63], "342": 56, "5235147284343838": 56, "003425118201994337": 56, "1361884556215955": 56, "005556339398026466": 56, "04345703125": 56, "04526910558342934": 56, "0790131688117981": 56, "38912": 56, "5124423679895699": 56, "320563549408689e": 56, "6837369541579392": 56, "0015420113923028111": 56, "011376953125": 56, "048248302191495895": 56, "026345491409301758": 56, "291": 56, "49892428508028386": 56, "0013376812363276257": 56, "5619548875140026": 56, "005291177425533533": 56, "031787109375": 56, "08444305509328842": 56, "06384599208831787": 56, "339": 56, "5188016330823302": 56, "0005724920614738948": 56, "6982153896708041": 56, "0033194604329764843": 56, "013623046875": 56, "05807049572467804": 56, "02944713830947876": 56, "40960": 56, "294": 56, "340": 56, "114": 56, "4957636919803917": 56, "004073993970087031": 56, "4760114259843249": 56, "008029351010918617": 56, "062060546875": 56, "03772534430027008": 56, "0035400986671447754": 56, "307": 56, "338": 56, "5157848816365004": 56, "0030665539947221988": 56, "336146240857488": 56, "006352574564516544": 56, "04088807851076126": 56, "8884187638759613": 56, "337": 56, "5111317873932422": 56, "0015660247969208284": 56, "431194728880655": 56, "0047972844913601875": 56, "030078125": 56, "02516128309071064": 56, "002133488655090332": 56, "43008": 56, "4922599596902728": 56, "00019939174962928518": 56, "27829485264082904": 56, "0020252331160008907": 56, "00849609375": 56, "003118633758276701": 56, "016220271587371826": 56, "190": 56, "325": 56, "336": 56, "48774116234853865": 56, "0037827152031240986": 56, "19911157262977214": 56, "0032185050658881664": 56, "030908203125": 56, 
"013709803111851215": 56, "26044702529907227": 56, "4999147373251617": 56, "0014124810899375007": 56, "2843351167524816": 56, "005678324960172176": 56, "02919921875": 56, "020313650369644165": 56, "055005550384521484": 56, "45056": 56, "321": 56, "127": 56, "5042375044897198": 56, "0011692596512148158": 56, "16990109027537983": 56, "0032151443883776665": 56, "0251953125": 56, "04340684413909912": 56, "01492154598236084": 56, "334": 56, "5080808162689209": 56, "003549698476854246": 56, "1296787588755251": 56, "00621542613953352": 56, "05302734375": 56, "04627562314271927": 56, "0931699275970459": 56, "327": 56, "08": 56, "333": 56, "49875728664919733": 56, "0030554209108231587": 56, "17959667765753692": 56, "006197799928486347": 56, "040576171875": 56, "017518581822514534": 56, "0012366771697998047": 56, "47104": 56, "332": 56, "135": 56, "47473918814212085": 56, "000753292843728559": 56, "10499831844717847": 56, "0022293308284133673": 56, "022412109375": 56, "002592694014310837": 56, "007587909698486328": 56, "210": 56, "331": 56, "495706963352859": 56, "000426401813456323": 56, "08446975928891334": 56, "0015652569709345698": 56, "008056640625": 56, "010549742728471756": 56, "0012684464454650879": 56, "330": 56, "4880263367667794": 56, "0010256466252030806": 56, "11710972856089938": 56, "0032306264620274305": 56, "0166015625": 56, "178580224514008e": 56, "04952669143676758": 56, "49152": 56, "328": 56, "46606638189405203": 56, "0012806903061573394": 56, "06755283990169118": 56, "00338670052587986": 56, "015380859375": 56, "011496410705149174": 56, "011872351169586182": 56, "369": 56, "4813957496546209": 56, "0005197725258767605": 56, "05070137128532224": 56, "0011682924814522266": 56, "011328125": 56, "014267145656049252": 56, "02635061740875244": 56, "144": 56, "5085351384244859": 56, "0008456365059828386": 56, "07298006257788074": 56, "0026025455445051193": 56, "022119140625": 56, "021276511251926422": 56, "025399088859558105": 56, "51200": 56, "361": 56, "151": 
56, "4632605144754052": 56, "0030253833654569464": 56, "04603174216354091": 56, "005220792256295681": 56, "01565437763929367": 56, "0232236385345459": 56, "383": 56, "323": [56, 63], "49069994343444706": 56, "0018766895205772015": 56, "03171468693126371": 56, "005815165117383003": 56, "061181640625": 56, "0051743886433541775": 56, "12434303760528564": 56, "370": 56, "322": 56, "152": 56, "49728800179436805": 56, "002779247868602397": 56, "04587990254440229": 56, "005585251376032829": 56, "0578125": 56, "01021644752472639": 56, "028099477291107178": 56, "ppo_2024": 56, "28_15": 56, "48_4fc693bc": 56, "backend": 56, "tkagg": 56, "turn": 56, "whatev": [56, 84], "come": 56, "n_simu": [56, 61, 80, 81, 86], "uniqu": [56, 63, 66, 67, 68, 76, 92, 96], "to_plot_df": 56, "global_step": [56, 61, 82, 104], "set_xlabel": 56, "set_ylabel": 56, "had": [56, 72, 74, 105, 106, 107], "data_sourc": [56, 82, 84], "string": [56, 63, 66, 67, 72, 74, 76, 78, 79, 82, 84, 104, 105, 106, 107], "Of": 56, "cours": 56, "dw_time_elaps": [56, 82], "ad": [56, 57, 104], "identifi": [56, 61, 66, 67, 68, 69, 74, 79], "visualis": [56, 110], "offer": [56, 109], "tree": [56, 66, 67], "structur": [56, 86, 109], "tensorboard_log_fold": 56, "algo_nam": [56, 86], "event": [56, 86, 104], "tfevent": [56, 86], "xxxxx": [56, 86], "tfenvent": 56, "xxx": 56, "leaf": 56, "tuto": 56, "stablebaselin": [56, 71], "log_path": 56, "path_ppo": 56, "ppo_cartpol": 56, "path_a2c": 56, "a2c_cartpol": 56, "tensorboard_log": [56, 68], "model2": 56, "model2_seed2": 56, "5_000": 56, "tb_log_nam": [56, 68], "tensorboard_folder_to_datafram": 56, "scalar": [56, 66, 67, 68, 69, 76, 104], "tensorboad": 56, "kei": [56, 66, 67, 69, 72, 74, 75, 82, 84, 86, 92, 104, 105, 106, 107], "measur": [56, 66, 109], "data_in_datafram": 56, "ppo_1": 56, "685392": 56, "719999": 56, "689999": 56, "a2c_1": 56, "384617": 56, "071430": 56, "500000": 56, "444443": 56, "2500": [56, 57, 80], "980392": 56, "070175": 56, "3500": 56, "723076": 56, "4000": 56, 
"358208": 56, "4500": 56, "821918": 56, "448719": 56, "a2c_2": 56, "888889": 56, "766666": 56, "097561": 56, "090908": 56, "681820": 56, "135136": 56, "414635": 56, "229885": 56, "406593": 56, "927834": 56, "3145": 56, "2201": 56, "2072": 56, "1632": 56, "1614": 56, "1607": 56, "1616": 56, "1618": 56, "1621": 56, "1620": 56, "1612": 56, "1595": 56, "1391": 56, "1455": 56, "1510": 56, "1528": 56, "1542": 56, "1555": 56, "1567": 56, "1579": 56, "1584": 56, "1590": 56, "007330": 56, "009172": 56, "080078": 56, "057373": 56, "686539": 56, "670926": 56, "620371": 56, "650572": 56, "397493": 56, "603015": 56, "657923": 56, "598895": 56, "643388": 56, "581709": 56, "652546": 56, "619731": 56, "669858": 56, "627314": 56, "628350": 56, "599171": 56, "621631": 56, "595288": 56, "526879": 56, "519675": 56, "613992": 56, "412572": 56, "011276": 56, "149804": 56, "145305": 56, "033920": 56, "038705": 56, "040012": 56, "008545": 56, "001227": 56, "000320": 56, "000204": 56, "000652": 56, "000053": 56, "010114": 56, "052339": 56, "034683": 56, "032164": 56, "006440": 56, "002829": 56, "000402": 56, "001146": 56, "011359": 56, "000118": 56, "0007": 56, "986723": 56, "091626": 56, "012042": 56, "017113": 56, "456509": 56, "260746": 56, "965336": 56, "581780": 56, "398053": 56, "973416": 56, "076917": 56, "540639": 56, "548": 56, "644836": 56, "487506": 56, "014572": 56, "567771": 56, "852792": 56, "482461": 56, "358838": 56, "897699": 56, "174485": 56, "617828": 56, "083209": 56, "551695": 56, "038313": 56, "609340": 56, "policy_loss": 56, "864703": 56, "619358": 56, "543582": 56, "340154": 56, "098149": 56, "196676": 56, "017691": 56, "157162": 56, "859272": 56, "750966": 56, "904987": 56, "656176": 56, "951152": 56, "320224": 56, "048145": 56, "843183": 56, "130708": 56, "144796": 56, "748440": 56, "190367": 56, "fulli": 57, "quick": [57, 62, 63, 108, 110], "introduct": [57, 67, 68], "incorpor": 57, "behav": [57, 104], "exactli": 57, "replac": [57, 62, 63, 109], "unifi": [57, 73, 
91, 92, 93, 94, 95, 96], "reproduc": [57, 59, 60, 63, 64, 97, 100, 102, 111], "quickli": 57, "detail": [57, 61, 62, 75, 110], "under": [57, 60, 62, 64, 72, 74, 105, 106, 107], "hood": 57, "metric": [57, 60, 72, 74, 105, 106, 107, 108, 110], "00000001": 57, "vf_coef": 57, "log_interv": [57, 104], "400": 57, "dev": [57, 62, 64, 72, 74, 105, 106, 107, 109, 112], "subclass": [57, 66, 67, 68], "stats_altern": 57, "456": [57, 63], "600": [57, 62], "n_optuna_work": [57, 76], "optuna_parallel": [57, 76], "fit_fract": [57, 76], "everyth": [57, 63], "readabl": 58, "set_level": [58, 63, 64], "examl": 58, "anymor": 58, "keep": [58, 62, 66, 67, 104], "By": [58, 60, 66, 67, 72, 74, 105, 106, 107], "tabular_rl": 59, "qlagent": 59, "experiment_to_sav": 59, "exploration_typ": 59, "arg": [59, 72, 74, 83, 91, 92, 93, 94, 95, 96, 105, 106, 107], "300000": 59, "ql": 59, "qlfrozenlak": 59, "178711": 59, "futurewarn": 59, "behavior": [59, 60, 61, 62, 63, 64, 89], "concaten": [59, 75, 104], "empti": 59, "futur": [59, 60, 67, 68], "longer": [59, 72, 74, 104, 105, 106, 107], "exclud": 59, "determin": [59, 76], "dtype": [59, 69, 91, 93, 94, 95], "retain": 59, "relev": 59, "concat": 59, "oper": [59, 60, 76, 109], "_data": [59, 104], "ignore_index": 59, "77377103": 59, "77378092": 59, "At": [59, 75], "Or": 59, "temporari": 59, "tempfil": 59, "temporarydirectori": 59, "tmpdir": 59, "csv": [59, 77], "locat": 59, "get_single_path_of_most_recently_trained_experiment_manager_obj_from_path": 59, "recent": [59, 84], "loading_tool": 59, "path_to_load": 59, "loaded_experiment_manag": 59, "test_env": 59, "next_observ": [59, 69], "success": [59, 66, 67, 68], "retri": 59, "haven": 59, "highli": [59, 66], "output_dir_path": 59, "env_seed_max_valu": 59, "agent_to_train_and_sav": 59, "195540": 59, "1830874": 59, "15802259": 59, "12087594": 59, "16358512": 59, "16674384": 59, "10049071": 59, "09517673": 59, "11326436": 59, "07236883": 59, "10552007": 59, "06660356": 59, "07020302": 59, "1104349": 59, 
"23065463": 59, "19028937": 59, "20689438": 59, "08408004": 59, "17382279": 59, "2417443": 59, "29498867": 59, "46487572": 59, "52043878": 59, "56986596": 59, "19259904": 59, "57831479": 59, "6858159": 59, "22998936": 59, "39350426": 59, "env_for_load": 59, "params_for_load": 59, "loaded_ag": 59, "reus": 59, "know": 60, "sequenc": [60, 66, 67, 72, 74, 76, 89, 91, 92, 93, 94, 95, 96, 105, 106, 107], "produc": [60, 72, 74, 76, 105, 106, 107], "repeat": [60, 108], "reason": [60, 72, 74, 105, 106, 107], "ani": [60, 61, 64, 66, 67, 68, 72, 74, 76, 91, 92, 93, 94, 95, 96, 105, 106, 107], "essenti": 60, "debug": [60, 61, 64, 68, 72, 74, 103, 105, 106, 107], "valid": [60, 91, 92, 93, 94, 95, 96], "encount": [60, 104], "issu": [60, 62, 63, 64, 90], "exact": 60, "led": 60, "fix": [60, 62, 63, 64, 66, 67, 83], "guarante": [60, 91, 92, 93, 94, 95, 96], "trace": 60, "consist": 60, "anoth": [60, 61, 82, 104, 110], "review": [60, 62, 64], "confer": 60, "advertis": 60, "scenario": 60, "understood": 60, "commun": 60, "stakehold": 60, "machin": [60, 76, 109], "reli": 60, "help": [60, 62, 63, 72, 74, 105, 106, 107, 108], "across": [60, 66, 67, 76], "competit": 60, "ident": 60, "condit": [60, 72, 74, 105, 106, 107], "split": 60, "lead": [60, 89, 104], "alwai": [60, 62], "verif": 60, "result_list": 60, "entropi": [60, 61, 66, 67, 72, 74, 76, 89, 91, 93, 94, 95, 105, 106, 107], "spawn_kei": 60, "567498838741829": 60, "6356604305460527": 60, "n_children_spawn": 60, "2466559261185188": 60, "8402527193117317": 60, "4732958445958833": 60, "5863995575997462": 60, "1722486099076424": 60, "1930990650226178": 60, "current": [60, 63, 64, 67, 68, 72, 74, 76, 85, 104, 105, 106, 107], "randint": [60, 61], "item": [60, 92], "3817148928": 60, "671396126": 60, "2950680447": 60, "791815335": 60, "3335786391": 60, "82990446": 60, "2463687945": 60, "1829003305": 60, "647811387": 60, "3543380778": 60, "3887070615": 60, "363268341": 60, "3607514851": 60, "3881090947": 60, "1018754931": 60, "693246422": 60, 
"3606543353": 60, "433394544": 60, "2194426398": 60, "3928404622": 60, "customari": 61, "typic": [61, 72, 74, 76, 105, 106, 107, 108], "acquisit": 61, "mainli": [61, 110], "teach": [61, 63, 108, 110], "student": [61, 110], "gif_writ": 61, "saving_path": 61, "visu_gymnasium_gif": 61, "neural": 61, "due": [61, 72, 74, 105, 106, 107], "variablil": 61, "goe": 61, "being": [61, 63], "fed": [61, 70], "xtag": [61, 63, 82], "synchron": [61, 62, 82], "coordin": [61, 91], "hand": [61, 62], "kind": [61, 86], "aggreg": [61, 78], "distinct": 61, "too": [61, 64], "rug": 61, "blue": 61, "averag": [61, 108], "light": 61, "inidividu": 61, "savefig": [61, 80, 81, 82], "png": 61, "bar": 61, "explicit": 61, "vertic": 61, "errorbar": 61, "band": [61, 63, 80, 82], "analysi": [61, 80, 82, 109], "along": 61, "whole": [61, 76], "prefer": [61, 62], "interpret": [61, 76, 77], "docstr": [61, 62, 63], "plu": [61, 80, 81, 82], "minu": [61, 80, 81, 82], "quantil": [61, 80, 81, 82], "divid": [61, 80, 81, 82], "sqrt": [61, 80, 81, 82], "choose_random_ag": [61, 79], "example_ev": 61, "repositori": [62, 63, 64, 110], "submit": 62, "pr": [62, 63, 64, 112], "local": [62, 63, 64], "git": [62, 64, 109], "account": [62, 80, 82], "click": 62, "button": 62, "copi": [62, 66, 67, 68, 72, 76], "clone": 62, "repo": 62, "disk": 62, "your_login": 62, "connect": [62, 72, 74, 105, 106, 107], "slow": 62, "cd": [62, 64], "poetri": [62, 63, 64], "pip": [62, 109, 110], "curl": [62, 64], "ssl": [62, 64], "venv": 62, "sync": [62, 64], "shell": 62, "yourcommandher": 62, "upstream": 62, "remot": [62, 63], "latest": [62, 82, 84], "properli": 62, "branch": [62, 63], "checkout": 62, "fetch": 62, "merg": [62, 63], "hold": 62, "my_featur": 62, "edit": 62, "modified_fil": 62, "push": 62, "origin": 62, "instruct": 62, "send": [62, 66, 67], "email": 62, "committ": 62, "subsequ": 62, "conflict": 62, "relat": 62, "resolv": 62, "rebas": 62, "softwar": [62, 109], "style": [62, 72, 74, 76, 104, 105, 106, 107], "flake8": 62, 
"reformat": 62, "recommit": 62, "verifi": 62, "autopep8": 62, "yourfil": 62, "approv": 62, "mrg": 62, "complet": [62, 72, 109], "subject": 62, "incomplet": 62, "receiv": [62, 63], "wip": 62, "matur": 62, "someth": [62, 64], "duplic": 62, "broad": 62, "seek": 62, "collabor": 62, "benefit": 62, "inclus": 62, "task": [62, 72, 74, 105, 106, 107, 108], "pytest": [62, 64], "guidelin": [62, 108, 110, 111], "enhanc": 62, "correct": [62, 63, 66, 67, 69], "auto": 62, "pep8": 62, "violat": 62, "azur": [62, 64], "pipelin": [62, 63, 64, 70], "mac": 62, "window": [62, 72, 74, 105, 106, 107], "view": [62, 92], "articl": 62, "written": [62, 104], "restructuredtext": [62, 64], "rst": 62, "myst": [62, 109], "parser": [62, 109], "_nameref": 62, "namer": 62, "md": 62, "syntax": 62, "explan": [62, 108], "video_plot_my_experi": 62, "width": 62, "sphinx_gallery_thumbnail_path": 62, "thumbnail": 62, "jpg": 62, "my_experi": 62, "slower": 62, "guidel": 62, "472": 63, "integr": 63, "468": 63, "407": 63, "make_gym": 63, "453": 63, "463": 63, "typo": 63, "patch": 63, "writer_data": 63, "438": 63, "445": 63, "455": 63, "411": 63, "405": 63, "406": 63, "408": 63, "404": 63, "experiencemanag": 63, "396": 63, "coverag": [63, 64], "workflow": [63, 108, 109], "385": 63, "rtd": 63, "379": 63, "materi": 63, "simplifi": 63, "376": 63, "seaborn": 63, "confid": [63, 80, 81, 82, 108], "276": 63, "326": 63, "upgrad": 63, "281": 63, "318": 63, "gpu": 63, "make_atari_env": [63, 71], "togeth": 63, "298": 63, "jax": [63, 76, 77], "attent": [63, 76], "277": 63, "273": 63, "percentil": 63, "interv": [63, 80, 81, 82, 91, 108], "sd": 63, "261": 63, "mdqnagent": 63, "mdqn": 63, "244": 63, "253": 63, "compress": [63, 66, 67], "link": [63, 108], "spring": 63, "226": 63, "227": 63, "curv": [63, 80, 81, 82, 110], "223": 63, "132": 63, "tracker": 63, "bandittrack": 63, "track": 63, "191": 63, "161": 63, "replai": 63, "replaybuff": 63, "memori": 63, "feb": 63, "2022": 63, "126": 63, "__version__": 63, "0dev0": 63, 
"attribut": [63, 66, 67, 68, 69, 72, 74, 75, 76, 77, 88, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "overrid": [63, 104], "__eq__": 63, "118": 63, "feat": 63, "finitemdp": 63, "timestamp": [63, 72, 74, 76, 105, 106, 107], "short": [63, 64], "layout": 63, "simpler": 63, "quickstart": [63, 108, 110, 111], "rlsvi": 63, "tabular": 63, "rlsviagent": 63, "posterior": 63, "psrl": 63, "psrlagent": 63, "contributor": 63, "unique_id": [63, 66, 67, 68], "assign": 63, "remoteexperimentmanag": [63, 104], "transfer": [63, 110], "basewrapp": 63, "convert": [63, 72, 86, 91, 92, 93, 94, 95, 96, 109], "default_rng": [63, 69], "randomst": 63, "agenthandl": 63, "miss": 63, "agentstat": 63, "free": 63, "pomdp": 63, "multi": [63, 95, 109], "processpoolexecutor": 63, "threadpoolexecutor": 63, "nest": 63, "reverb": 63, "client": 63, "exchang": 63, "messag": 63, "socket": 63, "report": [64, 76], "standalon": 64, "pull": 64, "rest": 64, "beginn": 64, "question": [64, 72], "trigger": 64, "glad": 64, "sort": 64, "markdown": 64, "live": 64, "assum": 64, "_build": 64, "examples_pattern": 64, "your_regex_goes_her": 64, "cover": 64, "resourc": 64, "root": 64, "long_test": 64, "ltest": 64, "test_": 64, "belong": 64, "ltest_": 64, "numpydoc": [64, 109], "rather": [64, 72, 74, 105, 106, 107], "statement": 64, "outsid": [64, 72, 74, 105, 106, 107], "callabl": [66, 67, 68, 76, 78, 82, 84], "copy_env": [66, 67, 68], "bool": [66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 79, 80, 81, 82, 84, 88, 89, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "compress_pickl": [66, 67], "_execution_metadata": [66, 67, 68], "executionmetadata": [66, 67, 68, 104], "_default_writer_kwarg": [66, 67, 68], "_thread_shared_data": [66, 67, 68], "abc": 66, "agenttorch": 66, "abstract": [66, 67], "overridden": [66, 67], "match": [66, 67, 76, 84], "bz2": [66, 67], "_gener": [66, 67, 89, 91, 92, 93, 94, 95, 96], "thread_shared_data": [66, 67, 68, 76], "float": [66, 67, 68, 69, 72, 74, 75, 76, 78, 80, 81, 82, 91, 104, 105, 106, 
107], "qualiti": 66, "keyword": [66, 67, 68, 71, 73, 83, 98, 99, 100, 102], "complex": [66, 67, 76, 109], "mct": [66, 67], "ideal": [66, 67], "budget1": [66, 67], "budget2": [66, 67], "equival": [66, 67], "properti": [66, 67, 68, 69, 72, 74, 91, 92, 93, 94, 95, 96, 104, 105, 106, 107], "reduc": [66, 67, 82], "earli": [66, 67, 75, 108], "strictli": [66, 67], "elsewher": [66, 67], "subobject": [66, 67, 68, 72, 74, 105, 106, 107], "filenam": [66, 67, 68, 72, 76, 105, 106, 107], "filepath": [66, 67], "seed_seq": [66, 67, 68, 72, 74, 76, 88, 89, 91, 93, 94, 95, 105, 106, 107], "dill": [66, 67], "r5d46c33e8424": 66, "stackoverflow": [66, 67, 72], "25353243": [66, 67], "tri": [66, 67], "pathlib": [66, 67, 68, 76], "otherwis": [66, 67, 68, 69, 72, 74, 79, 88, 89, 105, 106, 107], "suffix": [66, 67], "uqfound": [66, 67], "_writer": [66, 67, 68, 82, 84], "safe": [66, 67, 68, 78], "overwritten": [67, 76], "sutton": [67, 68, 72, 74, 105, 106, 107], "barto": [67, 68], "2018": [67, 68], "mit": [67, 68], "press": [67, 68], "vari": 67, "r466db297bd20": 67, "basealgorithm": 68, "basepolici": 68, "tensorflow": 68, "reset_num_timestep": 68, "num_timestep": 68, "set_logg": 68, "sb3": [68, 109], "enable_priorit": 69, "shape": [69, 72, 74, 91, 92, 93, 94, 95, 96, 105, 106, 107], "time_s": 69, "priorit": 69, "sampling_mod": 69, "degre": 69, "blob": [69, 72], "master": [69, 71, 72, 91], "deepq": 69, "replay_buff": 69, "100_000": 69, "setup_entri": 69, "float32": 69, "uint32": 69, "end_episod": 69, "trajectori": 69, "dictionari": [69, 70, 72, 74, 75, 76, 82, 84, 86, 92, 105, 106, 107], "uniformli": [69, 93], "namedtupl": 69, "obj": [69, 78, 88], "meth": 69, "update_prior": 69, "new_prior": 69, "prioriti": 69, "wrapper_kwarg": 70, "wrapper1": 70, "wrapper2": 70, "reward_rang": [70, 74, 105], "unscal": 70, "adaptor": 71, "n_frame_stack": 71, "readthedoc": 71, "io": [71, 78, 108], "en": 71, "env_util": 71, "atari_wrappers_dict": 71, "terminal_on_life_loss": 71, "similar": 72, "point": [72, 
82], "initiallth": 72, "meatadata": 72, "1443129": 72, "farama": 72, "foundat": 72, "class_nam": [72, 105, 106, 107], "is_render_en": [72, 105, 106, 107], "clean": [72, 74, 105, 106, 107], "databas": [72, 74, 105, 106, 107], "rais": [72, 74, 91, 96, 100, 105, 106, 107], "get_video": [72, 105, 106, 107], "is_gen": [72, 74, 105, 106, 107], "is_onlin": [72, 74, 105, 106, 107], "np_random": [72, 74, 91, 92, 93, 94, 95, 96, 105, 106, 107], "intern": [72, 74, 89, 105, 106, 107], "_np_random": [72, 74, 91, 92, 93, 94, 95, 96, 105, 106, 107], "initialis": [72, 74, 105, 106, 107], "known": [72, 74, 105, 106, 107], "convent": [72, 74, 105, 106, 107], "human": [72, 74, 105, 106, 107], "consumpt": [72, 74, 105, 106, 107], "doesn": [72, 74, 105, 106, 107], "ndarrai": [72, 74, 91, 93, 94, 95, 105, 106, 107], "pixel": [72, 74, 105, 106, 107], "ansi": [72, 74, 105, 106, 107], "stringio": [72, 74, 105, 106, 107], "newlin": [72, 74, 105, 106, 107], "escap": [72, 74, 105, 106, 107], "color": [72, 74, 105, 106, 107], "rgb_array_list": [72, 74, 105, 106, 107], "ansi_list": [72, 74, 105, 106, 107], "rendercollect": [72, 74, 105, 106, 107], "pop": [72, 74, 105, 106, 107], "generalis": [72, 74, 105, 106, 107], "therefor": [72, 74, 105, 106, 107, 109], "correctli": [72, 74, 105, 106, 107], "return_info": [72, 74, 105, 106, 107], "prng": [72, 74, 91, 92, 93, 94, 95, 96, 105, 106, 107], "urandom": [72, 74, 105, 106, 107], "paradigm": [72, 74, 105, 106, 107], "obstyp": [72, 74, 105, 106, 107], "analog": [72, 74, 105, 106, 107], "auxiliari": [72, 74, 105, 106, 107], "complement": [72, 74, 105, 106, 107], "dynam": [72, 74, 105, 106, 107], "favor": [72, 74, 105, 106, 107], "clearer": [72, 74, 105, 106, 107], "bootstrap": [72, 74, 80, 82, 105, 106, 107], "acttyp": [72, 74, 105, 106, 107], "veloc": [72, 74, 105, 106, 107], "pole": [72, 74, 105, 106, 107], "supportsfloat": [72, 74, 105, 106, 107], "lava": [72, 74, 105, 106, 107], "barton": [72, 74, 105, 106, 107], "scope": [72, 74, 105, 106, 107], 
"satisfi": [72, 74, 105, 106, 107], "physic": [72, 74, 105, 106, 107], "prematur": [72, 74, 105, 106, 107], "diagnost": [72, 74, 105, 106, 107], "hidden": [72, 74, 105, 106, 107], "individu": [72, 74, 76, 105, 106, 107], "term": [72, 74, 105, 106, 107], "combin": [72, 74, 105, 106, 107], "v26": [72, 74, 105, 106, 107], "distinguish": [72, 74, 105, 106, 107], "favour": [72, 74, 105, 106, 107], "boolean": [72, 74, 80, 81, 82, 85, 87, 91, 92, 93, 94, 95, 96, 105, 106, 107], "undefin": [72, 74, 105, 106, 107], "signal": [72, 74, 105, 106, 107], "emit": [72, 74, 105, 106, 107], "underli": [72, 74, 105, 106, 107], "successfulli": [72, 74, 105, 106, 107, 108], "exceed": [72, 74, 105, 106, 107], "enter": [72, 74, 105, 106, 107], "invalid": [72, 74, 93, 105, 106, 107], "r_min": 74, "r_max": 74, "minimum": [74, 104], "renderfram": 74, "multipleagentscompar": 75, "n_agent": 75, "pairwis": 75, "spent": [75, 76], "manager_list": 75, "n_evalu": 75, "compute_mean_diff": 75, "absolut": 75, "get_result": 75, "partial_compar": 75, "eval_valu": 75, "th": 75, "interim": 75, "till": 75, "pp0": 75, "id_finish": 75, "bk": 75, "threshold": 75, "ax1": 75, "ax2": 75, "gridspec_kw": 75, "height_ratio": 75, "plot_results_sota": 75, "worker_logging_level": 76, "outdir_id_styl": 76, "default_writer_kwarg": 76, "init_kwargs_per_inst": 76, "doubl": [76, 105], "processor": 76, "multipli": 76, "forkserv": [76, 77], "context": [76, 77], "pytorch": [76, 77, 90, 109], "agent_name_": 76, "agent_name_unique_id": 76, "agent_name_timestamp_short_id": 76, "style_log": [76, 104], "progressbar": [76, 104], "one_lin": [76, 104], "rlberry_vers": 76, "guard": 76, "program": 76, "1e4": [76, 84], "build_eval_env": 76, "reseed": [76, 88], "clear_handl": 76, "clear_output_dir": 76, "randomli": 76, "facilit": [76, 108], "conduct": 76, "agent_manag": [76, 78], "evaluation_result": 76, "generate_profil": 76, "fname": 76, "agent_name_profil": 76, "prof": 76, "pruner_method": 76, "continue_previ": 76, "sampler_kwarg": 
76, "disable_evaluation_writ": 76, "custom_eval_funct": 76, "tpe": 76, "cmae": 76, "cma": 76, "studi": 76, "unlimit": 76, "pruner": 76, "fraction": 76, "partial": 76, "prune": 76, "sampler": 76, "person": 76, "evaluation_funct": 76, "agent_list": 76, "evaluation_function_kwarg": 76, "trained_ag": 76, "later": 76, "recreat": 76, "writer_fn": 76, "writer_kwarg": 76, "intanc": 77, "immedi": 77, "agent_sourc": 78, "mean_ev": 78, "scipi": 78, "down": 78, "tukei": 78, "hsd": 78, "parametr": 78, "assumpt": 78, "heavi": 78, "tail": 78, "multimod": 78, "summaris": 78, "devdoc": 78, "lehmann": 78, "joseph": 78, "romano": 78, "1007": 78, "387": 78, "27605": 78, "springer": 78, "experiment_manager_list": 79, "fignum": 79, "smoothing_bandwidth": [80, 82], "n_boot": [80, 82], "savefig_fnam": [80, 81, 82], "until": 80, "choosen": [80, 82], "heurist": [80, 82], "infer": [80, 82], "harder": [80, 82], "draw": [80, 82, 96], "conclus": [80, 82], "wide": [80, 82], "id_ag": [82, 84], "sub_sampl": 82, "subdirectori": [82, 84], "date": [82, 84], "nadaraya": 82, "watson": 82, "pb": 82, "cost": 82, "noth": [82, 88], "4e4": 82, "kwd": 83, "preset": 83, "manager_mak": 83, "many_agent_by_str_datasourc": 84, "datasourc": 84, "path_to_tensorboard_data": 86, "preced": 87, "venv_fir_nam": 87, "tqdm": 87, "run_tqdm": 87, "noqa": 87, "sleep": 87, "test_venv": 87, "reseed_spac": 88, "seed_val": 88, "spawn_seed_seq": 89, "bit_gener": 89, "strongli": 89, "unexpect": 89, "unnecessari": 89, "generate_st": 89, "squeez": 89, "7068": 90, "issuecom": 90, "487907668": 90, "low": 91, "float64": 91, "cartesian": 91, "b_1": 91, "a_2": 91, "b_2": 91, "a_n": 91, "b_n": 91, "mechan": [91, 92, 93, 94, 95, 96], "member": [91, 92, 93, 94, 95, 96], "from_json": [91, 92, 93, 94, 95, 96], "sample_n": [91, 92, 93, 94, 95, 96], "jsonabl": [91, 92, 94, 95, 96], "is_bound": 91, "manner": 91, "valueerror": [91, 96], "neither": 91, "nor": 91, "is_np_flatten": [91, 92, 93, 94, 95, 96], "flatten": [91, 92, 93, 94, 95, 96], 
"lazili": [91, 92, 93, 94, 95, 96], "expens": [91, 92, 93, 94, 95, 96], "githubusercont": 91, "oo": 91, "shift": 91, "possibli": [91, 93, 94, 95], "subspac": [91, 92, 93, 94, 95, 96], "stricter": [91, 94, 95], "to_json": [91, 92, 93, 94, 95, 96], "spaces_kwarg": 92, "ordereddict": 92, "keysview": 92, "mask": [92, 93, 94, 95, 96], "constitu": 92, "composit": [92, 96], "immut": [92, 93, 96], "int64": [93, 95], "json": 93, "int8": [93, 94, 95], "infeas": 93, "coin": 94, "toss": 94, "binari": 94, "nvec": 95, "unless": 95, "smallest": 95, "count": 96, "occurr": 96, "almost": 97, "continuous_st": [98, 99, 100, 101, 102], "discrete_st": [98, 99, 100, 102, 106], "notset": 103, "print_log": 104, "multi_lin": 104, "tensorboard_kwarg": 104, "execution_metadata": 104, "maxlen": 104, "maxlen_by_tag": 104, "stderr": 104, "metadata_util": 104, "logic": 104, "summary_writ": 104, "set_max_global_step": 104, "scalar_valu": 104, "walltim": 104, "new_styl": 104, "confus": 104, "tensor": [104, 109], "field": 104, "simple_valu": 104, "main_tag": 104, "tag_scalar_dict": 104, "read_first_tag_valu": 104, "read_last_tag_valu": 104, "read_tag_valu": 104, "seri": 104, "spec": [105, 106, 107], "get_continuous_st": 106, "get_discrete_st": 106, "fun": 108, "lot": 108, "bore": 108, "tricki": 108, "spend": 108, "nice": 108, "comprehens": 108, "proper": 108, "hackabl": 108, "userguid": 108, "changelog": 108, "princip": 108, "acquir": 108, "propos": 108, "hypothesi": [108, 110], "paper": 108, "todo": 108, "suggest": 109, "miniconda": 109, "releas": 109, "recommand": 109, "framework": 109, "filter": 109, "platform": 109, "hobbyist": 109, "ai": 109, "opencv": 109, "vision": 109, "dimension": 109, "mathemat": 109, "xxxxxxxx": 109, "math": 109, "dollar": 109, "sphinxcontrib": 109, "zsh": 109, "bracket": 109, "glob": 109, "quot": 109, "tag_nam": 109, "torch_ag": 109, "welcom": 110, "popular": 110, "easiest": 110, "period": [110, 111], "export": 110, "extract": 110, "writerdata": 110, "yaml": 111, 
"config": 111, "preview": 112}, "objects": {"rlberry.agents": [[66, 0, 1, "", "Agent"], [67, 0, 1, "", "AgentWithSimplePolicy"]], "rlberry.agents.Agent": [[66, 1, 1, "", "eval"], [66, 1, 1, "", "fit"], [66, 1, 1, "", "get_params"], [66, 1, 1, "", "load"], [66, 2, 1, "", "output_dir"], [66, 1, 1, "", "reseed"], [66, 2, 1, "", "rng"], [66, 1, 1, "", "sample_parameters"], [66, 1, 1, "", "save"], [66, 1, 1, "", "set_writer"], [66, 2, 1, "", "thread_shared_data"], [66, 2, 1, "", "unique_id"], [66, 2, 1, "", "writer"]], "rlberry.agents.AgentWithSimplePolicy": [[67, 1, 1, "", "eval"], [67, 1, 1, "", "fit"], [67, 1, 1, "", "get_params"], [67, 1, 1, "", "load"], [67, 2, 1, "", "output_dir"], [67, 1, 1, "", "policy"], [67, 1, 1, "", "reseed"], [67, 2, 1, "", "rng"], [67, 1, 1, "", "sample_parameters"], [67, 1, 1, "", "save"], [67, 1, 1, "", "set_writer"], [67, 2, 1, "", "thread_shared_data"], [67, 2, 1, "", "unique_id"], [67, 2, 1, "", "writer"]], "rlberry.agents.stable_baselines": [[68, 0, 1, "", "StableBaselinesAgent"]], "rlberry.agents.stable_baselines.StableBaselinesAgent": [[68, 1, 1, "", "eval"], [68, 1, 1, "", "fit"], [68, 1, 1, "", "get_params"], [68, 1, 1, "", "load"], [68, 2, 1, "", "output_dir"], [68, 1, 1, "", "policy"], [68, 1, 1, "", "reseed"], [68, 2, 1, "", "rng"], [68, 1, 1, "", "sample_parameters"], [68, 1, 1, "", "save"], [68, 1, 1, "", "set_logger"], [68, 1, 1, "", "set_writer"], [68, 2, 1, "", "thread_shared_data"], [68, 2, 1, "", "unique_id"], [68, 2, 1, "", "writer"]], "rlberry.agents.utils.replay": [[69, 0, 1, "", "ReplayBuffer"]], "rlberry.agents.utils.replay.ReplayBuffer": [[69, 1, 1, "", "append"], [69, 1, 1, "", "clear"], [69, 2, 1, "", "data"], [69, 2, 1, "", "dtypes"], [69, 1, 1, "", "end_episode"], [69, 2, 1, "", "max_episode_steps"], [69, 1, 1, "", "sample"], [69, 1, 1, "", "setup_entry"], [69, 2, 1, "", "tags"], [69, 1, 1, "", "update_priorities"]], "rlberry.envs": [[70, 3, 1, "", "PipelineEnv"], [71, 3, 1, "", "atari_make"], [73, 3, 1, "", 
"gym_make"]], "rlberry.envs.basewrapper": [[72, 0, 1, "", "Wrapper"]], "rlberry.envs.basewrapper.Wrapper": [[72, 1, 1, "", "close"], [72, 1, 1, "", "get_params"], [72, 1, 1, "", "get_video"], [72, 1, 1, "", "get_wrapper_attr"], [72, 1, 1, "", "is_generative"], [72, 1, 1, "", "is_online"], [72, 2, 1, "", "np_random"], [72, 1, 1, "", "render"], [72, 1, 1, "", "reseed"], [72, 1, 1, "", "reset"], [72, 2, 1, "", "rng"], [72, 1, 1, "", "sample"], [72, 1, 1, "", "save_video"], [72, 1, 1, "", "step"], [72, 2, 1, "", "unwrapped"]], "rlberry.envs.interface": [[74, 0, 1, "", "Model"]], "rlberry.envs.interface.Model": [[74, 1, 1, "", "close"], [74, 1, 1, "", "get_params"], [74, 1, 1, "", "get_wrapper_attr"], [74, 1, 1, "", "is_generative"], [74, 1, 1, "", "is_online"], [74, 2, 1, "", "np_random"], [74, 1, 1, "", "render"], [74, 1, 1, "", "reseed"], [74, 1, 1, "", "reset"], [74, 2, 1, "", "rng"], [74, 1, 1, "", "sample"], [74, 1, 1, "", "step"], [74, 2, 1, "", "unwrapped"]], "rlberry.manager": [[75, 0, 1, "", "AdastopComparator"], [76, 0, 1, "", "ExperimentManager"], [77, 0, 1, "", "MultipleManagers"], [78, 3, 1, "", "compare_agents"], [79, 3, 1, "", "evaluate_agents"], [80, 3, 1, "", "plot_smoothed_curves"], [81, 3, 1, "", "plot_synchronized_curves"], [82, 3, 1, "", "plot_writer_data"], [83, 3, 1, "", "preset_manager"], [84, 3, 1, "", "read_writer_data"], [85, 3, 1, "", "run_venv_xp"], [86, 3, 1, "", "tensorboard_folder_to_dataframe"], [87, 3, 1, "", "with_venv"]], "rlberry.manager.AdastopComparator": [[75, 1, 1, "", "compare"], [75, 1, 1, "", "compute_mean_diffs"], [75, 1, 1, "", "get_results"], [75, 1, 1, "", "partial_compare"], [75, 1, 1, "", "plot_results"], [75, 1, 1, "", "plot_results_sota"], [75, 1, 1, "", "print_results"]], "rlberry.manager.ExperimentManager": [[76, 1, 1, "", "build_eval_env"], [76, 1, 1, "", "clear_handlers"], [76, 1, 1, "", "clear_output_dir"], [76, 1, 1, "", "eval_agents"], [76, 1, 1, "", "fit"], [76, 1, 1, "", "generate_profile"], [76, 1, 1, "", 
"get_agent_instances"], [76, 1, 1, "", "get_writer_data"], [76, 1, 1, "", "load"], [76, 1, 1, "", "optimize_hyperparams"], [76, 1, 1, "", "save"], [76, 1, 1, "", "set_writer"]], "rlberry.manager.MultipleManagers": [[77, 1, 1, "", "append"], [77, 1, 1, "", "run"], [77, 1, 1, "", "save"]], "rlberry.seeding": [[88, 3, 1, "", "safe_reseed"], [90, 3, 1, "", "set_external_seed"]], "rlberry.seeding.seeder": [[89, 0, 1, "", "Seeder"]], "rlberry.seeding.seeder.Seeder": [[89, 1, 1, "", "reseed"], [89, 1, 1, "", "spawn"]], "rlberry.spaces": [[91, 0, 1, "", "Box"], [92, 0, 1, "", "Dict"], [93, 0, 1, "", "Discrete"], [94, 0, 1, "", "MultiBinary"], [95, 0, 1, "", "MultiDiscrete"], [96, 0, 1, "", "Tuple"]], "rlberry.spaces.Box": [[91, 1, 1, "", "contains"], [91, 1, 1, "", "from_jsonable"], [91, 1, 1, "", "is_bounded"], [91, 2, 1, "", "is_np_flattenable"], [91, 2, 1, "", "np_random"], [91, 1, 1, "", "reseed"], [91, 1, 1, "", "sample"], [91, 1, 1, "", "seed"], [91, 2, 1, "", "shape"], [91, 1, 1, "", "to_jsonable"]], "rlberry.spaces.Dict": [[92, 1, 1, "", "contains"], [92, 1, 1, "", "from_jsonable"], [92, 1, 1, "", "get"], [92, 2, 1, "", "is_np_flattenable"], [92, 1, 1, "", "items"], [92, 1, 1, "", "keys"], [92, 2, 1, "", "np_random"], [92, 1, 1, "", "sample"], [92, 1, 1, "", "seed"], [92, 2, 1, "", "shape"], [92, 1, 1, "", "to_jsonable"], [92, 1, 1, "", "values"]], "rlberry.spaces.Discrete": [[93, 1, 1, "", "contains"], [93, 1, 1, "", "from_jsonable"], [93, 2, 1, "", "is_np_flattenable"], [93, 2, 1, "", "np_random"], [93, 1, 1, "", "reseed"], [93, 1, 1, "", "sample"], [93, 1, 1, "", "seed"], [93, 2, 1, "", "shape"], [93, 1, 1, "", "to_jsonable"]], "rlberry.spaces.MultiBinary": [[94, 1, 1, "", "contains"], [94, 1, 1, "", "from_jsonable"], [94, 2, 1, "", "is_np_flattenable"], [94, 2, 1, "", "np_random"], [94, 1, 1, "", "reseed"], [94, 1, 1, "", "sample"], [94, 1, 1, "", "seed"], [94, 2, 1, "", "shape"], [94, 1, 1, "", "to_jsonable"]], "rlberry.spaces.MultiDiscrete": [[95, 1, 1, "", 
"contains"], [95, 1, 1, "", "from_jsonable"], [95, 2, 1, "", "is_np_flattenable"], [95, 2, 1, "", "np_random"], [95, 1, 1, "", "reseed"], [95, 1, 1, "", "sample"], [95, 1, 1, "", "seed"], [95, 2, 1, "", "shape"], [95, 1, 1, "", "to_jsonable"]], "rlberry.spaces.Tuple": [[96, 1, 1, "", "contains"], [96, 1, 1, "", "count"], [96, 1, 1, "", "from_jsonable"], [96, 1, 1, "", "index"], [96, 2, 1, "", "is_np_flattenable"], [96, 2, 1, "", "np_random"], [96, 1, 1, "", "sample"], [96, 1, 1, "", "seed"], [96, 2, 1, "", "shape"], [96, 1, 1, "", "to_jsonable"]], "rlberry.utils": [[97, 3, 1, "", "check_env"], [98, 3, 1, "", "check_experiment_manager"], [99, 3, 1, "", "check_fit_additive"], [100, 3, 1, "", "check_rl_agent"], [101, 3, 1, "", "check_save_load"], [102, 3, 1, "", "check_seeding_agent"]], "rlberry.utils.logging": [[103, 3, 1, "", "set_level"]], "rlberry.utils.writers": [[104, 0, 1, "", "DefaultWriter"]], "rlberry.utils.writers.DefaultWriter": [[104, 1, 1, "", "add_scalar"], [104, 1, 1, "", "add_scalars"], [104, 1, 1, "", "read_first_tag_value"], [104, 1, 1, "", "read_last_tag_value"], [104, 1, 1, "", "read_tag_value"], [104, 1, 1, "", "reset"]], "rlberry.wrappers": [[105, 0, 1, "", "RescaleRewardWrapper"]], "rlberry.wrappers.RescaleRewardWrapper": [[105, 1, 1, "", "close"], [105, 1, 1, "", "get_params"], [105, 1, 1, "", "get_video"], [105, 1, 1, "", "get_wrapper_attr"], [105, 1, 1, "", "is_generative"], [105, 1, 1, "", "is_online"], [105, 2, 1, "", "np_random"], [105, 1, 1, "", "render"], [105, 1, 1, "", "reseed"], [105, 1, 1, "", "reset"], [105, 2, 1, "", "rng"], [105, 1, 1, "", "sample"], [105, 1, 1, "", "save_video"], [105, 1, 1, "", "step"], [105, 2, 1, "", "unwrapped"]], "rlberry.wrappers.discretize_state": [[106, 0, 1, "", "DiscretizeStateWrapper"]], "rlberry.wrappers.discretize_state.DiscretizeStateWrapper": [[106, 1, 1, "", "close"], [106, 1, 1, "", "get_params"], [106, 1, 1, "", "get_video"], [106, 1, 1, "", "get_wrapper_attr"], [106, 1, 1, "", 
"is_generative"], [106, 1, 1, "", "is_online"], [106, 2, 1, "", "np_random"], [106, 1, 1, "", "render"], [106, 1, 1, "", "reseed"], [106, 1, 1, "", "reset"], [106, 2, 1, "", "rng"], [106, 1, 1, "", "sample"], [106, 1, 1, "", "save_video"], [106, 1, 1, "", "step"], [106, 2, 1, "", "unwrapped"]], "rlberry.wrappers.gym_utils": [[107, 0, 1, "", "OldGymCompatibilityWrapper"]], "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper": [[107, 1, 1, "", "close"], [107, 1, 1, "", "get_params"], [107, 1, 1, "", "get_video"], [107, 1, 1, "", "get_wrapper_attr"], [107, 1, 1, "", "is_generative"], [107, 1, 1, "", "is_online"], [107, 2, 1, "", "np_random"], [107, 1, 1, "", "render"], [107, 1, 1, "", "reseed"], [107, 1, 1, "", "reset"], [107, 2, 1, "", "rng"], [107, 1, 1, "", "sample"], [107, 1, 1, "", "save_video"], [107, 1, 1, "", "step"], [107, 2, 1, "", "unwrapped"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:property", "3": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "property", "Python property"], "3": ["py", "function", "Python function"]}, "titleterms": {"about": 0, "u": 0, "contributor": 0, "cite": 0, "rlberri": [0, 1, 5, 14, 23, 35, 36, 43, 44, 48, 49, 53, 54, 56, 57, 58, 60, 61, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110], "fund": 0, "api": 1, "manag": [1, 37, 49, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87], "main": [1, 108], "class": 1, "evalu": [1, 46, 61, 111], "plot": [1, 39, 40, 41, 61], "agent": [1, 5, 36, 44, 45, 46, 49, 50, 52, 53, 55, 59, 61, 66, 67, 68, 69, 108, 109, 110, 111], "base": [1, 16], "import": [1, 43, 49, 56], "tool": [1, 40], "environ": [1, 6, 7, 8, 9, 10, 11, 12, 13, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 43, 49, 50, 54], "space": [1, 91, 92, 93, 94, 95, 96], "seed": [1, 51, 60, 88, 89, 
90, 108], "util": [1, 69, 97, 98, 99, 100, 101, 102, 103, 104], "log": [1, 58, 64, 103], "type": 1, "virtual": [1, 35], "writer": [1, 56, 58, 104], "check": 1, "wrapper": [1, 72, 105, 106, 107], "compar": [2, 3, 49, 52, 55, 108], "ppo": [2, 10, 52], "a2c": [2, 6, 43, 52], "acrobot": [2, 11, 24, 43], "adastop": [2, 52, 108], "bandit": [3, 14, 15, 16, 17, 18, 19, 36, 40], "algorithm": [3, 6, 7, 8, 9, 10, 11, 12, 13, 16], "sac": 4, "soft": 4, "actor": 4, "critic": 4, "illustr": [5, 14, 23, 36, 40], "A": [6, 7, 8, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 37], "demo": [6, 7, 8, 9, 10, 11, 12, 13, 18, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 37], "pball2d": [6, 10, 31], "dqn": [7, 9], "cartpol": [7, 9], "mbqvi": 8, "gridworld": [8, 28], "m": 9, "rskernelucbviag": 11, "rsucbvi": 12, "mountaincar": [12, 29], "valueiter": 13, "chain": [13, 27], "comparison": [15, 16, 44, 50, 52, 108], "thompson": 15, "sampl": [15, 44], "ucb": [15, 19], "bernoulli": 15, "gaussian": 15, "subplot": 16, "variou": 16, "index": 16, "exp3": 17, "cumul": [17, 19, 50], "regret": [17, 19, 50], "bai": 18, "real": 18, "dataset": 18, "select": 18, "mirror": 18, "comput": [20, 42], "time": [20, 42], "atari": [21, 22, 26, 54], "atlanti": 21, "vector": [21, 22], "ppoagent": [21, 22], "breakout": 22, "rsucbviag": 24, "applegold": 25, "freewai": 26, "dqnagent": [26, 33], "valueiterationag": 28, "oldgymcompatibilitywrapp": [30, 107], "old_acrobot": 30, "room": 32, "springcartpol": 33, "twinroom": 34, "us": [35, 47, 53, 54, 55, 57, 60, 66, 67, 68, 71, 73, 75, 76, 78, 79, 82, 84, 85, 87, 93, 105, 107], "multipl": [35, 44], "galleri": 36, "exampl": [36, 62, 66, 67, 68, 71, 73, 75, 76, 78, 79, 82, 84, 85, 87, 93, 105, 107], "experi": [37, 47, 49, 55, 58, 59, 60, 110, 111], "checkpoint": 38, "kernel": 39, "function": 39, "record": 41, "reward": [41, 49, 50, 61], "dure": [41, 49], "train": [41, 61], "quickstart": 43, "deep": [43, 109, 110], "reinforc": [43, 49], "learn": 
[43, 49], "remind": [43, 44], "rl": [43, 49, 52, 62, 108, 109], "set": [43, 55, 58, 110], "gymnasium": [43, 54, 57, 61], "run": [43, 47], "v1": 43, "quick": [44, 49, 111], "hypothesi": [44, 52], "test": [44, 52, 64], "two": 44, "creat": [45, 53, 54, 55], "an": [45, 46, 49, 53, 54, 59, 62, 108, 110, 111], "optim": [46, 111], "its": 46, "hyperparamet": [46, 111], "setup": [47, 111], "yaml": 47, "config": 47, "file": 47, "parallel": 48, "thread": [48, 60], "process": 48, "spawn": 48, "forkserv": 48, "fork": [48, 62], "start": [49, 111], "requir": 49, "librari": [49, 50, 57, 60, 108, 111], "choos": 49, "defin": 49, "baselin": [49, 57], "expect": [49, 50], "final": 49, "polici": [49, 61], "period": 49, "definit": 50, "iter": 50, "increas": 50, "reproduc": [51, 108, 110], "adapt": 52, "from": [52, 56], "stabl": [52, 57], "baselines3": 52, "result": 52, "visualis": 52, "how": [53, 54, 55, 56, 57, 58, 59, 60, 62, 111], "without": 53, "With": [53, 60], "stablebaselines3": 53, "your": [53, 54, 55, 58, 60], "own": [53, 54], "experimentmanag": [53, 55, 60, 76, 108], "anoth": 55, "output": 55, "video": [55, 61, 62], "some": 55, "advanc": [55, 59, 110], "other": [55, 59], "inform": [55, 59], "export": 56, "data": [56, 61], "tensorboard": 56, "extract": 56, "writerdata": 56, "default": 56, "extern": [57, 60, 111], "": 58, "logger": 58, "level": 58, "save": 59, "load": 59, "previou": [59, 109], "onli": 59, "user": [59, 110, 111], "why": 60, "basic": 60, "In": 60, "classic": 60, "usag": [60, 110], "multi": 60, "visual": [61, 108], "metric": 61, "gener": 61, "gif": 61, "rlberry_scool": 61, "env": [61, 70, 71, 72, 73, 74], "curv": 61, "raw": 61, "error": 61, "represent": 61, "confid": 61, "interv": 61, "predict": 61, "contribut": [62, 64, 108, 110, 111], "instal": [62, 109], "berri": 62, "note": 62, "pre": 62, "commit": 62, "option": [62, 109], "pull": 62, "request": 62, "checklist": 62, "continu": 62, "integr": 62, "ci": 62, "build": [62, 64], "markdown": 62, "link": 62, "between": 
62, "document": [62, 64, 108, 112], "page": 62, "have": 62, "acknowledg": 62, "changelog": 63, "dev": 63, "version": [63, 109, 112], "0": [63, 109], "7": [63, 109], "3": [63, 109], "1": 63, "6": 63, "5": 63, "4": 63, "2": 63, "guidelin": 64, "docstr": 64, "agentwithsimplepolici": 67, "stable_baselin": 68, "stablebaselinesag": 68, "replai": 69, "replaybuff": 69, "pipelineenv": 70, "atari_mak": 71, "basewrapp": 72, "gym_mak": 73, "interfac": 74, "model": 74, "adastopcompar": 75, "multiplemanag": 77, "compare_ag": 78, "evaluate_ag": 79, "plot_smoothed_curv": 80, "plot_synchronized_curv": 81, "plot_writer_data": 82, "preset_manag": 83, "read_writer_data": 84, "run_venv_xp": 85, "tensorboard_folder_to_datafram": 86, "with_venv": 87, "safe_rese": 88, "seeder": 89, "set_external_se": 90, "paramet": 90, "box": 91, "dict": 92, "discret": 93, "multibinari": 94, "multidiscret": 95, "tupl": 96, "check_env": 97, "check_experiment_manag": 98, "check_fit_addit": 99, "check_rl_ag": 100, "check_save_load": 101, "check_seeding_ag": 102, "set_level": 103, "defaultwrit": 104, "rescalerewardwrapp": 105, "discretize_st": 106, "discretizestatewrapp": 106, "gym_util": 107, "research": 108, "educ": 108, "content": 108, "featur": 108, "statist": 108, "And": 108, "mani": 108, "more": 108, "latest": 109, "develop": 109, "guid": [110, 111], "introduct": 110, "up": 110, "tutori": 111, "differ": 111, "compat": 111}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"About us": [[0, "about-us"]], "Contributors": [[0, "contributors"]], "Citing rlberry": [[0, "citing-rlberry"]], "Funding": [[0, "funding"]], "rlberry API": [[1, "rlberry-api"]], "Manager": [[1, "manager"]], "Main classes": 
[[1, "main-classes"]], "Evaluation and plot": [[1, "evaluation-and-plot"]], "Agents": [[1, "agents"]], "Base classes": [[1, "base-classes"]], "Agent importation tools": [[1, "agent-importation-tools"]], "Environments": [[1, "environments"]], "Base class": [[1, "base-class"]], "Spaces": [[1, "spaces"]], "Environment tools": [[1, "environment-tools"]], "Seeding": [[1, "seeding"]], "Utilities, Logging & Typing": [[1, "utilities-logging-typing"]], "Manager Utilitis": [[1, "manager-utilitis"]], "Virtual environment Utilities": [[1, "virtual-environment-utilities"]], "Writer Utilities": [[1, "writer-utilities"]], "Check Utilities": [[1, "check-utilities"]], "Logging Utilities": [[1, "logging-utilities"]], "Environment Wrappers": [[1, "environment-wrappers"]], "Compare PPO and A2C on Acrobot with AdaStop": [[2, "compare-ppo-and-a2c-on-acrobot-with-adastop"]], "Compare Bandit Algorithms": [[3, "compare-bandit-algorithms"]], "SAC Soft Actor-Critic": [[4, "sac-soft-actor-critic"]], "Illustration of rlberry agents": [[5, "illustration-of-rlberry-agents"], [36, "illustration-of-rlberry-agents"]], "A demo of A2C algorithm in PBall2D environment": [[6, "a-demo-of-a2c-algorithm-in-pball2d-environment"]], "A demo of DQN algorithm in CartPole environment": [[7, "a-demo-of-dqn-algorithm-in-cartpole-environment"]], "A demo of MBQVI algorithm in Gridworld environment": [[8, "a-demo-of-mbqvi-algorithm-in-gridworld-environment"]], "A demo of M-DQN algorithm in CartPole environment": [[9, "a-demo-of-m-dqn-algorithm-in-cartpole-environment"]], "A demo of PPO algorithm in PBall2D environment": [[10, "a-demo-of-ppo-algorithm-in-pball2d-environment"]], "A demo of RSKernelUCBVIAgent algorithm in Acrobot environment": [[11, "a-demo-of-rskernelucbviagent-algorithm-in-acrobot-environment"]], "A demo of RSUCBVI algorithm in MountainCar environment": [[12, "a-demo-of-rsucbvi-algorithm-in-mountaincar-environment"]], "A demo of ValueIteration algorithm in Chain environment": [[13, 
"a-demo-of-valueiteration-algorithm-in-chain-environment"]], "Illustration of bandits in rlberry": [[14, "illustration-of-bandits-in-rlberry"], [36, "illustration-of-bandits-in-rlberry"]], "Comparison of Thompson sampling and UCB on Bernoulli and Gaussian bandits": [[15, "comparison-of-thompson-sampling-and-ucb-on-bernoulli-and-gaussian-bandits"]], "Comparison subplots of various index based bandits algorithms": [[16, "comparison-subplots-of-various-index-based-bandits-algorithms"]], "EXP3 Bandit cumulative regret": [[17, "exp3-bandit-cumulative-regret"]], "A demo of Bandit BAI on a real dataset to select mirrors": [[18, "a-demo-of-bandit-bai-on-a-real-dataset-to-select-mirrors"]], "UCB Bandit cumulative regret": [[19, "ucb-bandit-cumulative-regret"]], "Computation times": [[20, "computation-times"], [42, "computation-times"]], "A demo of ATARI Atlantis environment with vectorized PPOAgent": [[21, "a-demo-of-atari-atlantis-environment-with-vectorized-ppoagent"]], "A demo of ATARI Breakout environment with vectorized PPOAgent": [[22, "a-demo-of-atari-breakout-environment-with-vectorized-ppoagent"]], "Illustration of rlberry environments": [[23, "illustration-of-rlberry-environments"], [36, "illustration-of-rlberry-environments"]], "A demo of Acrobot environment with RSUCBVIAgent": [[24, "a-demo-of-acrobot-environment-with-rsucbviagent"]], "A demo of AppleGold environment": [[25, "a-demo-of-applegold-environment"]], "A demo of ATARI Freeway environment with DQNAgent": [[26, "a-demo-of-atari-freeway-environment-with-dqnagent"]], "A demo of Chain environment": [[27, "a-demo-of-chain-environment"]], "A demo of Gridworld environment with ValueIterationAgent": [[28, "a-demo-of-gridworld-environment-with-valueiterationagent"]], "A demo of MountainCar environment": [[29, "a-demo-of-mountaincar-environment"]], "A demo of OldGymCompatibilityWrapper with old_Acrobot environment": [[30, "a-demo-of-oldgymcompatibilitywrapper-with-old-acrobot-environment"]], "A demo of PBALL2D 
environment": [[31, "a-demo-of-pball2d-environment"]], "A demo of rooms environment": [[32, "a-demo-of-rooms-environment"]], "A demo of SpringCartPole environment with DQNAgent": [[33, "a-demo-of-springcartpole-environment-with-dqnagent"]], "A demo of twinrooms environment": [[34, "a-demo-of-twinrooms-environment"]], "Using multiple virtual environments with rlberry": [[35, "using-multiple-virtual-environments-with-rlberry"]], "Gallery of examples": [[36, "gallery-of-examples"]], "A demo of Experiment Manager": [[37, "a-demo-of-experiment-manager"]], "Checkpointing": [[38, "checkpointing"]], "Plot kernel functions": [[39, "plot-kernel-functions"]], "Illustration of plotting tools on Bandits": [[40, "illustration-of-plotting-tools-on-bandits"]], "Record reward during training and then plot it": [[41, "record-reward-during-training-and-then-plot-it"]], "Quickstart for Deep Reinforcement Learning in rlberry": [[43, "quickstart-for-deep-reinforcement-learning-in-rlberry"]], "Imports": [[43, "imports"]], "Reminder of the RL setting": [[43, "reminder-of-the-rl-setting"]], "Gymnasium Environment": [[43, "gymnasium-environment"]], "Running A2C on Acrobot-v1": [[43, "running-a2c-on-acrobot-v1"]], "Comparison of Agents": [[44, "comparison-of-agents"]], "Quick reminder on hypothesis testing": [[44, "quick-reminder-on-hypothesis-testing"]], "Two sample testing": [[44, "two-sample-testing"]], "Multiple testing": [[44, "multiple-testing"]], "Multiple agent comparison in rlberry": [[44, "multiple-agent-comparison-in-rlberry"]], "Create an agent": [[45, "create-an-agent"]], "Evaluate an agent and optimize its hyperparameters": [[46, "evaluate-an-agent-and-optimize-its-hyperparameters"]], "Setup and run experiments using yaml config files": [[47, "setup-and-run-experiments-using-yaml-config-files"]], "Parallelization in rlberry": [[48, "parallelization-in-rlberry"]], "Threading": [[48, "threading"]], "Process: spawn or forkserver": [[48, "process-spawn-or-forkserver"]], "Process: 
fork": [[48, "process-fork"]], "Quick Start for Reinforcement Learning in rlberry": [[49, "quick-start-for-reinforcement-learning-in-rlberry"]], "Importing required libraries": [[49, "importing-required-libraries"]], "Choosing an RL environment": [[49, "choosing-an-rl-environment"]], "Defining an agent and a baseline": [[49, "defining-an-agent-and-a-baseline"]], "Experiment Manager": [[49, "experiment-manager"]], "Comparing the expected rewards of the final policies": [[49, "comparing-the-expected-rewards-of-the-final-policies"]], "Comparing the agents during the learning period": [[49, "comparing-the-agents-during-the-learning-period"]], "Libraries": [[50, "libraries"]], "Environment definition": [[50, "environment-definition"]], "Agents definition": [[50, "agents-definition"]], "Comparisons": [[50, "comparisons"]], "Comparison of expected rewards.": [[50, "comparison-of-expected-rewards"]], "Comparison of cumulative regret as iterations increase": [[50, "comparison-of-cumulative-regret-as-iterations-increase"]], "Seeding & Reproducibility": [[51, "seeding-reproducibility"], [108, "seeding-reproducibility"]], "Adaptive hypothesis testing for comparison of RL agents with AdaStop": [[52, "adaptive-hypothesis-testing-for-comparison-of-rl-agents-with-adastop"]], "Hypothesis testing to compare RL agents": [[52, "hypothesis-testing-to-compare-rl-agents"]], "Comparison of A2C and PPO from stable-baselines3": [[52, "comparison-of-a2c-and-ppo-from-stable-baselines3"]], "Result visualisation": [[52, "result-visualisation"]], "How to use an Agent": [[53, "how-to-use-an-agent"]], "Use rlberry Agent": [[53, "use-rlberry-agent"]], "without agent": [[53, "without-agent"]], "With agent": [[53, "with-agent"]], "Use StableBaselines3 as rlberry Agent": [[53, "use-stablebaselines3-as-rlberry-agent"]], "Create your own Agent": [[53, "create-your-own-agent"]], "Use experimentManager": [[53, "use-experimentmanager"]], "How to use an environment": [[54, "how-to-use-an-environment"]], 
"Use rlberry environment": [[54, "use-rlberry-environment"]], "Use Gymnasium environment": [[54, "use-gymnasium-environment"]], "Use Atari environment": [[54, "use-atari-environment"]], "Create your own environment": [[54, "create-your-own-environment"]], "How to use the ExperimentManager": [[55, "how-to-use-the-experimentmanager"]], "Create your experiment": [[55, "create-your-experiment"]], "Compare with another agent": [[55, "compare-with-another-agent"]], "Output the video": [[55, "output-the-video"]], "Some advanced settings": [[55, "some-advanced-settings"]], "Other information": [[55, "other-information"], [59, "other-information"]], "How to export/import data (rlberry data, tensorboard data, \u2026)?": [[56, "how-to-export-import-data-rlberry-data-tensorboard-data"]], "How to extract data from the WriterData?": [[56, "how-to-extract-data-from-the-writerdata"]], "Default writer": [[56, "default-writer"]], "How to import data from tensorboard?": [[56, "how-to-import-data-from-tensorboard"]], "How to use the external libraries": [[57, "how-to-use-the-external-libraries"]], "Using rlberry and Gymnasium": [[57, "using-rlberry-and-gymnasium"]], "Using rlberry and Stable Baselines": [[57, "using-rlberry-and-stable-baselines"]], "How to log your experiment": [[58, "how-to-log-your-experiment"]], "Set rlberry\u2019s logger level": [[58, "set-rlberry-s-logger-level"]], "Writer": [[58, "writer"]], "How to save/load an experiment": [[59, "how-to-save-load-an-experiment"]], "how to save an experiment?": [[59, "how-to-save-an-experiment"]], "How to load a previous experiment?": [[59, "how-to-load-a-previous-experiment"]], "How to save/load an agent only? 
(advanced users)": [[59, "how-to-save-load-an-agent-only-advanced-users"]], "Save the agent": [[59, "save-the-agent"]], "Load the agent": [[59, "load-the-agent"]], "How to seed your experiment": [[60, "how-to-seed-your-experiment"]], "Why use seeding?": [[60, "why-use-seeding"]], "Basics": [[60, "basics"]], "In rlberry": [[60, "in-rlberry"]], "classic usage": [[60, "classic-usage"]], "With ExperimentManager": [[60, "with-experimentmanager"]], "multi-threading": [[60, "multi-threading"]], "External libraries": [[60, "external-libraries"]], "Visualization of policies and plots of training/evaluation metrics in rlberry": [[61, "visualization-of-policies-and-plots-of-training-evaluation-metrics-in-rlberry"]], "Generating videos and gif of the policy of a trained agent": [[61, "generating-videos-and-gif-of-the-policy-of-a-trained-agent"]], "Generating videos": [[61, "generating-videos"]], "Generating gifs with rlberry_scool env": [[61, "generating-gifs-with-rlberry-scool-env"]], "Generating gifs with Gymnasium env": [[61, "generating-gifs-with-gymnasium-env"]], "Plotting training data and reward curves in rlberry": [[61, "plotting-training-data-and-reward-curves-in-rlberry"]], "Plotting raw curves": [[61, "plotting-raw-curves"]], "Error representation \u2013 confidence intervals and prediction intervals": [[61, "error-representation-confidence-intervals-and-prediction-intervals"]], "Visualization of evaluations of trained agents in rlberry": [[61, "visualization-of-evaluations-of-trained-agents-in-rlberry"]], "How to contribute": [[62, "how-to-contribute"]], "Forking and installing rl-berry": [[62, "forking-and-installing-rl-berry"]], "Note": [[62, null]], "Pre-commit (optional)": [[62, "pre-commit-optional"]], "Pull request checklist": [[62, "pull-request-checklist"]], "Continuous integration (CI)": [[62, "continuous-integration-ci"]], "Building examples": [[62, "building-examples"]], "Markdown and link between documentation pages.": [[62, 
"markdown-and-link-between-documentation-pages"]], "Have a video for an example in the documentation": [[62, "have-a-video-for-an-example-in-the-documentation"]], "Acknowledgements": [[62, "acknowledgements"]], "Changelog": [[63, "changelog"]], "Dev version": [[63, "dev-version"]], "Version 0.7.3": [[63, "version-0-7-3"]], "Version 0.7.1": [[63, "version-0-7-1"]], "Version 0.7.0": [[63, "version-0-7-0"]], "Version 0.6.0": [[63, "version-0-6-0"]], "Version 0.5.0": [[63, "version-0-5-0"]], "Version 0.4.1": [[63, "version-0-4-1"]], "Version 0.4.0": [[63, "version-0-4-0"]], "Version 0.3.0": [[63, "version-0-3-0"]], "Version 0.2.1": [[63, "version-0-2-1"]], "Version 0.2": [[63, "version-0-2"]], "Contributing": [[64, "contributing"]], "Documentation": [[64, "documentation"]], "Building the documentation": [[64, "building-the-documentation"]], "Tests": [[64, "tests"]], "Guidelines for docstring": [[64, "guidelines-for-docstring"]], "Guidelines for logging": [[64, "guidelines-for-logging"]], "rlberry.agents.Agent": [[66, "rlberry-agents-agent"]], "Examples using rlberry.agents.Agent": [[66, "examples-using-rlberry-agents-agent"]], "rlberry.agents.AgentWithSimplePolicy": [[67, "rlberry-agents-agentwithsimplepolicy"]], "Examples using rlberry.agents.AgentWithSimplePolicy": [[67, "examples-using-rlberry-agents-agentwithsimplepolicy"]], "rlberry.agents.stable_baselines.StableBaselinesAgent": [[68, "rlberry-agents-stable-baselines-stablebaselinesagent"]], "Examples using rlberry.agents.stable_baselines.StableBaselinesAgent": [[68, "examples-using-rlberry-agents-stable-baselines-stablebaselinesagent"]], "rlberry.agents.utils.replay.ReplayBuffer": [[69, "rlberry-agents-utils-replay-replaybuffer"]], "rlberry.envs.PipelineEnv": [[70, "rlberry-envs-pipelineenv"]], "rlberry.envs.atari_make": [[71, "rlberry-envs-atari-make"]], "Examples using rlberry.envs.atari_make": [[71, "examples-using-rlberry-envs-atari-make"]], "rlberry.envs.basewrapper.Wrapper": [[72, 
"rlberry-envs-basewrapper-wrapper"]], "rlberry.envs.gym_make": [[73, "rlberry-envs-gym-make"]], "Examples using rlberry.envs.gym_make": [[73, "examples-using-rlberry-envs-gym-make"]], "rlberry.envs.interface.Model": [[74, "rlberry-envs-interface-model"]], "rlberry.manager.AdastopComparator": [[75, "rlberry-manager-adastopcomparator"]], "Examples using rlberry.manager.AdastopComparator": [[75, "examples-using-rlberry-manager-adastopcomparator"]], "rlberry.manager.ExperimentManager": [[76, "rlberry-manager-experimentmanager"]], "Examples using rlberry.manager.ExperimentManager": [[76, "examples-using-rlberry-manager-experimentmanager"]], "rlberry.manager.MultipleManagers": [[77, "rlberry-manager-multiplemanagers"]], "rlberry.manager.compare_agents": [[78, "rlberry-manager-compare-agents"]], "Examples using rlberry.manager.compare_agents": [[78, "examples-using-rlberry-manager-compare-agents"]], "rlberry.manager.evaluate_agents": [[79, "rlberry-manager-evaluate-agents"]], "Examples using rlberry.manager.evaluate_agents": [[79, "examples-using-rlberry-manager-evaluate-agents"]], "rlberry.manager.plot_smoothed_curves": [[80, "rlberry-manager-plot-smoothed-curves"]], "rlberry.manager.plot_synchronized_curves": [[81, "rlberry-manager-plot-synchronized-curves"]], "rlberry.manager.plot_writer_data": [[82, "rlberry-manager-plot-writer-data"]], "Examples using rlberry.manager.plot_writer_data": [[82, "examples-using-rlberry-manager-plot-writer-data"]], "rlberry.manager.preset_manager": [[83, "rlberry-manager-preset-manager"]], "rlberry.manager.read_writer_data": [[84, "rlberry-manager-read-writer-data"]], "Examples using rlberry.manager.read_writer_data": [[84, "examples-using-rlberry-manager-read-writer-data"]], "rlberry.manager.run_venv_xp": [[85, "rlberry-manager-run-venv-xp"]], "Examples using rlberry.manager.run_venv_xp": [[85, "examples-using-rlberry-manager-run-venv-xp"]], "rlberry.manager.tensorboard_folder_to_dataframe": [[86, 
"rlberry-manager-tensorboard-folder-to-dataframe"]], "rlberry.manager.with_venv": [[87, "rlberry-manager-with-venv"]], "Examples using rlberry.manager.with_venv": [[87, "examples-using-rlberry-manager-with-venv"]], "rlberry.seeding.safe_reseed": [[88, "rlberry-seeding-safe-reseed"]], "rlberry.seeding.seeder.Seeder": [[89, "rlberry-seeding-seeder-seeder"]], "rlberry.seeding.set_external_seed": [[90, "rlberry-seeding-set-external-seed"]], "Parameters": [[90, "parameters"]], "rlberry.spaces.Box": [[91, "rlberry-spaces-box"]], "rlberry.spaces.Dict": [[92, "rlberry-spaces-dict"]], "rlberry.spaces.Discrete": [[93, "rlberry-spaces-discrete"]], "Examples using rlberry.spaces.Discrete": [[93, "examples-using-rlberry-spaces-discrete"]], "rlberry.spaces.MultiBinary": [[94, "rlberry-spaces-multibinary"]], "rlberry.spaces.MultiDiscrete": [[95, "rlberry-spaces-multidiscrete"]], "rlberry.spaces.Tuple": [[96, "rlberry-spaces-tuple"]], "rlberry.utils.check_env": [[97, "rlberry-utils-check-env"]], "rlberry.utils.check_experiment_manager": [[98, "rlberry-utils-check-experiment-manager"]], "rlberry.utils.check_fit_additive": [[99, "rlberry-utils-check-fit-additive"]], "rlberry.utils.check_rl_agent": [[100, "rlberry-utils-check-rl-agent"]], "rlberry.utils.check_save_load": [[101, "rlberry-utils-check-save-load"]], "rlberry.utils.check_seeding_agent": [[102, "rlberry-utils-check-seeding-agent"]], "rlberry.utils.logging.set_level": [[103, "rlberry-utils-logging-set-level"]], "rlberry.utils.writers.DefaultWriter": [[104, "rlberry-utils-writers-defaultwriter"]], "rlberry.wrappers.RescaleRewardWrapper": [[105, "rlberry-wrappers-rescalerewardwrapper"]], "Examples using rlberry.wrappers.RescaleRewardWrapper": [[105, "examples-using-rlberry-wrappers-rescalerewardwrapper"]], "rlberry.wrappers.discretize_state.DiscretizeStateWrapper": [[106, "rlberry-wrappers-discretize-state-discretizestatewrapper"]], "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper": [[107, 
"rlberry-wrappers-gym-utils-oldgymcompatibilitywrapper"]], "Examples using rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper": [[107, "examples-using-rlberry-wrappers-gym-utils-oldgymcompatibilitywrapper"]], "An RL Library for Research and Education": [[108, "an-rl-library-for-research-and-education"]], "Documentation Contents": [[108, "documentation-contents"]], "Contributing to rlberry": [[108, "contributing-to-rlberry"], [110, "contributing-to-rlberry"]], "rlberry main features": [[108, "rlberry-main-features"]], "ExperimentManager": [[108, "experimentmanager"]], "Statistical comparison of RL agents": [[108, "statistical-comparison-of-rl-agents"]], "Compare agents": [[108, "compare-agents"]], "AdaStop": [[108, "adastop"]], "Visualization": [[108, "visualization"]], "And many more !": [[108, "and-many-more"]], "Installation": [[109, "installation"]], "Latest version (0.7.3)": [[109, "latest-version-0-7-3"]], "Options": [[109, "options"]], "Development version": [[109, "development-version"]], "Previous versions": [[109, "previous-versions"]], "Deep RL agents": [[109, "deep-rl-agents"]], "User Guide": [[110, "user-guide"]], "Introduction": [[110, "introduction"]], "Set up an experiment": [[110, "set-up-an-experiment"]], "Experimenting with Deep agents": [[110, "experimenting-with-deep-agents"]], "Reproducibility": [[110, "reproducibility"]], "Advanced Usage": [[110, "advanced-usage"]], "User guide": [[111, "user-guide"]], "Tutorials": [[111, "tutorials"]], "Quick start: setup an experiment and evaluate different agents": [[111, "quick-start-setup-an-experiment-and-evaluate-different-agents"]], "Agents, hyperparameter optimization and experiment setup": [[111, "agents-hyperparameter-optimization-and-experiment-setup"]], "Compatibility with External Libraries": [[111, "compatibility-with-external-libraries"]], "How to contribute?": [[111, "how-to-contribute"]], "Documentation versions": [[112, "documentation-versions"]]}, "indexentries": {"agent (class in 
rlberry.agents)": [[66, "rlberry.agents.Agent"]], "eval() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.eval"]], "fit() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.fit"]], "get_params() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.get_params"]], "load() (rlberry.agents.agent class method)": [[66, "rlberry.agents.Agent.load"]], "output_dir (rlberry.agents.agent property)": [[66, "rlberry.agents.Agent.output_dir"]], "reseed() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.reseed"]], "rng (rlberry.agents.agent property)": [[66, "rlberry.agents.Agent.rng"]], "sample_parameters() (rlberry.agents.agent class method)": [[66, "rlberry.agents.Agent.sample_parameters"]], "save() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.save"]], "set_writer() (rlberry.agents.agent method)": [[66, "rlberry.agents.Agent.set_writer"]], "thread_shared_data (rlberry.agents.agent property)": [[66, "rlberry.agents.Agent.thread_shared_data"]], "unique_id (rlberry.agents.agent property)": [[66, "rlberry.agents.Agent.unique_id"]], "writer (rlberry.agents.agent property)": [[66, "rlberry.agents.Agent.writer"]], "agentwithsimplepolicy (class in rlberry.agents)": [[67, "rlberry.agents.AgentWithSimplePolicy"]], "eval() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.eval"]], "fit() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.fit"]], "get_params() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.get_params"]], "load() (rlberry.agents.agentwithsimplepolicy class method)": [[67, "rlberry.agents.AgentWithSimplePolicy.load"]], "output_dir (rlberry.agents.agentwithsimplepolicy property)": [[67, "rlberry.agents.AgentWithSimplePolicy.output_dir"]], "policy() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.policy"]], "reseed() (rlberry.agents.agentwithsimplepolicy 
method)": [[67, "rlberry.agents.AgentWithSimplePolicy.reseed"]], "rng (rlberry.agents.agentwithsimplepolicy property)": [[67, "rlberry.agents.AgentWithSimplePolicy.rng"]], "sample_parameters() (rlberry.agents.agentwithsimplepolicy class method)": [[67, "rlberry.agents.AgentWithSimplePolicy.sample_parameters"]], "save() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.save"]], "set_writer() (rlberry.agents.agentwithsimplepolicy method)": [[67, "rlberry.agents.AgentWithSimplePolicy.set_writer"]], "thread_shared_data (rlberry.agents.agentwithsimplepolicy property)": [[67, "rlberry.agents.AgentWithSimplePolicy.thread_shared_data"]], "unique_id (rlberry.agents.agentwithsimplepolicy property)": [[67, "rlberry.agents.AgentWithSimplePolicy.unique_id"]], "writer (rlberry.agents.agentwithsimplepolicy property)": [[67, "rlberry.agents.AgentWithSimplePolicy.writer"]], "stablebaselinesagent (class in rlberry.agents.stable_baselines)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent"]], "eval() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.eval"]], "fit() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.fit"]], "get_params() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.get_params"]], "load() (rlberry.agents.stable_baselines.stablebaselinesagent class method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.load"]], "output_dir (rlberry.agents.stable_baselines.stablebaselinesagent property)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.output_dir"]], "policy() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.policy"]], "reseed() (rlberry.agents.stable_baselines.stablebaselinesagent method)": 
[[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.reseed"]], "rng (rlberry.agents.stable_baselines.stablebaselinesagent property)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.rng"]], "sample_parameters() (rlberry.agents.stable_baselines.stablebaselinesagent class method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.sample_parameters"]], "save() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.save"]], "set_logger() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.set_logger"]], "set_writer() (rlberry.agents.stable_baselines.stablebaselinesagent method)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.set_writer"]], "thread_shared_data (rlberry.agents.stable_baselines.stablebaselinesagent property)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.thread_shared_data"]], "unique_id (rlberry.agents.stable_baselines.stablebaselinesagent property)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.unique_id"]], "writer (rlberry.agents.stable_baselines.stablebaselinesagent property)": [[68, "rlberry.agents.stable_baselines.StableBaselinesAgent.writer"]], "replaybuffer (class in rlberry.agents.utils.replay)": [[69, "rlberry.agents.utils.replay.ReplayBuffer"]], "append() (rlberry.agents.utils.replay.replaybuffer method)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.append"]], "clear() (rlberry.agents.utils.replay.replaybuffer method)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.clear"]], "data (rlberry.agents.utils.replay.replaybuffer property)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.data"]], "dtypes (rlberry.agents.utils.replay.replaybuffer property)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.dtypes"]], "end_episode() (rlberry.agents.utils.replay.replaybuffer method)": [[69, 
"rlberry.agents.utils.replay.ReplayBuffer.end_episode"]], "max_episode_steps (rlberry.agents.utils.replay.replaybuffer property)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.max_episode_steps"]], "sample() (rlberry.agents.utils.replay.replaybuffer method)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.sample"]], "setup_entry() (rlberry.agents.utils.replay.replaybuffer method)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.setup_entry"]], "tags (rlberry.agents.utils.replay.replaybuffer property)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.tags"]], "update_priorities() (rlberry.agents.utils.replay.replaybuffer method)": [[69, "rlberry.agents.utils.replay.ReplayBuffer.update_priorities"]], "pipelineenv() (in module rlberry.envs)": [[70, "rlberry.envs.PipelineEnv"]], "atari_make() (in module rlberry.envs)": [[71, "rlberry.envs.atari_make"]], "wrapper (class in rlberry.envs.basewrapper)": [[72, "rlberry.envs.basewrapper.Wrapper"]], "close() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.close"]], "get_params() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.get_params"]], "get_video() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.get_video"]], "get_wrapper_attr() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.get_wrapper_attr"]], "is_generative() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.is_generative"]], "is_online() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.is_online"]], "np_random (rlberry.envs.basewrapper.wrapper property)": [[72, "rlberry.envs.basewrapper.Wrapper.np_random"]], "render() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.render"]], "reseed() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.reseed"]], "reset() 
(rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.reset"]], "rng (rlberry.envs.basewrapper.wrapper property)": [[72, "rlberry.envs.basewrapper.Wrapper.rng"]], "sample() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.sample"]], "save_video() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.save_video"]], "step() (rlberry.envs.basewrapper.wrapper method)": [[72, "rlberry.envs.basewrapper.Wrapper.step"]], "unwrapped (rlberry.envs.basewrapper.wrapper property)": [[72, "rlberry.envs.basewrapper.Wrapper.unwrapped"]], "gym_make() (in module rlberry.envs)": [[73, "rlberry.envs.gym_make"]], "model (class in rlberry.envs.interface)": [[74, "rlberry.envs.interface.Model"]], "close() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.close"]], "get_params() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.get_params"]], "get_wrapper_attr() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.get_wrapper_attr"]], "is_generative() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.is_generative"]], "is_online() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.is_online"]], "np_random (rlberry.envs.interface.model property)": [[74, "rlberry.envs.interface.Model.np_random"]], "render() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.render"]], "reseed() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.reseed"]], "reset() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.reset"]], "rng (rlberry.envs.interface.model property)": [[74, "rlberry.envs.interface.Model.rng"]], "sample() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.sample"]], "step() (rlberry.envs.interface.model method)": [[74, "rlberry.envs.interface.Model.step"]], "unwrapped 
(rlberry.envs.interface.model property)": [[74, "rlberry.envs.interface.Model.unwrapped"]], "adastopcomparator (class in rlberry.manager)": [[75, "rlberry.manager.AdastopComparator"]], "compare() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.compare"]], "compute_mean_diffs() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.compute_mean_diffs"]], "get_results() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.get_results"]], "partial_compare() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.partial_compare"]], "plot_results() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.plot_results"]], "plot_results_sota() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.plot_results_sota"]], "print_results() (rlberry.manager.adastopcomparator method)": [[75, "rlberry.manager.AdastopComparator.print_results"]], "experimentmanager (class in rlberry.manager)": [[76, "rlberry.manager.ExperimentManager"]], "build_eval_env() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.build_eval_env"]], "clear_handlers() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.clear_handlers"]], "clear_output_dir() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.clear_output_dir"]], "eval_agents() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.eval_agents"]], "fit() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.fit"]], "generate_profile() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.generate_profile"]], "get_agent_instances() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.get_agent_instances"]], "get_writer_data() 
(rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.get_writer_data"]], "load() (rlberry.manager.experimentmanager class method)": [[76, "rlberry.manager.ExperimentManager.load"]], "optimize_hyperparams() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.optimize_hyperparams"]], "save() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.save"]], "set_writer() (rlberry.manager.experimentmanager method)": [[76, "rlberry.manager.ExperimentManager.set_writer"]], "multiplemanagers (class in rlberry.manager)": [[77, "rlberry.manager.MultipleManagers"]], "append() (rlberry.manager.multiplemanagers method)": [[77, "rlberry.manager.MultipleManagers.append"]], "run() (rlberry.manager.multiplemanagers method)": [[77, "rlberry.manager.MultipleManagers.run"]], "save() (rlberry.manager.multiplemanagers method)": [[77, "rlberry.manager.MultipleManagers.save"]], "compare_agents() (in module rlberry.manager)": [[78, "rlberry.manager.compare_agents"]], "evaluate_agents() (in module rlberry.manager)": [[79, "rlberry.manager.evaluate_agents"]], "plot_smoothed_curves() (in module rlberry.manager)": [[80, "rlberry.manager.plot_smoothed_curves"]], "plot_synchronized_curves() (in module rlberry.manager)": [[81, "rlberry.manager.plot_synchronized_curves"]], "plot_writer_data() (in module rlberry.manager)": [[82, "rlberry.manager.plot_writer_data"]], "preset_manager() (in module rlberry.manager)": [[83, "rlberry.manager.preset_manager"]], "read_writer_data() (in module rlberry.manager)": [[84, "rlberry.manager.read_writer_data"]], "run_venv_xp() (in module rlberry.manager)": [[85, "rlberry.manager.run_venv_xp"]], "tensorboard_folder_to_dataframe() (in module rlberry.manager)": [[86, "rlberry.manager.tensorboard_folder_to_dataframe"]], "with_venv() (in module rlberry.manager)": [[87, "rlberry.manager.with_venv"]], "safe_reseed() (in module rlberry.seeding)": [[88, 
"rlberry.seeding.safe_reseed"]], "seeder (class in rlberry.seeding.seeder)": [[89, "rlberry.seeding.seeder.Seeder"]], "reseed() (rlberry.seeding.seeder.seeder method)": [[89, "rlberry.seeding.seeder.Seeder.reseed"]], "spawn() (rlberry.seeding.seeder.seeder method)": [[89, "rlberry.seeding.seeder.Seeder.spawn"]], "set_external_seed() (in module rlberry.seeding)": [[90, "rlberry.seeding.set_external_seed"]], "box (class in rlberry.spaces)": [[91, "rlberry.spaces.Box"]], "contains() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.contains"]], "from_jsonable() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.from_jsonable"]], "is_bounded() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.is_bounded"]], "is_np_flattenable (rlberry.spaces.box property)": [[91, "rlberry.spaces.Box.is_np_flattenable"]], "np_random (rlberry.spaces.box property)": [[91, "rlberry.spaces.Box.np_random"]], "reseed() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.reseed"]], "sample() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.sample"]], "seed() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.seed"]], "shape (rlberry.spaces.box property)": [[91, "rlberry.spaces.Box.shape"]], "to_jsonable() (rlberry.spaces.box method)": [[91, "rlberry.spaces.Box.to_jsonable"]], "dict (class in rlberry.spaces)": [[92, "rlberry.spaces.Dict"]], "contains() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.contains"]], "from_jsonable() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.from_jsonable"]], "get() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.get"]], "is_np_flattenable (rlberry.spaces.dict property)": [[92, "rlberry.spaces.Dict.is_np_flattenable"]], "items() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.items"]], "keys() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.keys"]], "np_random (rlberry.spaces.dict property)": [[92, "rlberry.spaces.Dict.np_random"]], "sample() (rlberry.spaces.dict method)": [[92, 
"rlberry.spaces.Dict.sample"]], "seed() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.seed"]], "shape (rlberry.spaces.dict property)": [[92, "rlberry.spaces.Dict.shape"]], "to_jsonable() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.to_jsonable"]], "values() (rlberry.spaces.dict method)": [[92, "rlberry.spaces.Dict.values"]], "discrete (class in rlberry.spaces)": [[93, "rlberry.spaces.Discrete"]], "contains() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.contains"]], "from_jsonable() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.from_jsonable"]], "is_np_flattenable (rlberry.spaces.discrete property)": [[93, "rlberry.spaces.Discrete.is_np_flattenable"]], "np_random (rlberry.spaces.discrete property)": [[93, "rlberry.spaces.Discrete.np_random"]], "reseed() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.reseed"]], "sample() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.sample"]], "seed() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.seed"]], "shape (rlberry.spaces.discrete property)": [[93, "rlberry.spaces.Discrete.shape"]], "to_jsonable() (rlberry.spaces.discrete method)": [[93, "rlberry.spaces.Discrete.to_jsonable"]], "multibinary (class in rlberry.spaces)": [[94, "rlberry.spaces.MultiBinary"]], "contains() (rlberry.spaces.multibinary method)": [[94, "rlberry.spaces.MultiBinary.contains"]], "from_jsonable() (rlberry.spaces.multibinary method)": [[94, "rlberry.spaces.MultiBinary.from_jsonable"]], "is_np_flattenable (rlberry.spaces.multibinary property)": [[94, "rlberry.spaces.MultiBinary.is_np_flattenable"]], "np_random (rlberry.spaces.multibinary property)": [[94, "rlberry.spaces.MultiBinary.np_random"]], "reseed() (rlberry.spaces.multibinary method)": [[94, "rlberry.spaces.MultiBinary.reseed"]], "sample() (rlberry.spaces.multibinary method)": [[94, "rlberry.spaces.MultiBinary.sample"]], "seed() (rlberry.spaces.multibinary method)": [[94, 
"rlberry.spaces.MultiBinary.seed"]], "shape (rlberry.spaces.multibinary property)": [[94, "rlberry.spaces.MultiBinary.shape"]], "to_jsonable() (rlberry.spaces.multibinary method)": [[94, "rlberry.spaces.MultiBinary.to_jsonable"]], "multidiscrete (class in rlberry.spaces)": [[95, "rlberry.spaces.MultiDiscrete"]], "contains() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.contains"]], "from_jsonable() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.from_jsonable"]], "is_np_flattenable (rlberry.spaces.multidiscrete property)": [[95, "rlberry.spaces.MultiDiscrete.is_np_flattenable"]], "np_random (rlberry.spaces.multidiscrete property)": [[95, "rlberry.spaces.MultiDiscrete.np_random"]], "reseed() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.reseed"]], "sample() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.sample"]], "seed() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.seed"]], "shape (rlberry.spaces.multidiscrete property)": [[95, "rlberry.spaces.MultiDiscrete.shape"]], "to_jsonable() (rlberry.spaces.multidiscrete method)": [[95, "rlberry.spaces.MultiDiscrete.to_jsonable"]], "tuple (class in rlberry.spaces)": [[96, "rlberry.spaces.Tuple"]], "contains() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.contains"]], "count() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.count"]], "from_jsonable() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.from_jsonable"]], "index() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.index"]], "is_np_flattenable (rlberry.spaces.tuple property)": [[96, "rlberry.spaces.Tuple.is_np_flattenable"]], "np_random (rlberry.spaces.tuple property)": [[96, "rlberry.spaces.Tuple.np_random"]], "sample() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.sample"]], "seed() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.seed"]], "shape 
(rlberry.spaces.tuple property)": [[96, "rlberry.spaces.Tuple.shape"]], "to_jsonable() (rlberry.spaces.tuple method)": [[96, "rlberry.spaces.Tuple.to_jsonable"]], "check_env() (in module rlberry.utils)": [[97, "rlberry.utils.check_env"]], "check_experiment_manager() (in module rlberry.utils)": [[98, "rlberry.utils.check_experiment_manager"]], "check_fit_additive() (in module rlberry.utils)": [[99, "rlberry.utils.check_fit_additive"]], "check_rl_agent() (in module rlberry.utils)": [[100, "rlberry.utils.check_rl_agent"]], "check_save_load() (in module rlberry.utils)": [[101, "rlberry.utils.check_save_load"]], "check_seeding_agent() (in module rlberry.utils)": [[102, "rlberry.utils.check_seeding_agent"]], "set_level() (in module rlberry.utils.logging)": [[103, "rlberry.utils.logging.set_level"]], "defaultwriter (class in rlberry.utils.writers)": [[104, "rlberry.utils.writers.DefaultWriter"]], "add_scalar() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.add_scalar"]], "add_scalars() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.add_scalars"]], "read_first_tag_value() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.read_first_tag_value"]], "read_last_tag_value() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.read_last_tag_value"]], "read_tag_value() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.read_tag_value"]], "reset() (rlberry.utils.writers.defaultwriter method)": [[104, "rlberry.utils.writers.DefaultWriter.reset"]], "rescalerewardwrapper (class in rlberry.wrappers)": [[105, "rlberry.wrappers.RescaleRewardWrapper"]], "close() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.close"]], "get_params() (rlberry.wrappers.rescalerewardwrapper method)": [[105, 
"rlberry.wrappers.RescaleRewardWrapper.get_params"]], "get_video() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.get_video"]], "get_wrapper_attr() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.get_wrapper_attr"]], "is_generative() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.is_generative"]], "is_online() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.is_online"]], "np_random (rlberry.wrappers.rescalerewardwrapper property)": [[105, "rlberry.wrappers.RescaleRewardWrapper.np_random"]], "render() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.render"]], "reseed() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.reseed"]], "reset() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.reset"]], "rng (rlberry.wrappers.rescalerewardwrapper property)": [[105, "rlberry.wrappers.RescaleRewardWrapper.rng"]], "sample() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.sample"]], "save_video() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.save_video"]], "step() (rlberry.wrappers.rescalerewardwrapper method)": [[105, "rlberry.wrappers.RescaleRewardWrapper.step"]], "unwrapped (rlberry.wrappers.rescalerewardwrapper property)": [[105, "rlberry.wrappers.RescaleRewardWrapper.unwrapped"]], "discretizestatewrapper (class in rlberry.wrappers.discretize_state)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper"]], "close() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.close"]], "get_params() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, 
"rlberry.wrappers.discretize_state.DiscretizeStateWrapper.get_params"]], "get_video() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.get_video"]], "get_wrapper_attr() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.get_wrapper_attr"]], "is_generative() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.is_generative"]], "is_online() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.is_online"]], "np_random (rlberry.wrappers.discretize_state.discretizestatewrapper property)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.np_random"]], "render() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.render"]], "reseed() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.reseed"]], "reset() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.reset"]], "rng (rlberry.wrappers.discretize_state.discretizestatewrapper property)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.rng"]], "sample() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.sample"]], "save_video() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.save_video"]], "step() (rlberry.wrappers.discretize_state.discretizestatewrapper method)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.step"]], "unwrapped 
(rlberry.wrappers.discretize_state.discretizestatewrapper property)": [[106, "rlberry.wrappers.discretize_state.DiscretizeStateWrapper.unwrapped"]], "oldgymcompatibilitywrapper (class in rlberry.wrappers.gym_utils)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper"]], "close() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.close"]], "get_params() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.get_params"]], "get_video() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.get_video"]], "get_wrapper_attr() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.get_wrapper_attr"]], "is_generative() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.is_generative"]], "is_online() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.is_online"]], "np_random (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper property)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.np_random"]], "render() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.render"]], "reseed() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.reseed"]], "reset() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.reset"]], "rng (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper property)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.rng"]], "sample() 
(rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.sample"]], "save_video() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.save_video"]], "step() (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper method)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.step"]], "unwrapped (rlberry.wrappers.gym_utils.oldgymcompatibilitywrapper property)": [[107, "rlberry.wrappers.gym_utils.OldGymCompatibilityWrapper.unwrapped"]]}})
\ No newline at end of file