Reverse order of lower and upper bounds arguments in bounded UCB/MOSS indices (rlberry-py#348)

* reverse LB/UB in bounded UCB/MOSS indices
* Empty commit
BorisHamadej · Jul 24, 2023 · 5833bf0 · 5833bf0
1 parent 0a5cedd
commit 5833bf0
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 9 deletions.
diff --git a/examples/demo_bandits/plot_TS_bandit.py b/examples/demo_bandits/plot_TS_bandit.py
@@ -42,7 +42,7 @@ class BernoulliUCBAgent(IndexAgent):
     name = "Bernoulli UCB Agent"
 
     def __init__(self, env, **kwargs):
-        index, _ = makeBoundedUCBIndex(1, 0)
+        index, _ = makeBoundedUCBIndex(0, 1)
         IndexAgent.__init__(self, env, index, **kwargs)
         self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
 

diff --git a/rlberry/agents/bandits/indices.py b/rlberry/agents/bandits/indices.py
@@ -81,8 +81,8 @@ def index(tr):
 
 
 def makeBoundedUCBIndex(
-    upper_bound: float = 1.0,
     lower_bound: float = 0.0,
+    upper_bound: float = 1.0,
     delta: Callable = lambda t: 1 / (1 + (t + 1) * np.log(t + 1) ** 2),
 ):
     """
@@ -92,12 +92,12 @@ def makeBoundedUCBIndex(
 
     Parameters
     ----------
-    upper_bound: float, default: 1.0
-        Upper bound on the rewards.
-
     lower_bound: float, default: 0.0
         Lower bound on the rewards.
 
+    upper_bound: float, default: 1.0
+        Upper bound on the rewards.
+
     delta: Callable,
         Confidence level. Default is tuned to have asymptotically optimal
         regret, see Chapter 8 in [1].
@@ -167,7 +167,7 @@ def index(tr):
 
 
 def makeBoundedMOSSIndex(
-    T: float = 1, A: float = 2, upper_bound: float = 1.0, lower_bound: float = 0.0
+    T: float = 1, A: float = 2, lower_bound: float = 0.0, upper_bound: float = 1.0
 ):
     """
     MOSS index for bounded distributions, see Chapters 9 in [1].
@@ -182,12 +182,12 @@ def makeBoundedMOSSIndex(
     A: int
         Number of arms.
 
-    upper_bound: float, default: 1.0
-        Upper bound on the rewards.
-
     lower_bound: float, default: 0.0
         Lower bound on the rewards.
 
+    upper_bound: float, default: 1.0
+        Upper bound on the rewards.
+
     Return
     ------
     Callable