Skip to content

Commit

Permalink
Reverse order of lower and upper bounds arguments in bounded UCB/MOSS indices (rlberry-py#348)
Browse files Browse the repository at this point in the history

* reverse LB/UB in bounded UCB/MOSS indices

* Empty commit
  • Loading branch information
RemyDegenne authored Jul 24, 2023
1 parent 0a5cedd commit 5833bf0
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion examples/demo_bandits/plot_TS_bandit.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class BernoulliUCBAgent(IndexAgent):
name = "Bernoulli UCB Agent"

def __init__(self, env, **kwargs):
index, _ = makeBoundedUCBIndex(1, 0)
index, _ = makeBoundedUCBIndex(0, 1)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")

Expand Down
16 changes: 8 additions & 8 deletions rlberry/agents/bandits/indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ def index(tr):


def makeBoundedUCBIndex(
upper_bound: float = 1.0,
lower_bound: float = 0.0,
upper_bound: float = 1.0,
delta: Callable = lambda t: 1 / (1 + (t + 1) * np.log(t + 1) ** 2),
):
"""
Expand All @@ -92,12 +92,12 @@ def makeBoundedUCBIndex(
Parameters
----------
upper_bound: float, default: 1.0
Upper bound on the rewards.
lower_bound: float, default: 0.0
Lower bound on the rewards.
upper_bound: float, default: 1.0
Upper bound on the rewards.
delta: Callable,
Confidence level. Default is tuned to have asymptotically optimal
regret, see Chapter 8 in [1].
Expand Down Expand Up @@ -167,7 +167,7 @@ def index(tr):


def makeBoundedMOSSIndex(
T: float = 1, A: float = 2, upper_bound: float = 1.0, lower_bound: float = 0.0
T: float = 1, A: float = 2, lower_bound: float = 0.0, upper_bound: float = 1.0
):
"""
MOSS index for bounded distributions, see Chapter 9 in [1].
Expand All @@ -182,12 +182,12 @@ def makeBoundedMOSSIndex(
A: int
Number of arms.
upper_bound: float, default: 1.0
Upper bound on the rewards.
lower_bound: float, default: 0.0
Lower bound on the rewards.
upper_bound: float, default: 1.0
Upper bound on the rewards.
Return
------
Callable
Expand Down

0 comments on commit 5833bf0

Please sign in to comment.