From 56436b636a425d033122981e4399bf8778c3b7d3 Mon Sep 17 00:00:00 2001 From: Shahar Bar Date: Wed, 11 Dec 2024 14:06:54 +0200 Subject: [PATCH] Add epsilon greedy support in from_state ### Changes * Add epsilon and default_action to from_state methods in smab.py and cmab.py. * Updated state UTs. --- .github/workflows/release_draft.yml | 1 + pybandits/cmab.py | 20 +++++++++-- pybandits/smab.py | 32 +++++++++++++++--- tests/test_cmab.py | 38 ++++++++++++++------- tests/test_smab.py | 51 +++++++++++++++++++---------- 5 files changed, 105 insertions(+), 37 deletions(-) diff --git a/.github/workflows/release_draft.yml b/.github/workflows/release_draft.yml index cadfae7..a57c086 100644 --- a/.github/workflows/release_draft.yml +++ b/.github/workflows/release_draft.yml @@ -4,6 +4,7 @@ on: push: branches: - develop + - main jobs: draft_release: diff --git a/pybandits/cmab.py b/pybandits/cmab.py index efc5587..acfbb2a 100644 --- a/pybandits/cmab.py +++ b/pybandits/cmab.py @@ -227,7 +227,11 @@ def __init__( @classmethod def from_state(cls, state: dict) -> "CmabBernoulli": - return cls(actions=state["actions"]) + return cls( + actions=state["actions"], + epsilon=state.get("epsilon", None), + default_action=state.get("default_action", None), + ) @validate_call(config=dict(arbitrary_types_allowed=True)) def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[BinaryReward]): @@ -271,7 +275,12 @@ def __init__( @classmethod def from_state(cls, state: dict) -> "CmabBernoulliBAI": - return cls(actions=state["actions"], exploit_p=state["strategy"].get("exploit_p", None)) + return cls( + actions=state["actions"], + exploit_p=state["strategy"].get("exploit_p", None), + epsilon=state.get("epsilon", None), + default_action=state.get("default_action", None), + ) @validate_call(config=dict(arbitrary_types_allowed=True)) def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[BinaryReward]): @@ -324,7 +333,12 @@ def __init__( @classmethod def from_state(cls, state: dict) -> "CmabBernoulliCC": - return cls(actions=state["actions"], subsidy_factor=state["strategy"].get("subsidy_factor", None)) + return cls( + actions=state["actions"], + subsidy_factor=state["strategy"].get("subsidy_factor", None), + epsilon=state.get("epsilon", None), + default_action=state.get("default_action", None), + ) @validate_call(config=dict(arbitrary_types_allowed=True)) def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[BinaryReward]): diff --git a/pybandits/smab.py b/pybandits/smab.py index 65e4bb1..a3ee363 100644 --- a/pybandits/smab.py +++ b/pybandits/smab.py @@ -150,7 +150,11 @@ def __init__( @classmethod def from_state(cls, state: dict) -> "SmabBernoulli": - return cls(actions=state["actions"]) + return cls( + actions=state["actions"], + epsilon=state.get("epsilon", None), + default_action=state.get("default_action", None), + ) @validate_call def update(self, actions: List[ActionId], rewards: List[BinaryReward]): @@ -187,7 +191,12 @@ def __init__( @classmethod def from_state(cls, state: dict) -> "SmabBernoulliBAI": - return cls(actions=state["actions"], exploit_p=state["strategy"].get("exploit_p", None)) + return cls( + actions=state["actions"], + exploit_p=state["strategy"].get("exploit_p", None), + epsilon=state.get("epsilon", None), + default_action=state.get("default_action", None), + ) @validate_call def update(self, actions: List[ActionId], rewards: List[BinaryReward]): @@ -232,7 +241,12 @@ def __init__( @classmethod def from_state(cls, state: dict) -> "SmabBernoulliCC": - return cls(actions=state["actions"], subsidy_factor=state["strategy"].get("subsidy_factor", None)) + return cls( + actions=state["actions"], + subsidy_factor=state["strategy"].get("subsidy_factor", None), + epsilon=state.get("epsilon", None), + default_action=state.get("default_action", None), + ) @validate_call def update(self, actions: List[ActionId], rewards: List[BinaryReward]): @@ -303,7 +317,11 @@ def __init__( @classmethod def from_state(cls, state: dict) -> "SmabBernoulliMO": - return cls(actions=state["actions"]) + return cls( + actions=state["actions"], + epsilon=state.get("epsilon", None), + default_action=state.get("default_action", None), + ) class SmabBernoulliMOCC(BaseSmabBernoulliMO): @@ -337,7 +355,11 @@ def __init__( @classmethod def from_state(cls, state: dict) -> "SmabBernoulliMOCC": - return cls(actions=state["actions"]) + return cls( + actions=state["actions"], + epsilon=state.get("epsilon", None), + default_action=state.get("default_action", None), + ) @validate_call diff --git a/tests/test_cmab.py b/tests/test_cmab.py index a992afa..2ff034f 100644 --- a/tests/test_cmab.py +++ b/tests/test_cmab.py @@ -19,7 +19,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from typing import get_args +from typing import Optional, get_args import numpy as np import pandas as pd @@ -381,21 +381,26 @@ def run_predict(mab): @settings(deadline=500) -@given(st.integers(min_value=1), st.integers(min_value=1), st.integers(min_value=2, max_value=100)) -def test_cmab_get_state(mu, sigma, n_features): +@given( + st.integers(min_value=1), + st.integers(min_value=1), + st.integers(min_value=2, max_value=100), + st.sampled_from([None, 0.1]), +) +def test_cmab_get_state(mu, sigma, n_features, epsilon): actions: dict = { "a1": BayesianLogisticRegression(alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()]), "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), } - cmab = CmabBernoulli(actions=actions) + cmab = CmabBernoulli(actions=actions, epsilon=epsilon) expected_state = to_serializable_dict( { "actions": actions, "strategy": {}, "predict_with_proba": False, "predict_actions_randomly": False, - "epsilon": None, + "epsilon": epsilon, "default_action": None, } ) @@ -438,6 +443,7 @@ def test_cmab_get_state(mu, sigma, n_features): min_size=2, ), "strategy": st.fixed_dictionaries({}), + "epsilon": st.sampled_from([None, 0.1]), } ), update_method=st.sampled_from(literal_update_methods), @@ -613,21 +619,22 @@ def test_cmab_bai_update(n_samples, n_features, update_method): st.integers(min_value=1), st.integers(min_value=2, max_value=100), st.floats(min_value=0, max_value=1), + st.sampled_from([None, 0.1]), ) -def test_cmab_bai_get_state(mu, sigma, n_features, exploit_p: Float01): +def test_cmab_bai_get_state(mu, sigma, n_features, exploit_p: Float01, epsilon: Optional[Float01]): actions: dict = { "a1": BayesianLogisticRegression(alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()]), "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), } - cmab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p) + cmab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p, epsilon=epsilon) expected_state = to_serializable_dict( { "actions": actions, "strategy": {"exploit_p": exploit_p}, "predict_with_proba": False, "predict_actions_randomly": False, - "epsilon": None, + "epsilon": epsilon, "default_action": None, } ) @@ -674,6 +681,7 @@ def test_cmab_bai_get_state(mu, sigma, n_features, exploit_p: Float01): st.just({"exploit_p": None}), st.builds(lambda x: {"exploit_p": x}, st.floats(min_value=0, max_value=1)), ), + "epsilon": st.sampled_from([None, 0.1]), } ), update_method=st.sampled_from(literal_update_methods), @@ -864,9 +872,16 @@ def test_cmab_cc_update(n_samples, n_features, update_method): st.floats(min_value=0), st.floats(min_value=0), st.floats(min_value=0, max_value=1), + st.sampled_from([None, 0.1]), ) def test_cmab_cc_get_state( - mu, sigma, n_features, cost_1: NonNegativeFloat, cost_2: NonNegativeFloat, subsidy_factor: Float01 + mu, + sigma, + n_features, + cost_1: NonNegativeFloat, + cost_2: NonNegativeFloat, + subsidy_factor: Float01, + epsilon: Optional[Float01], ): actions: dict = { "a1": BayesianLogisticRegressionCC( @@ -875,14 +890,14 @@ def test_cmab_cc_get_state( "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=cost_2), } - cmab = CmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor) + cmab = CmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor, epsilon=epsilon) expected_state = to_serializable_dict( { "actions": actions, "strategy": {"subsidy_factor": subsidy_factor}, "predict_with_proba": True, "predict_actions_randomly": False, - "epsilon": None, + "epsilon": epsilon, "default_action": None, } ) @@ -930,6 +945,7 @@ def test_cmab_cc_get_state( st.just({"subsidy_factor": None}), st.builds(lambda x: {"subsidy_factor": x}, st.floats(min_value=0, max_value=1)), ), + "epsilon": st.sampled_from([None, 0.1]), } ), update_method=st.sampled_from(literal_update_methods), diff --git a/tests/test_smab.py b/tests/test_smab.py index 369d016..06136d3 100644 --- a/tests/test_smab.py +++ b/tests/test_smab.py @@ -21,7 +21,7 @@ # SOFTWARE. import json from copy import deepcopy -from typing import List +from typing import List, Optional import pytest from hypothesis import given @@ -203,16 +203,22 @@ def test_smab_accepts_only_valid_actions(s): SmabBernoulli(actions={s: Beta(), s + "_": Beta()}) -@given(st.integers(min_value=1), st.integers(min_value=1), st.integers(min_value=1), st.integers(min_value=1)) -def test_smab_get_state(a, b, c, d): +@given( + st.integers(min_value=1), + st.integers(min_value=1), + st.integers(min_value=1), + st.integers(min_value=1), + st.sampled_from([None, 0.1]), +) +def test_smab_get_state(a, b, c, d, epsilon): actions = {"action1": Beta(n_successes=a, n_failures=b), "action2": Beta(n_successes=c, n_failures=d)} - smab = SmabBernoulli(actions=actions) + smab = SmabBernoulli(actions=actions, epsilon=epsilon) expected_state = to_serializable_dict( { "actions": actions, "strategy": {}, - "epsilon": None, + "epsilon": epsilon, "default_action": None, } ) @@ -236,6 +242,7 @@ def test_smab_get_state(a, b, c, d): min_size=2, ), "strategy": st.fixed_dictionaries({}), + "epsilon": st.sampled_from([None, 0.1]), } ) ) @@ -324,15 +331,16 @@ def test_smabbai_with_betacc(): st.integers(min_value=1), st.integers(min_value=1), st.floats(min_value=0, max_value=1), + st.sampled_from([None, 0.1]), ) -def test_smab_bai_get_state(a, b, c, d, exploit_p: Float01): +def test_smab_bai_get_state(a, b, c, d, exploit_p: Float01, epsilon: Optional[Float01]): actions = {"action1": Beta(n_successes=a, n_failures=b), "action2": Beta(n_successes=c, n_failures=d)} smab = SmabBernoulliBAI(actions=actions, exploit_p=exploit_p) expected_state = to_serializable_dict( { "actions": actions, "strategy": {"exploit_p": exploit_p}, - "epsilon": None, + "epsilon": epsilon, "default_action": None, } ) @@ -362,6 +370,7 @@ def test_smab_bai_get_state(a, b, c, d, exploit_p: Float01): st.just({"exploit_p": None}), st.builds(lambda x: {"exploit_p": x}, st.floats(min_value=0, max_value=1)), ), + "epsilon": st.sampled_from([None, 0.1]), } ) ) @@ -454,20 +463,23 @@ def test_smabcc_update(): st.floats(min_value=0), st.floats(min_value=0), st.floats(min_value=0, max_value=1), + st.sampled_from([None, 0.1]), ) -def test_smab_cc_get_state(a, b, c, d, cost1: NonNegativeFloat, cost2: NonNegativeFloat, subsidy_factor: Float01): +def test_smab_cc_get_state( + a, b, c, d, cost1: NonNegativeFloat, cost2: NonNegativeFloat, subsidy_factor: Float01, epsilon: Optional[Float01] +): actions = { "action1": BetaCC(n_successes=a, n_failures=b, cost=cost1), "action2": BetaCC(n_successes=c, n_failures=d, cost=cost2), } - smab = SmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor) + smab = SmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor, epsilon=epsilon) expected_state = to_serializable_dict( { "actions": actions, "strategy": { "subsidy_factor": subsidy_factor, }, - "epsilon": None, + "epsilon": epsilon, "default_action": None, } ) @@ -498,6 +510,7 @@ def test_smab_cc_get_state(a, b, c, d, cost1: NonNegativeFloat, cost2: NonNegati st.just({"subsidy_factor": None}), st.builds(lambda x: {"subsidy_factor": x}, st.floats(min_value=0, max_value=1)), ), + "epsilon": st.sampled_from([None, 0.1]), } ) ) @@ -606,8 +619,8 @@ def test_smab_mo_update(): mab.update(actions=["a1", "a1"], rewards=[[1, 0, 1], [1, 1, 0]]) -@given(st.lists(st.integers(min_value=1), min_size=6, max_size=6)) -def test_smab_mo_get_state(a_list): +@given(st.lists(st.integers(min_value=1), min_size=6, max_size=6), st.sampled_from([None, 0.1])) +def test_smab_mo_get_state(a_list, epsilon): a, b, c, d, e, f = a_list actions = { @@ -626,12 +639,12 @@ def test_smab_mo_get_state(a_list): ] ), } - smab = SmabBernoulliMO(actions=actions) + smab = SmabBernoulliMO(actions=actions, epsilon=epsilon) expected_state = to_serializable_dict( { "actions": actions, "strategy": {}, - "epsilon": None, + "epsilon": epsilon, "default_action": None, } ) @@ -665,6 +678,7 @@ def test_smab_mo_get_state(a_list): min_size=2, ), "strategy": st.fixed_dictionaries({}), + "epsilon": st.sampled_from([None, 0.1]), } ) ) @@ -767,8 +781,8 @@ def test_smab_mo_cc_predict(): s.predict(n_samples=n_samples, forbidden_actions=forbidden) -@given(st.lists(st.integers(min_value=1), min_size=8, max_size=8)) -def test_smab_mocc_get_state(a_list): +@given(st.lists(st.integers(min_value=1), min_size=8, max_size=8), st.sampled_from([None, 0.1])) +def test_smab_mo_cc_get_state(a_list, epsilon): a, b, c, d, e, f, g, h = a_list actions = { @@ -789,12 +803,12 @@ def test_smab_mocc_get_state(a_list): cost=h, ), } - smab = SmabBernoulliMOCC(actions=actions) + smab = SmabBernoulliMOCC(actions=actions, epsilon=epsilon) expected_state = to_serializable_dict( { "actions": actions, "strategy": {}, - "epsilon": None, + "epsilon": epsilon, "default_action": None, } ) @@ -829,6 +843,7 @@ def test_smab_mocc_get_state(a_list): min_size=2, ), "strategy": st.fixed_dictionaries({}), + "epsilon": st.sampled_from([None, 0.1]), } ) )