From 64913ef70653e9b48341caa14d29d9a9419b97e2 Mon Sep 17 00:00:00 2001 From: Shahar Bar <33932594+shaharbar1@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:58:15 +0200 Subject: [PATCH] Renew simulation framework for multi-armed bandits (#39) ### Changes * Introduced `Simulator` base abstract class for simulating multi-armed bandit environments. * Added `SmabSimulator` class for simulating stochastic multi-armed bandits (sMAB). * Added `CmabSimulator` class for simulating contextual multi-armed bandits (cMAB). * Added a utility function under utils.py for identifying the running code environment. * Updated pyproject.toml to include bokeh dependency for interactive visualization. * Added unit tests for the various simulators to ensure proper functionality. * Removed simulation_plots.py. * Edited pyproject.toml to remove deprecation warnings. * Added model_post_init functionality in PyBanditsBaseModel for pydantic v2 compatibility. * Added concurrent pytest execution on CI/CD. --- .github/workflows/continuous_delivery.yml | 2 +- .github/workflows/continuous_integration.yml | 2 +- pybandits/base.py | 15 + pybandits/cmab_simulator.py | 173 ++++++ pybandits/pydantic_version_compatibility.py | 15 +- pybandits/simulation_cmab.py | 307 ---------- pybandits/simulation_plots.py | 45 -- pybandits/simulation_smab.py | 213 ------- pybandits/simulator.py | 614 +++++++++++++++++++ pybandits/smab_simulator.py | 140 +++++ pybandits/utils.py | 89 ++- pyproject.toml | 15 +- tests/test_cmab.py | 11 +- tests/test_cmab_simulator.py | 167 +++++ tests/test_simulation_cmab.py | 127 ---- tests/test_simulation_smab.py | 102 --- tests/test_simulator.py | 67 ++ tests/test_smab_simulator.py | 119 ++++ 18 files changed, 1416 insertions(+), 807 deletions(-) create mode 100644 pybandits/cmab_simulator.py delete mode 100644 pybandits/simulation_cmab.py delete mode 100644 pybandits/simulation_plots.py delete mode 100644 pybandits/simulation_smab.py create mode 100644 pybandits/simulator.py create mode 100644 pybandits/smab_simulator.py create mode 100644 tests/test_cmab_simulator.py delete mode 100644 tests/test_simulation_cmab.py delete mode 100644 tests/test_simulation_smab.py create mode 100644 tests/test_simulator.py create mode 100644 tests/test_smab_simulator.py diff --git a/.github/workflows/continuous_delivery.yml b/.github/workflows/continuous_delivery.yml index ab2679e..7c2bc7b 100644 --- a/.github/workflows/continuous_delivery.yml +++ b/.github/workflows/continuous_delivery.yml @@ -40,7 +40,7 @@ jobs: poetry run pre-commit run --all-files - name: Run tests run: | - poetry run pytest -vv -k 'not time and not update_parallel' + poetry run pytest -n auto -vv - name: Extract version from pyproject.toml id: extract_version run: | diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index 2853deb..6c3cdac 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -45,4 +45,4 @@ jobs: poetry run pre-commit run --all-files - name: Run tests run: | - poetry run pytest -vv -k 'not time and not update_parallel' + poetry run pytest -n auto -vv diff --git a/pybandits/base.py b/pybandits/base.py index 3b59c74..4cae4ad 100644 --- a/pybandits/base.py +++ b/pybandits/base.py @@ -36,7 +36,12 @@ ActionId = NewType("ActionId", constr(min_length=1)) Float01 = NewType("Float_0_1", confloat(ge=0, le=1)) Probability = NewType("Probability", Float01) +# SmabPredictions is a tuple of two lists: the first list contains the selected action 
ids, +# and the second list contains their associated probabilities SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]]) +# CmabPredictions is a tuple of three lists: the first list contains the selected action ids, +# the second list contains their associated probabilities, +# and the third list contains their associated weighted sums CmabPredictions = NewType( "CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]] ) @@ -59,6 +64,16 @@ class PyBanditsBaseModel(BaseModel, extra="forbid"): BaseModel of the PyBandits library. """ + if pydantic_version == PYDANTIC_VERSION_1: + + def __init__(self, **data): + super().__init__(**data) + + self.model_post_init(None) + + def model_post_init(self, __context: Any) -> None: + pass + def _apply_version_adjusted_method(self, v2_method_name: str, v1_method_name: str, **kwargs) -> Any: """ Apply the method with the given name, adjusting for the pydantic version. diff --git a/pybandits/cmab_simulator.py b/pybandits/cmab_simulator.py new file mode 100644 index 0000000..be3fb4a --- /dev/null +++ b/pybandits/cmab_simulator.py @@ -0,0 +1,173 @@ +# MIT License +# +# Copyright (c) 2022 Playtika Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import random +from typing import Dict, List, Optional, Tuple + +import numpy as np +import pandas as pd + +from pybandits.base import ActionId, BinaryReward +from pybandits.cmab import BaseCmabBernoulli +from pybandits.pydantic_version_compatibility import Field, model_validator +from pybandits.simulator import Simulator + + +class CmabSimulator(Simulator): + """ + Simulate environment for contextual multi-armed bandit models. + + This class simulates information required by the contextual bandit. Generated data are processed by the bandit with + batches of size n>=1. For each batch of samples, actions are recommended by the bandit and corresponding simulated + rewards collected. Bandit policy parameters are then updated based on returned rewards from recommended actions. + + Parameters + ---------- + mab : BaseCmabBernoulli + Contextual multi-armed bandit model + context : np.ndarray of shape (n_samples, n_features) + Context matrix of samples features. + group : Optional[List] with length=n_samples + Group to which each sample belongs. 
Samples that belong to the same group have features that come from the + same distribution and share the same probability of receiving positive/negative feedback from each action. + If not supplied, all samples are assigned to the same group. + """ + + mab: BaseCmabBernoulli = Field(validation_alias="cmab") + context: np.ndarray + group: Optional[List] = None + _base_columns: List[str] = ["batch", "action", "reward", "group"] + + @model_validator(mode="before") + @classmethod + def replace_nulls_and_validate_sizes_and_dtypes(cls, values): + context = values["context"] + batch_size = cls._get_value_with_default("batch_size", values) + n_updates = cls._get_value_with_default("n_updates", values) + group = cls._get_value_with_default("group", values) + + if len(context) != batch_size * n_updates: + raise ValueError("Context length must equal batch_size x n_updates.") + if group is None: + group = len(context) * ["0"] + values["group"] = group + else: + if len(context) != len(group): + raise ValueError("Mismatch between context length and group length") + values["group"] = [str(g) for g in group] + mab_action_ids = list(values["mab"].actions.keys()) + index = list(set(group)) + probs_reward = cls._get_value_with_default("probs_reward", values) + if probs_reward is None: + probs_reward = pd.DataFrame(0.5, index=index, columns=mab_action_ids) + values["probs_reward"] = probs_reward + else: + if probs_reward.shape[0] != len(index): + raise ValueError("Number of probs_reward rows must match the number of groups.") + return values + + def _initialize_results(self): + """ + Initialize the results DataFrame. The results DataFrame is used to store the raw simulation results. + """ + self._results = pd.DataFrame( + columns=["action", "reward", "group", "selected_prob_reward", "max_prob_reward"], + ) + + def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]: + """ + Draw rewards for the selected actions based on metadata according to probs_reward. + + Parameters + ---------- + actions : List[ActionId] + The actions selected by the multi-armed bandit model. + metadata : Dict[str, List] + The metadata for the selected actions; should contain the batch groups association. + + Returns + ------- + reward : List[BinaryReward] + A list of binary rewards. + """ + rewards = [int(random.random() < self.probs_reward.loc[g, a]) for g, a in zip(metadata["group"], actions)] + return rewards + + def _get_batch_step_kwargs_and_metadata( + self, batch_index + ) -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray], Dict[str, List]]: + """ + Extract context required for the cMAB's update and predict functionality, + as well as metadata for sample group. + + Parameters + ---------- + batch_index : int + The index of the batch. + + Returns + ------- + predict_kwargs : Dict[str, np.ndarray] + Dictionary containing the context for the batch. + update_kwargs : Dict[str, np.ndarray] + Dictionary containing the context for the batch. + metadata : Dict[str, List] + Dictionary containing the group information for the batch. + """ + idx_batch_min = batch_index * self.batch_size + idx_batch_max = (batch_index + 1) * self.batch_size + predict_and_update_kwargs = {"context": self.context[idx_batch_min:idx_batch_max]} + metadata = {"group": self.group[idx_batch_min:idx_batch_max]} + return predict_and_update_kwargs, predict_and_update_kwargs, metadata + + def _finalize_step(self, batch_results: pd.DataFrame): + """ + Finalize the step by adding additional information to the batch results. 
+ + Parameters + ---------- + batch_results : pd.DataFrame + raw batch results + + Returns + ------- + batch_results : pd.DataFrame + batch results with added reward probability for selected action and most rewarding action + """ + group_id = batch_results.loc[:, "group"] + action_id = batch_results.loc[:, "action"] + selected_prob_reward = [self.probs_reward.loc[g, a] for g, a in zip(group_id, action_id)] + batch_results.loc[:, "selected_prob_reward"] = selected_prob_reward + max_prob_reward = self.probs_reward.loc[group_id].max(axis=1) + batch_results.loc[:, "max_prob_reward"] = max_prob_reward.tolist() + return batch_results + + def _finalize_results(self): + """ + Finalize the simulation process. Used to add regret and cumulative regret + + Returns + ------- + None + """ + self._results["regret"] = self._results["max_prob_reward"] - self._results["selected_prob_reward"] + self._results["cum_regret"] = self._results["regret"].cumsum() diff --git a/pybandits/pydantic_version_compatibility.py b/pybandits/pydantic_version_compatibility.py index a119264..b032ecd 100644 --- a/pybandits/pydantic_version_compatibility.py +++ b/pybandits/pydantic_version_compatibility.py @@ -27,7 +27,18 @@ from typing import Any, Callable, Dict, Literal, Optional, Union from warnings import warn -from pydantic import BaseModel, Field, NonNegativeFloat, PositiveInt, ValidationError, confloat, conint, constr +from pydantic import ( + BaseModel, + Field, + NonNegativeFloat, + NonNegativeInt, + PositiveInt, + PrivateAttr, + ValidationError, + confloat, + conint, + constr, +) from pydantic.version import VERSION as _VERSION # Define the pydantic versions @@ -258,6 +269,7 @@ def _convert_config_param(config: Dict[str, Any], v2_name: str, v1_name: str) -> "model_validator", "validate_call", "NonNegativeFloat", + "NonNegativeInt", "PositiveInt", "BaseModel", "ValidationError", @@ -265,4 +277,5 @@ def _convert_config_param(config: Dict[str, Any], v2_name: str, v1_name: str) -> "conint", "constr", "Field", + "PrivateAttr", ] diff --git a/pybandits/simulation_cmab.py b/pybandits/simulation_cmab.py deleted file mode 100644 index 1578f32..0000000 --- a/pybandits/simulation_cmab.py +++ /dev/null @@ -1,307 +0,0 @@ -# MIT License -# -# Copyright (c) 2022 Playtika Ltd. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import random -from json import dump - -import numpy as np -import pandas as pd - -from pybandits.core.cmab import Cmab, check_context_matrix - - -class SimulationCmab: - """ - Simulate environment for contextual multi-armed bandit models. - - This class simulates information required by the contextual bandit. Generated data are processed by the bandit with - batches of size n>=1. For each batch of samples, actions are recommended by the bandit and corresponding simulated - rewards collected. Bandit policy parameters are then updated based on returned rewards from recommended actions. - - Parameters - ---------- - cmab : pybandits.core.cmab.Cmab - Contextual multi-armed bandit model - X : array_like of shape (n_samples, n_feature) - Context matrix of samples features. - batch_size: int, default=100 - The number of samples per batch. - n_updates : int, default=10 - The number of updates in the simulation. - group : list int with length=n_samples - Group to which each sample belongs. Samples which belongs to the same group have features that come from the - same distribution and they have the same probability to receive a positive/negative feedback from each action. - prob_rewards : pd.DataFrame of shape (n_groups, n_actions) - Matrix of positive reward probability for each group-action combination. If None all probs are set to 0.5. - save : bool, default=False - Boolean flag to save the results. - path : string, default='' - Path where results are saved if save=True - verbose : bool, default=False - Enable verbose output. If True produce detailed logging information about the simulation. - random_seed : int, default=None - Seed for random state. If specified, the model outputs deterministic results. - """ - - def __init__( - self, - cmab, - X, - batch_size=100, - n_updates=10, - group=None, - prob_rewards=None, - save=False, - path="", - random_seed=None, - verbose=False, - ): - # init cmab - if type(cmab) is not Cmab: - raise TypeError("cmab must be of type pybandits.core.cmab.Cmab") - self._cmab = cmab - - # init batch-size - if type(batch_size) is not int and batch_size <= 0: - raise ValueError("batch_size must be an integer > 0") - self._batch_size = batch_size - - # init n_updates - if type(n_updates) is not int and n_updates <= 0: - raise ValueError("n_updates must be an integer > 0") - self._n_updates = n_updates - - # init X - self._X = check_context_matrix(X, cmab._n_features) - if len(self._X) != batch_size * n_updates: - raise ValueError( - "Mismatch between n_samples samples in the context matrix with batch_size and n_updates. " - "len(X) must be equal to batch_size x n_updates." 
- ) - - # init group - if group is None: - group = len(X) * [0] # if the input argument group is not specified, set all samples group to #0 - if len(group) != len(X): - raise ValueError("The length of X must equal to the length of group") - self._n_groups = len(set(group)) - - # create matrix of probability rewards if None (by default all probs = 0.5) - if prob_rewards is None: - prob_rewards = pd.DataFrame(0.5, index=set(group), columns=cmab._actions_ids) - if prob_rewards.shape[0] != self._n_groups and self._prob_rewards.shape[1] != len(self._cmab._actions_ids): - raise ValueError( - "matrix of probability rewards should have shape ({}, {}), while detected shape is {}".format( - len(group), len(cmab._actions_ids), prob_rewards.shape - ) - ) - - # init prob_rewards, save, path, verbose - if type(path) is not str: - raise TypeError("path must be a string") - if type(save) is not bool: - raise TypeError("save must be boolean (True/False)") - if type(random_seed) is not int and random_seed is not None: - raise TypeError("random_seed must be an integer") - if type(verbose) is not bool: - raise TypeError("verbose must be boolean (True/False)") - self._prob_rewards = prob_rewards - self._save = save - self._path = path - self._verbose = verbose - - # create rewards per each sample given the matrix of probability rewards - random.seed(random_seed) - self._rewards = [ - [1 if random.random() < prob_rewards.iloc[group[i], j] else 0 for j in range(len(cmab._actions_ids))] - for i in range(batch_size * n_updates) - ] - self._rewards = pd.DataFrame(self._rewards, columns=cmab._actions_ids) - - # created DataFrame for simulation results - self.results = pd.DataFrame( - np.nan, - columns=["action", "reward", "group", "selected_prob_reward", "max_prob_reward"], - index=range(batch_size * n_updates), - ) - self.results["group"] = pd.Series(group) - - if self._verbose: - print("Setup simulation completed.") - df = pd.DataFrame( - [ - np.sum(self._rewards.loc[self.results["group"] == i]) / sum(self.results["group"] == i) - for i in range(self._n_groups) - ] - ) - df.index.name = "group" - self._prob_rewards.index.name = "group" - print("Simulated input probability rewards:\n", df, "\n") - - def run(self): - """ - Start simulation process. 
It consists in the following steps: - - - for i=0 to n_updates - - Extract batch[i] of samples from X - - Model recommends the best actions as the action with the highest reward probability to each simulated - sample in batch[i] and collect corresponding simulated rewards - - Model priors are updated using information from recommended actions and returned rewards - """ - - for i in range(self._n_updates): - if self._verbose: - print("Iteration #{}".format(i + 1)) - - # extract simulated data for the current batch and scale the features - idx_batch_min = i * self._batch_size - idx_batch_max = (i + 1) * self._batch_size - 1 - X_batch = self._X.loc[idx_batch_min:idx_batch_max] - - # predict - if self._verbose: - print("Start predict batch {} ...".format(i + 1)) - - actions, _ = self._cmab.fast_predict(X_batch) - - # Get reward - rewards = [self._rewards.loc[j + idx_batch_min, actions[j]] for j in range(self._batch_size)] - - # update cmab - if self._verbose: - print("Start update batch {} ...".format(i + 1), "\n") - self._cmab.update(X=X_batch, actions=actions, rewards=rewards) - - # write in simulation results - self.results.loc[idx_batch_min:idx_batch_max, "action"] = pd.Series( - actions, index=range(idx_batch_min, idx_batch_max + 1) - ) - self.results.loc[idx_batch_min:idx_batch_max, "reward"] = pd.Series( - rewards, index=range(idx_batch_min, idx_batch_max + 1) - ) - - # write for regret analysis: - # 1. extract group information - # 2. reward prob for selected action and - # 3. reward probability from most rewarding action - group_id = self.results.loc[idx_batch_min:idx_batch_max, "group"].tolist() - selected_prob_reward = [ - self._prob_rewards.iloc[group_id[k], self._cmab._actions_ids.index(actions[k])] - for k in range(len(actions)) - ] - self.results.loc[idx_batch_min:idx_batch_max, "selected_prob_reward"] = pd.Series( - selected_prob_reward, index=range(idx_batch_min, idx_batch_max + 1) - ) - max_prob_reward = [self._prob_rewards.iloc[group_id[k],].max() for k in range(len(actions))] - self.results.loc[idx_batch_min:idx_batch_max, "max_prob_reward"] = pd.Series( - max_prob_reward, index=range(idx_batch_min, idx_batch_max + 1) - ) - - # save partial results - if self._save: - self.results.to_csv(self._path + "simulation_results.csv") - - # compute expected cumulative regrets - self.results["regret"] = self.results["max_prob_reward"] - self.results["selected_prob_reward"] - self.results["cum_regret"] = self.results["regret"].cumsum() - - if self._verbose: - self._print_results() - - # store results - if self._save: - if self._verbose: - print("Saving results...") - self._save_results() - - def get_count_selected_actions(self): - """ - Get the proportion of recommended actions per group at the end of the process. - - Returns - ------- - df : pandas DataFrame - Matrix of the proportion of recommended actions per group. - """ - return { - "group " + str(i): (self.results["action"].loc[self.results["group"] == i].value_counts()).to_dict() - for i in range(self._n_groups) - } - - def get_proportion_positive_reward(self): - """ - Get the proportion of positive rewards per group/action at the end of the process. - - Returns - ------- - df : pandas DataFrame - Matrix of the proportion of positive rewards per group/action. 
- """ - return { - "group " + str(i): ( - self.results["action"].loc[(self.results["group"] == i) & (self.results["reward"] == 1)].value_counts() - / self.results["action"].loc[(self.results["group"] == i)].value_counts() - ).to_dict() - for i in range(self._n_groups) - } - - def get_cumulative_proportions(self, path=""): - """ - Plot results of the simulation. It will create two plots per each group which display: - - The cumulated proportion of action - - The cumulated proportion of rewards - - Parameters - ---------- - path: str, default='' - Path in which plot figures are saved. - """ - d = {} - for i in range(self._n_groups): - actions = pd.get_dummies(self.results["action"].loc[self.results["group"] == i]).reset_index(drop=True) - actions_plot = actions.cumsum().div(actions.index.values + 1, axis=0) - - rewards = pd.get_dummies(self.results["action"].loc[self.results["group"] == i]) - rewards.loc[(self.results["group"] == i) & self.results["reward"] == 0] = 0 - rewards.reset_index(inplace=True, drop=True) - rewards_plot = rewards.cumsum().div(actions.cumsum()) - - d["group " + str(i)] = {"action": actions_plot, "reward": rewards_plot} - return d - - def _print_results(self): - """Private function to print results.""" - print("Simulation results (first 10 observations):\n", self.results.head(10), "\n") - print("Count of actions selected by the bandit: \n", self.get_count_selected_actions(), "\n") - print("Observed proportion of positive rewards for each action:\n", self.get_proportion_positive_reward(), "\n") - - def _save_results(self): - """Private function to save results.""" - self.results.to_csv(self._path + "simulation_results.csv") - - f = open("count_selected_actions.json", "w") - dump(self.get_count_selected_actions(), f) - f.close() - - f = open("proportions_of_positive_rewards.json", "w") - dump(self.get_proportion_positive_reward(), f) - f.close() diff --git a/pybandits/simulation_plots.py b/pybandits/simulation_plots.py deleted file mode 100644 index fcd78bb..0000000 --- a/pybandits/simulation_plots.py +++ /dev/null @@ -1,45 +0,0 @@ -# MIT License -# -# Copyright (c) 2022 Playtika Ltd. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import matplotlib.pyplot as plt -import pandas as pd -from plotnine import aes, geom_line, ggplot, ylab - - -def plot_cumulative_proportions(matrix, figsize=None, path="", title=""): - """Plot simulation.""" - fig, ax = plt.subplots(figsize=figsize) - ax.plot(matrix) - ax.set_title(title) - ax.set_xlabel("number of observations") - ax.set_ylim(-0.1, 1.1) - ax.set_ylabel("Cumulative proportion") - ax.legend(matrix.columns) - fig.savefig(path + title) - - return fig, ax - - -def plot_regrets(cum_regret): - """Plot cumulative regrets.""" - regrets = pd.Series(cum_regret).reset_index().rename(columns={"index": "number of observations"}) - return ggplot(regrets, aes(y="cum_regret", x="number of observations")) + geom_line() + ylab("cumulative regret") diff --git a/pybandits/simulation_smab.py b/pybandits/simulation_smab.py deleted file mode 100644 index 1780369..0000000 --- a/pybandits/simulation_smab.py +++ /dev/null @@ -1,213 +0,0 @@ -# MIT License -# -# Copyright (c) 2022 Playtika Ltd. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import random - -import numpy as np -import pandas as pd - -from pybandits.core.smab import Smab - - -class SimulationSmab: - """ - Simulate environment for stochastic multi-armed bandits. - - This class performs simulation of stochastic Multi-Armed Bandits (sMAB). Data are processed in batches of size n>=1. - Per each batch of simulated samples, the sMAB selects one action and collects the corresponding simulated reward for - each sample. Then, prior parameters are updated based on returned rewards from recommended actions. - - Parameters - ---------- - smab : pybandits.core.smab.Smab - Stochastic multi-armed bandit model. - n_updates : int, default=10 - The number of updates (i.e. batches of samples) in the simulation. - batch_size: int, default=100 - The number of samples per batch. - probs_reward : dict, default=None - The reward probability for the different actions. If None probabilities are set to 0.5. - The keys of the dict must match the smab actions_ids, and the values are float in the interval [0, 1]. - e.g. probs_reward={'action A': 0.6, 'action B': 0.8, 'action C': 1.} - save : bool, default=False - Boolean flag to save the results. - path : string, default='' - Path where results are saved if save=True - random_seed : int, default=None - Seed for random state. If specified, the model outputs deterministic results. - verbose : bool, default=False - Enable verbose output. 
If True, detailed logging information about the simulation are provided. - """ - - def __init__( - self, - smab, - n_updates=10, - batch_size=100, - probs_reward=None, - save=False, - path="", - random_seed=None, - verbose=False, - ): - if type(smab) is not Smab: - raise TypeError("smab must be of type pybandits.core.smab.Smab") - if type(n_updates) is not int and n_updates <= 0: - raise ValueError("n_updates must be an integer > 0") - if type(batch_size) is not int and batch_size <= 0: - raise ValueError("batch_size must be an integer > 0") - if type(save) is not bool: - raise TypeError("save must be boolean (True/False)") - if type(path) is not str: - raise TypeError("path must be a string") - if type(random_seed) is not int and random_seed is not None: - raise TypeError("random_seed must be an integer") - if type(verbose) is not bool: - raise TypeError("verbose must be boolean (True/False)") - - if probs_reward is None: - probs_reward = {k: v for (k, v) in zip(smab._actions_ids, len(smab._actions_ids) * [0.5])} - if ( - type(probs_reward) is not dict - or not all(isinstance(x, str) for x in probs_reward.keys()) - or not all(isinstance(x, float) for x in probs_reward.values()) - ): - raise TypeError("probs_reward must be a dict with string as keys and float as values.") - if set(probs_reward.keys()) != set(smab._actions_ids): - raise ValueError("probs_reward dict keys must match smab actions_ids.") - if all(v > 1 for v in probs_reward.values()) or all(v < 0 for v in probs_reward.values()): - raise ValueError("probs_reward values must be in the interval [0, 1].") - - self._smab = smab - self._n_updates = n_updates - self._batch_size = batch_size - self._probs_reward = probs_reward - self._save = save - self._path = path - self._random_seed = random_seed - self._verbose = verbose - - # created DataFrame for simulation results - self.results = pd.DataFrame(np.nan, columns=["action", "reward"], index=range(batch_size * n_updates)) - - def run(self): - """ - Start simulation process. It consists in the following steps: - for i=0 to n_updates - Consider batch[i] of observation - sMAB selects the best action as the action with the highest reward probability to each sample in - batch[i]. 
- Rewards are returned for each recommended action - Prior parameters are updated based on recommended actions and returned rewards - """ - for i in range(self._n_updates): - # select actions for batch #i - actions, _ = self._smab.predict(n_samples=self._batch_size) - - # find min and max indexes for batch #i - idx_batch_min = i * self._batch_size - idx_batch_max = (i + 1) * self._batch_size - 1 - - # write the selected actions for batch #i in the results matrix - self.results.loc[idx_batch_min:idx_batch_max, "action"] = actions - - for a in self._smab._actions_ids: - # simulate the rewards - random.seed(self._random_seed) - rewards = [1 if random.random() < self._probs_reward[a] else 0 for i in range(actions.count(a))] - - # find indexes of the action 'a' in the array 'actions' - idx = [i for i in range(len(actions)) if actions[i] == a] - - # write rewards for batch #i and action 'a' in the result matrix - self.results.loc[[idx_batch_min + i for i in idx], "reward"] = rewards - - # update the stochastic multi-armed bandit model - self._smab.update(action_id=a, n_successes=rewards.count(1), n_failures=rewards.count(0)) - - # print results - if self._verbose: - self._print_results() - - # store results - if self._save: - if self._verbose: - print("Saving results at {}".format(self._path)) - self._save_results() - - def get_count_selected_actions(self): - """ - Get the count of actions selected by the bandit at the end of the process. - - Returns - ------- - dict - Dictionary with keys=action_ids and values=count of recommended actions. - """ - return dict(self.results.action.value_counts()) - - def get_proportion_positive_reward(self): - """ - Get the observed proportion of positive rewards for each action at the end of the simulation process. - - Returns - ------- - dict - Dictionary with keys=action_ids and values=proportion of positive rewards for each action. - """ - d = {} - for a in self._smab._actions_ids: - x = self.results[self.results.action == a] - d[a] = sum(x.reward) / len(x) - return d - - def get_cumulative_proportions(self): - """ - Get (i) the cumulative action proportions and (ii) the cumulative reward proportions per action. 
- - Returns - ------- - dict - Dictionary with keys=(actions, reward) and - values=(cumulative action proportions, cumulative reward proportions per action) - """ - actions = pd.get_dummies(self.results["action"]).reset_index(drop=True) - actions_plot = actions.cumsum().div(actions.index.values + 1, axis=0) - - rewards = pd.get_dummies(self.results["action"]) - rewards.loc[self.results["reward"] == 0] = 0 - rewards.reset_index(inplace=True, drop=True) - rewards_plot = rewards.cumsum().div(actions.cumsum()) - - return {"action": actions_plot, "reward": rewards_plot} - - def _print_results(self): - """Private function to print results.""" - print("Simulation results (first 10 observations):\n", self.results.head(10), "\n") - print("Count of actions selected by the bandit: \n", self.get_count_selected_actions(), "\n") - print("Observed proportion of positive rewards for each action:\n", self.get_proportion_positive_reward(), "\n") - - def _save_results(self): - """Private function to save results.""" - self.results.to_csv("simulation_results.csv", index=False) - with open(self._path + "summary.txt", "w") as f: - f.write(str(self.get_count_selected_actions()) + "\n" + str(self.get_proportion_positive_reward())) diff --git a/pybandits/simulator.py b/pybandits/simulator.py new file mode 100644 index 0000000..c9f143a --- /dev/null +++ b/pybandits/simulator.py @@ -0,0 +1,614 @@ +# MIT License +# +# Copyright (c) 2022 Playtika Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import os.path +import random +from abc import ABC, abstractmethod +from functools import cached_property +from typing import Any, Dict, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd +from bokeh.core.enums import Palette +from bokeh.layouts import layout +from bokeh.models import ColumnDataSource, HoverTool, Legend, Plot, TabPanel +from bokeh.palettes import Category10, Turbo256 +from bokeh.plotting import figure +from loguru import logger + +from pybandits.base import ActionId, BinaryReward, PyBanditsBaseModel +from pybandits.mab import BaseMab +from pybandits.pydantic_version_compatibility import ( + PYDANTIC_VERSION_1, + PYDANTIC_VERSION_2, + NonNegativeInt, + PositiveInt, + PrivateAttr, + field_validator, + model_validator, + pydantic_version, +) +from pybandits.utils import in_jupyter_notebook, visualize_via_bokeh + + +class Simulator(PyBanditsBaseModel, ABC): + """ + Simulate environment for multi-armed bandits. + + This class performs simulation of Multi-Armed Bandits (MAB). 
Data are processed in batches of size n>=1. + For each batch of simulated samples, the MAB selects one action and collects the corresponding simulated reward for + each sample. Then, prior parameters are updated based on returned rewards from recommended actions. + + Parameters + ---------- + mab : BaseMab + MAB model. + n_updates : PositiveInt, defaults to 10 + The number of updates (i.e. batches of samples) in the simulation. + batch_size : PositiveInt, defaults to 100 + The number of samples per batch. + probs_reward : Optional[pd.DataFrame], default=None + The reward probability for the different actions. If None, probabilities are set to 0.5. + The column names must match the mab action ids, and the values must be floats in the interval [0, 1]. + e.g. probs_reward=pd.DataFrame({"a1 A": [0.6], "a2 B": [0.5], "a3": [0.8]}). + Note that currently only single-objective reward is supported. + save : bool, defaults to False + Boolean flag to save the results. + path : string, default to '' + Path where results are saved if save=True + file_prefix : string, default to '' + Prefix for the file name where results are saved. + random_seed : int, default=None + Seed for random state. If specified, the model outputs deterministic results. + verbose : bool, default=False + Enable verbose output. If True, detailed logging information about the simulation is provided. + visualize : bool, default=False + Enable visualization of the simulation results. + """ + + mab: BaseMab + n_updates: PositiveInt = 10 + batch_size: PositiveInt = 100 + probs_reward: Optional[pd.DataFrame] = None + save: bool = False + path: str = "" + file_prefix: str = "" + random_seed: Optional[NonNegativeInt] = None + verbose: bool = False + visualize: bool = False + _results: pd.DataFrame = PrivateAttr() + _base_columns: List[str] = PrivateAttr() + _cumulative_col_prefix: str = "cum" + # Define dash patterns, markers, and colors for lines + _dash_patterns = ["solid", "dashed", "dotted"] + _markers = ["circle", "square", "triangle", "diamond", "star"] + + ############################################ Instance Input Validators ############################################# + + if pydantic_version == PYDANTIC_VERSION_1: + + class Config: + arbitrary_types_allowed = True + allow_population_by_field_name = True + + elif pydantic_version == PYDANTIC_VERSION_2: + model_config = {"arbitrary_types_allowed": True, "populate_by_name": True} + else: + raise ValueError(f"Unsupported pydantic version: {pydantic_version}") + + @field_validator("probs_reward", mode="before") + @classmethod + def validate_probs_reward_values(cls, value): + if value is not None: + if not all(value.dtypes.apply(lambda x: x.kind == "f")): + raise ValueError("probs_reward values must be float.") + if not value.applymap(lambda x: 0 <= x <= 1).all().all(): + raise ValueError("probs_reward values must be in the interval [0, 1].") + return value + + @field_validator("file_prefix", mode="before") + def maybe_alter_file_prefix(cls, value): + return f"{value}_" if value else "" + + @model_validator(mode="before") + @classmethod + def validate_probs_reward_columns(cls, values): + if "probs_reward" in values and values["probs_reward"] is not None: + mab_action_ids = list(values["mab"].actions.keys()) + if set(values["probs_reward"].columns) != set(mab_action_ids): + raise ValueError("probs_reward columns must match mab actions ids.") + if values["probs_reward"].shape[1] != len(mab_action_ids): + raise ValueError("The number of probs_reward columns must match the number of MAB 
actions.") + return values + + @model_validator(mode="before") + @classmethod + def validate_visualize_without_save(cls, values): + visualize = cls._get_value_with_default("visualize", values) + save = cls._get_value_with_default("save", values) + if visualize and not save and not in_jupyter_notebook(): + raise ValueError("Visualize cannot be enabled without saving the results if shell is not Jupyter.") + return values + + #################################################################################################################### + + def model_post_init(self, __context: Any) -> None: + # set random seed for reproducibility + random.seed(self.random_seed) + np.random.default_rng(self.random_seed) + self._initialize_results() + + @abstractmethod + def _initialize_results(self): + """ + Initialize the results DataFrame. The results DataFrame is used to store the raw simulation results. + """ + pass + + @property + def results(self): + return self._results + + def run(self): + """ + Start simulation process. It consists in the following steps: + for i=0 to n_updates + Consider batch[i] of observation + mab selects the best action as the action with the highest reward probability to each sample in + batch[i]. + Rewards are returned for each recommended action + Prior parameters are updated based on recommended actions and returned rewards + """ + for batch_index in range(self.n_updates): + predict_kwargs, update_kwargs, metadata = self._get_batch_step_kwargs_and_metadata(batch_index) + self._step(batch_index, metadata, predict_kwargs, update_kwargs) + + self._finalize_results() + + # print results + if self.verbose: + self._print_results() + + if self.visualize: + self._visualize_results() + + # store results + if self.save: + if self.verbose: + logger.info(f"Saving results at {self.path}") + self._save_results() + + def _step( + self, + batch_index: int, + metadata: Dict[str, List], + predict_kwargs: Dict[str, Union[int, np.ndarray]], + update_kwargs: Dict[str, np.ndarray], + ): + """ + Perform a step of the simulation process. It consists in the following steps: + - select actions for batch via mab.predict + - draw rewards for the selected actions based on metadata according to probs_reward + - write the selected actions for batch #i in the results matrix + - update the mab model with the selected actions and the corresponding rewards via mab.update + + Parameters + ---------- + batch_index : int + The index of the batch. + metadata : Dict[str, List] + The metadata for the selected actions. + predict_kwargs : Dict[str, Union[int, np.ndarray]] + Dictionary containing the keyword arguments for the batch used in mab.predict. + update_kwargs : Dict[str, np.ndarray] + Dictionary containing the keyword arguments for the batch used in mab.update. 
+ """ + # select actions for batch #index + predictions = self.mab.predict(**predict_kwargs) + actions = predictions[0] # location 0 is the actions for both SmabPredictions and CmabPredictions + rewards = self._draw_rewards(actions, metadata) + # write the selected actions for batch #i in the results matrix + batch_results = pd.DataFrame({"action": actions, "reward": rewards, "batch": batch_index, **metadata}) + batch_results = self._finalize_step(batch_results) + if not all(col in batch_results.columns for col in self._base_columns): + raise ValueError(f"The batch results must contain the {self._base_columns} columns") + self._results = pd.concat((self._results, batch_results), ignore_index=True) + self.mab.update(actions=actions, rewards=rewards, **update_kwargs) + + @abstractmethod + def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]: + """ + Draw rewards for the selected actions based on metadata according to probs_reward. + + Parameters + ---------- + actions : List[ActionId] + The actions selected by the multi-armed bandit model. + metadata : Dict[str, List] + The metadata for the selected actions. + + Returns + ------- + reward : List[BinaryReward] + A list of binary rewards. + """ + pass + + @abstractmethod + def _get_batch_step_kwargs_and_metadata( + self, batch_index: int + ) -> Tuple[Dict[str, Union[int, np.ndarray]], Dict[str, np.ndarray], Dict[str, List]]: + """ + Extract kwargs required for the MAB's update and predict functionality, + as well as metadata for sample association. + + Parameters + ---------- + batch_index : int + The index of the batch. + + Returns + ------- + predict_kwargs : Dict[str, Union[int, np.ndarray]] + Dictionary containing the keyword arguments for the batch used in mab.predict. + update_kwargs : Dict[str, Any] + Dictionary containing the keyword arguments for the batch used in mab.update. + metadata : Dict[str, List] + Dictionary containing the association information for the batch. + """ + pass + + @abstractmethod + def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame: + """ + Finalize the step by adding additional information to the batch results. + + Parameters + ---------- + batch_results : pd.DataFrame + raw batch results + + Returns + ------- + batch_results : pd.DataFrame + batch results with added columns + """ + pass + + @abstractmethod + def _finalize_results(self): + """ + Finalize the simulation process. It can be used to add additional information to the results. + + Returns + ------- + None + """ + pass + + @cached_property + def _action_ids(self) -> List[ActionId]: + """ + Get the list of actions. + + Returns + ------- + List[ActionId] + The list of actions + """ + return sorted(list(self.mab.actions.keys())) + + @cached_property + def _cumulative_actions_cols(self) -> List[str]: + """ + Get the list of cumulative actions columns. + + Returns + ------- + : List[str] + The list of cumulative actions columns + """ + return [f"{self._cumulative_col_prefix}_{action}" for action in self._action_ids] + + @property + def _colors(self) -> Palette: + """ + Get the palette of colors. + + Returns + ------- + : Palette + Palette of colors + """ + n_actions = len(self._action_ids) + category10_keys = Category10.keys() + return Category10[max(n_actions, min(category10_keys))] if n_actions <= max(category10_keys) else Turbo256 + + @classmethod + def _impute_missing_counts(cls, df, action_ids): + """ + Impute missing counts for actions in the data frame. 
+ + Parameters + ---------- + df : pd.DataFrame + Data frame with counts of actions selected by the bandit. + action_ids : List[ActionId] + List of action ids. + + Returns + ------- + df : pd.DataFrame + Data frame with imputed missing counts for actions. + """ + for action_id in action_ids: + if action_id not in df.columns: + df[action_id] = 0 + return df + + @property + def selected_actions_count(self) -> pd.DataFrame: + """ + Get the count of actions selected by the bandit on each batch and + at the end of the simulation process. + + Returns + ------- + counts_df : pd.DataFrame + Data frame with batch serial number as index (or total for all batches), actions as columns, + and count of recommended actions as values + """ + groupby_cols = [col for col in self._base_columns if col not in ["reward", "action"]] + counts_df = self._results.groupby(groupby_cols)["action"].value_counts().unstack(fill_value=0).reset_index() + action_ids = self._action_ids + counts_df = self._impute_missing_counts(counts_df, action_ids) + reordered_cols = groupby_cols + action_ids + counts_df = counts_df[reordered_cols] + cumulative_actions_cols = self._cumulative_actions_cols + groupby_cols.remove("batch") + counts_df[cumulative_actions_cols] = ( + counts_df.groupby(groupby_cols)[action_ids].cumsum() if groupby_cols else counts_df[action_ids].cumsum() + ) + if groupby_cols: + grouped_counts_df = self._results.groupby(groupby_cols)["action"].value_counts().unstack().fillna(0) + grouped_counts_df = self._impute_missing_counts(grouped_counts_df, action_ids) + grouped_counts_df = grouped_counts_df.assign(batch="total").set_index(["batch"], append=True).reset_index() + grouped_counts_df[cumulative_actions_cols] = grouped_counts_df[action_ids] + else: + grouped_counts_df = pd.DataFrame() + total_counts_df = counts_df.sum(axis=0).to_frame().T + total_counts_df = ( + total_counts_df.assign(batch="total", **{col: "total" for col in groupby_cols}) + .set_index(["batch"], drop=True) + .reset_index() + ) + total_counts_df[cumulative_actions_cols] = total_counts_df[action_ids] + counts_df = pd.concat((counts_df, grouped_counts_df, total_counts_df), axis=0, ignore_index=True).set_index( + groupby_cols + ["batch"], drop=True + ) + return counts_df + + @property + def positive_reward_proportion(self) -> pd.DataFrame: + """ + Get the observed proportion of positive rewards for each action at the end of the simulation process. 
+ + Returns + ------- + proportion_df : pd.DataFrame + Data frame with actions as index, and proportion of positive rewards as values + """ + groupby_cols = [col for col in self._base_columns if col not in ["reward", "batch"]] + proportion_df = self._results.groupby(groupby_cols)["reward"].mean().to_frame(name="proportion") + return proportion_df + + def _print_results(self): + """Private function to print results.""" + logger.info(f"Simulation results (first 10 observations):\n{self._results.head(10)}\n") + logger.info(f"Count of actions selected by the bandit:\n{self.selected_actions_count.iloc[-1]}\n") + logger.info(f"Observed proportion of positive rewards for each action:\n{self.positive_reward_proportion}\n") + + def _save_results(self): + """Private function to save results.""" + self._results.to_csv(self._get_save_path("simulation_results.csv"), index=False) + self.selected_actions_count.to_csv(self._get_save_path("selected_actions_count.csv"), index=True) + self.positive_reward_proportion.to_csv(self._get_save_path("positive_reward_proportion.csv"), index=True) + + def _get_save_path(self, file_name: str) -> str: + """ + Private function to get the save path. + + Parameters + ---------- + file_name : str + The file name. + + Returns + ------- + full_path : str + The full path to save the file with attached path and name prefix. + """ + full_path = os.path.join(self.path, f"{self.file_prefix}{file_name}") + return full_path + + def _visualize_results(self): + """Private function to visualize results.""" + action_ids = self._action_ids + cumulative_actions_cols = self._cumulative_actions_cols + selected_actions_count = self.selected_actions_count + selected_actions_rate = 100 * pd.merge( + selected_actions_count[action_ids].div(selected_actions_count[action_ids].sum(axis=1), axis=0), + selected_actions_count[cumulative_actions_cols].div( + selected_actions_count[cumulative_actions_cols].sum(axis=1), axis=0 + ), + left_index=True, + right_index=True, + ) + step_actions_rate = selected_actions_rate[(selected_actions_rate.reset_index().batch != "total").values] + step_actions_rate = ( + step_actions_rate.unstack(level=list(range(step_actions_rate.index.nlevels))) + .to_frame("value") + .reset_index() + ) + groupby_cols = [col for col in self._base_columns if col not in ["reward", "batch", "action"]] + grouped_df = ( + step_actions_rate.groupby(groupby_cols if len(groupby_cols) > 1 else groupby_cols[0]) + if groupby_cols + else [("", step_actions_rate)] + ) + + # plot using bokeh + tabs = [] + for group_name, rates_df in grouped_df: + if len(groupby_cols) == 1: + group_name = (group_name,) + elif len(groupby_cols) == 0: + group_name = tuple() + overall_actions_rate = selected_actions_rate.loc[group_name + ("total",)].to_frame("total").reset_index() + overall_actions_rate = overall_actions_rate[overall_actions_rate["action"].isin(action_ids)] + + # rate vs step line plot + step_legend_items = [] + fig_steps = figure( + title="Selected actions rate across steps", + x_axis_label="Batch index", + y_axis_label="Rate [%]", + ) + for i, action in enumerate(action_ids): + if action not in sorted(rates_df.action.unique()): + continue + self._add_line_to_figure(fig_steps, step_legend_items, rates_df, i, action) + + self._add_legend_to_figure(step_legend_items, fig_steps) + fig_steps.add_tools(HoverTool(tooltips=[("batch", "@batch"), ("action", "@action"), ("value", "@value")])) + + # Overall selected actions bars plot + fig_overall = figure( + title="Overall selected actions rate", + 
x_axis_label="Action", + y_axis_label="Rate [%]", + x_range=overall_actions_rate["action"], + ) + fig_overall.vbar(x="action", top="total", width=0.9, source=ColumnDataSource(overall_actions_rate)) + fig_overall.xgrid.grid_line_color = None + fig_overall.add_tools(HoverTool(tooltips=[("action", "@action"), ("rate", "@total")])) + + # cumulative rate vs step line plot + cum_legend_items = [] + fig_cumulative_steps = figure( + title="Cumulative selected actions rate across steps", + x_axis_label="Batch index", + y_axis_label="Rate [%]", + ) + for i, (action, cum_action) in enumerate(zip(action_ids, cumulative_actions_cols)): + if action not in rates_df.action.unique(): + continue + self._add_line_to_figure(fig_cumulative_steps, cum_legend_items, rates_df, i, action, cum_action) + + self._add_legend_to_figure(cum_legend_items, fig_cumulative_steps) + fig_cumulative_steps.add_tools( + HoverTool(tooltips=[("batch", "@batch"), ("action", "@action"), ("value", "@value")]) + ) + + tabs.append( + TabPanel( + child=layout(children=[[fig_steps, fig_overall], [fig_cumulative_steps]]), + title=f"{'_'.join([str(name_part) for name_part in group_name])}", + ) + ) + visualize_via_bokeh(self._get_save_path("simulation_results.html"), tabs) + + def _add_line_to_figure( + self, + fig: Plot, + legend_items: List[Tuple[str, List]], + df: pd.DataFrame, + index: int, + action: ActionId, + action_data_source_id: Optional[str] = None, + ): + """ + Add a line corresponding to action based on filtering df using action_data_source_id to the figure. + + Parameters + ---------- + fig : Plot + Bokeh figure for which a line should be added. + legend_items : List[Tuple[str, List] + List of legend elements, given by tuples of name and associated plot members. + df : DataFrame + Data frame to filter for line data. + index : int + Line serial number. + action : ActionId + Subjected action. + action_data_source_id : Optional[str], resorts to action if not specified + Corresponding value to action to filter df by. + """ + + action_data_source_id = action_data_source_id or action + + dash_pattern = self._get_modulus_element(index, self._dash_patterns) + marker = self._get_modulus_element(index, self._markers) + color = self._get_modulus_element(index, self._colors) + + action_data = df[df.action == action_data_source_id] + action_source = ColumnDataSource(action_data) + line = fig.line("batch", "value", source=action_source, line_width=2, color=color, line_dash=dash_pattern) + scatter = fig.scatter("batch", "value", source=action_source, size=8, color=color, marker=marker) + legend_items.append((action, [line, scatter])) + + @staticmethod + def _add_legend_to_figure(legend_items: List[Tuple[str, List]], fig: Plot): + """ + Add legend with the legend items to fig. + + Parameters + ---------- + legend_items : List[Tuple[str, List] + List of legend elements, given by tuples of name and associated plot members. + fig : Plot + Bokeh figure for which a legend should be added. + """ + legend = Legend(items=legend_items) + legend.title = "Actions" + legend.location = "right" + legend.click_policy = "hide" + fig.add_layout(legend, "right") + + @staticmethod + def _get_modulus_element(index: int, elements: List): + """ + Get the element of the list at the index modulo the length of the list. + + Parameters + ---------- + index : int + Required index + elements : List + List of elements. 
+ + Returns + ------- + Element of the list at the index modulo the length of the list + """ + return elements[index % len(elements)] diff --git a/pybandits/smab_simulator.py b/pybandits/smab_simulator.py new file mode 100644 index 0000000..400fd1b --- /dev/null +++ b/pybandits/smab_simulator.py @@ -0,0 +1,140 @@ +# MIT License +# +# Copyright (c) 2022 Playtika Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import random +from typing import Dict, List, Tuple + +import numpy as np +import pandas as pd + +from pybandits.base import ActionId, BinaryReward +from pybandits.pydantic_version_compatibility import Field, model_validator +from pybandits.simulator import Simulator +from pybandits.smab import BaseSmabBernoulli + + +class SmabSimulator(Simulator): + """ + Simulate environment for stochastic multi-armed bandits. + + This class performs simulation of stochastic Multi-Armed Bandits (sMAB). Data are processed in batches of size n>=1. + For each batch of simulated samples, the MAB selects one action and collects the corresponding simulated reward for + each sample. Then, prior parameters are updated based on returned rewards from recommended actions. + + Parameters + ---------- + mab : BaseSmabBernoulli + sMAB model. + """ + + mab: BaseSmabBernoulli = Field(validation_alias="smab") + _base_columns: List[str] = ["batch", "action", "reward"] + + @model_validator(mode="before") + @classmethod + def replace_null_and_validate_probs_reward(cls, values): + mab_action_ids = list(values["mab"].actions.keys()) + probs_reward = cls._get_value_with_default("probs_reward", values) + if probs_reward is None: + probs_reward = pd.DataFrame(0.5, index=[0], columns=mab_action_ids) + values["probs_reward"] = probs_reward + else: + if len(probs_reward) != 1: + raise ValueError("probs_reward must have exactly one row.") + return values + + def _initialize_results(self): + """ + Initialize the results DataFrame. The results DataFrame is used to store the raw simulation results. + """ + self._results = pd.DataFrame(columns=["batch", "action", "reward"]) + + def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]: + """ + Draw rewards for the selected actions according to probs_reward. + + Parameters + ---------- + actions : List[ActionId] + The actions selected by the multi-armed bandit model. + metadata : Dict[str, List] + The metadata for the selected actions. Not used in this implementation. 
+
+        Returns
+        -------
+        rewards : List[BinaryReward]
+            A list of binary rewards.
+        """
+        rewards = [int(random.random() < self.probs_reward.loc[0, a]) for a in actions]
+        return rewards
+
+    def _get_batch_step_kwargs_and_metadata(
+        self, batch_index: int
+    ) -> Tuple[Dict[str, int], Dict[str, np.ndarray], Dict[str, List]]:
+        """
+        Extract the keyword arguments required for the sMAB's update and predict functionality,
+        as well as the metadata for the sample group.
+
+        Parameters
+        ----------
+        batch_index : int
+            The index of the batch.
+
+        Returns
+        -------
+        predict_kwargs : Dict[str, int]
+            Dictionary containing the number of samples for sMAB prediction.
+        update_kwargs : Dict[str, np.ndarray]
+            Empty dictionary; the sMAB update requires no extra arguments.
+        metadata : Dict[str, List]
+            Empty dictionary; the sMAB generates no metadata.
+        """
+        predict_kwargs = {"n_samples": self.batch_size}
+        update_kwargs = {}
+        metadata = {}
+        return predict_kwargs, update_kwargs, metadata
+
+    def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame:
+        """
+        Finalize the step by adding additional information to the batch results.
+
+        Parameters
+        ----------
+        batch_results : pd.DataFrame
+            Raw batch results.
+
+        Returns
+        -------
+        batch_results : pd.DataFrame
+            The same raw batch results, unchanged.
+        """
+        return batch_results
+
+    def _finalize_results(self):
+        """
+        Finalize the simulation process. It can be used to add additional information to the results.
+
+        Returns
+        -------
+        None
+        """
+        pass
diff --git a/pybandits/utils.py b/pybandits/utils.py
index d0577b5..3d8a62f 100644
--- a/pybandits/utils.py
+++ b/pybandits/utils.py
@@ -1,5 +1,31 @@
+# MIT License
+#
+# Copyright (c) 2022 Playtika Ltd.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
 import json
-from typing import Any, Callable, Dict, List, Union
+from typing import Any, Callable, Dict, List, Optional, Union
+
+from bokeh.io import curdoc, output_file, output_notebook, save, show
+from bokeh.models import InlineStyleSheet, TabPanel, Tabs
+from IPython import get_ipython
 
 from pybandits.pydantic_version_compatibility import validate_call
 
@@ -42,3 +68,64 @@ def extract_argument_names_from_function(function_handle: Callable, is_class_met
     start_index = int(is_class_method)
     argument_names = function_handle.__code__.co_varnames[start_index : function_handle.__code__.co_argcount]
     return argument_names
+
+
+def in_jupyter_notebook() -> bool:
+    """
+    Check if the code is running in a Jupyter notebook.
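+    The check relies on the IPython shell class name, so other notebook frontends may not be detected.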
+
+    Reference: https://stackoverflow.com/a/39662359
+
+    Returns
+    -------
+    bool
+        True if the code is running in a Jupyter notebook, False otherwise.
+    """
+
+    try:
+        shell = get_ipython().__class__.__name__
+
+        return shell == "ZMQInteractiveShell"
+
+    except NameError:
+        return False  # Probably standard Python interpreter
+
+
+def visualize_via_bokeh(output_path: Optional[str], tabs: List[TabPanel]):
+    """
+    Visualize output to either a Jupyter notebook or an HTML file.
+
+    Parameters
+    ----------
+    output_path : Optional[str]
+        Path to the output file. Required if not running in a Jupyter notebook.
+    tabs : List[TabPanel]
+        List of TabPanel objects to visualize.
+    """
+
+    if in_jupyter_notebook():
+        output_notebook()
+    else:
+        if output_path is None:
+            raise ValueError("output_path is required when not running in a Jupyter notebook.")
+        output_file(output_path)
+
+    # Inline stylesheet that lets the tab headers wrap instead of overflowing
+    tabs_css = """
+    :host(.bk-Tabs) .bk-header {
+        flex-wrap: wrap !important;
+    }
+    """
+
+    tabs_stylesheet = InlineStyleSheet(css=tabs_css)
+    curdoc().title = "Visual report"
+    styled_tabs = Tabs(tabs=tabs, stylesheets=[tabs_stylesheet], sizing_mode="stretch_both")
+    if in_jupyter_notebook():
+        show(styled_tabs)
+    else:
+        save(styled_tabs)
diff --git a/pyproject.toml b/pyproject.toml
index 91bf9fa..fb3e0e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pybandits"
-version = "1.0.2"
+version = "1.1.0"
 description = "Python Multi-Armed Bandit Library"
 authors = [
     "Dario d'Andrea ",
@@ -16,10 +16,12 @@ readme = "README.md"
 python = ">=3.8.1,<3.12"
 loguru = "^0.6"
 numpy = "^1.23"
-pydantic = ">=1.10"
+pydantic = ">=1.10.0,<3.0.0"
 scipy = "^1.9"
 pymc = "^5.3"
 scikit-learn = "^1.1"
+bokeh = "^3.1"
+
 
 [tool.poetry.group.dev.dependencies]
 hypothesis = "^6.68.2"
@@ -34,8 +36,10 @@ ipykernel = "^6.21.3"
 jupyterlab = "^3.6.1"
 pytest-cov = "^4.0.0"
 pytest_mock = "^3.14.0"
+pytest-xdist = "^3.6.1"
 ruff = "^0.5.6"
+
 
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
@@ -46,14 +50,13 @@ plugins = "pydantic.mypy"
 
 [tool.ruff]
 line-length = 120
 extend-include = ["*.ipynb"]
-extend-ignore = ["E203"]
+
 
 # pylint configuration incorporated in Ruff
 [tool.ruff.lint]
 # Enable the isort rules.
extend-select = ["I"] - -[tool.ruff.per-file-ignores] +extend-ignore = ["E203"] # disable check for: # D100: Missing docstring in public module (equivalent to C0114) # D101: Missing docstring in public class (equivalent to C0115) @@ -64,4 +67,4 @@ extend-select = ["I"] # D106: Missing docstring in public nested class (equivalent to C0115) # Missing : Too few public methods (equivalent to R0903) # PLR0913: Too many arguments (equivalent to R0913) -"*.py" = ["D100", "D101", "D102", "D103", "D104", "D105", "D106", "PLR0913"] +per-file-ignores = { "*.py" = ["D100", "D101", "D102", "D103", "D104", "D105", "D106", "PLR0913"] } diff --git a/tests/test_cmab.py b/tests/test_cmab.py index fdf2173..208f381 100644 --- a/tests/test_cmab.py +++ b/tests/test_cmab.py @@ -27,12 +27,17 @@ import pytest from hypothesis import given, settings from hypothesis import strategies as st -from pydantic import NonNegativeFloat, ValidationError from pybandits.base import Float01 from pybandits.cmab import CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC from pybandits.model import BayesianLogisticRegression, BayesianLogisticRegressionCC, StudentT, UpdateMethods -from pybandits.pydantic_version_compatibility import PYDANTIC_VERSION_1, PYDANTIC_VERSION_2, pydantic_version +from pybandits.pydantic_version_compatibility import ( + PYDANTIC_VERSION_1, + PYDANTIC_VERSION_2, + NonNegativeFloat, + ValidationError, + pydantic_version, +) from pybandits.strategy import BestActionIdentificationBandit, ClassicBandit, CostControlBandit from pybandits.utils import to_serializable_dict from tests.test_utils import is_serializable @@ -783,7 +788,7 @@ def test_cmab_cc_predict(n_samples, n_features): assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples -@settings(deadline=10000) +@settings(deadline=None) @given(st.just(100), st.just(3), st.sampled_from(literal_update_methods)) def test_cmab_cc_update(n_samples, n_features, update_method): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() diff --git a/tests/test_cmab_simulator.py b/tests/test_cmab_simulator.py new file mode 100644 index 0000000..cfb8633 --- /dev/null +++ b/tests/test_cmab_simulator.py @@ -0,0 +1,167 @@ +# MIT License +# +# Copyright (c) 2022 Playtika Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
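+
+# The tests below cover CmabSimulator: validation of probs_reward against mismatched
+# group/column shapes, and end-to-end simulation runs asserting that the expected
+# CSV and HTML artifacts are written to disk.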
+ +import os +from tempfile import TemporaryDirectory + +import numpy as np +import pandas as pd +import pytest +from hypothesis import given, settings +from hypothesis import strategies as st +from pytest_mock import MockerFixture + +from pybandits.cmab import CmabBernoulli +from pybandits.cmab_simulator import CmabSimulator + + +def test_mismatched_probs_reward_columns(mocker: MockerFixture, groups=[0, 1]): + def check_value_error(probs_reward, context): + with pytest.raises(ValueError): + CmabSimulator(mab=cmab, probs_reward=probs_reward, groups=groups, context=context) + + num_groups = len(groups) + cmab = mocker.Mock(spec=CmabBernoulli) + cmab.actions = {"a1": mocker.Mock(), "a2": mocker.Mock()} + cmab.epsilon = 0.0 + cmab.default_action = None + context = pd.DataFrame({"a1": [0.5] * num_groups, "a2": [0.5] * num_groups}) + probs_reward = pd.DataFrame({"a1": [0.5], "a2": [0.5]}) + check_value_error(probs_reward, context) + probs_reward = pd.DataFrame({"a1": [0.5] * num_groups, "a2": [0.5] * num_groups}) + check_value_error(probs_reward, context[:1]) + + +def test_cmab_e2e_simulation_with_default_arguments( + action_ids=["a1", "a2"], n_features=3, n_updates=2, batch_size=10, num_groups=2 +): + mab = CmabBernoulli.cold_start(action_ids=action_ids, n_features=n_features) + base_groups = list(range(num_groups)) + group = base_groups * (n_updates * batch_size // num_groups) + base_groups[: (n_updates * batch_size % num_groups)] + context = ( + np.repeat(np.arange(3).reshape(1, -1), n_updates * batch_size, axis=0).T * (np.array(group) - np.mean(group)) + ).T + with TemporaryDirectory() as path: + simulator = CmabSimulator( + mab=mab, + visualize=True, + save=True, + path=path, + group=[str(g) for g in group], + batch_size=batch_size, + n_updates=n_updates, + context=context, + ) + simulator.run() + assert not simulator.results.empty + dir_list = os.listdir(path) + assert "simulation_results.csv" in dir_list + assert "selected_actions_count.csv" in dir_list + assert "positive_reward_proportion.csv" in dir_list + assert "simulation_results.html" in dir_list + + +@settings(deadline=None) +@given( + st.just(["a1", "a2"]), + st.just(3), + st.integers(min_value=1, max_value=3), + st.integers(min_value=1, max_value=10), + st.booleans(), + st.sampled_from([None, 0, 42]), + st.booleans(), + st.booleans(), + st.sampled_from(["", "unit_test"]), + st.integers(min_value=1, max_value=3), +) +def test_cmab_e2e_simulation_with_non_default_args( + action_ids, + n_features, + n_updates, + batch_size, + save, + random_seed, + verbose, + visualize, + file_prefix, + num_groups, +): + base_groups = list(range(num_groups)) + group = base_groups * (n_updates * batch_size // num_groups) + base_groups[: (n_updates * batch_size % num_groups)] + effective_base_groups = sorted(set(group)) + context = ( + np.repeat(np.arange(n_features).reshape(1, -1), n_updates * batch_size, axis=0).T + * (np.array(group) - np.mean(group)) + ).T + probs_reward = pd.DataFrame( + np.random.uniform(0, 1, (len(effective_base_groups), len(action_ids))), + columns=action_ids, + index=[str(g) for g in effective_base_groups], + ) + mab = CmabBernoulli.cold_start(action_ids=action_ids, n_features=n_features, update_method="VI") + if visualize and not save: + with pytest.raises(ValueError): + CmabSimulator( + mab=mab, + visualize=visualize, + save=save, + group=[str(g) for g in group], + n_updates=n_updates, + batch_size=batch_size, + random_seed=random_seed, + probs_reward=probs_reward, + verbose=verbose, + file_prefix=file_prefix, + 
context=context, + ) + else: + with TemporaryDirectory() as path: + simulator = CmabSimulator( + mab=mab, + visualize=visualize, + save=save, + path=path, + group=[str(g) for g in group], + n_updates=n_updates, + batch_size=batch_size, + random_seed=random_seed, + probs_reward=probs_reward, + verbose=verbose, + file_prefix=file_prefix, + context=context, + ) + simulator.run() + if save: + assert not simulator.results.empty + dir_list = os.listdir(path) + if file_prefix: + assert f"{file_prefix}_simulation_results.csv" in dir_list + assert f"{file_prefix}_selected_actions_count.csv" in dir_list + assert f"{file_prefix}_positive_reward_proportion.csv" in dir_list + if visualize: + assert f"{file_prefix}_simulation_results.html" in dir_list + else: + assert "simulation_results.csv" in dir_list + assert "selected_actions_count.csv" in dir_list + assert "positive_reward_proportion.csv" in dir_list + if visualize: + assert "simulation_results.html" in dir_list diff --git a/tests/test_simulation_cmab.py b/tests/test_simulation_cmab.py deleted file mode 100644 index c01d964..0000000 --- a/tests/test_simulation_cmab.py +++ /dev/null @@ -1,127 +0,0 @@ -# # MIT License -# # -# # Copyright (c) 2022 Playtika Ltd. -# # -# # Permission is hereby granted, free of charge, to any person obtaining a copy -# # of this software and associated documentation files (the "Software"), to deal -# # in the Software without restriction, including without limitation the rights -# # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# # copies of the Software, and to permit persons to whom the Software is -# # furnished to do so, subject to the following conditions: -# # -# # The above copyright notice and this permission notice shall be included in all -# # copies or substantial portions of the Software. -# # -# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# # SOFTWARE. 
- -# import numpy as np -# import pandas as pd -# from numpy.testing import assert_equal -# from pandas._testing import assert_frame_equal - -# from pybandits.cmab import Cmab -# from pybandits.simulation_cmab import SimulationCmab - - -# def test_init(): -# """Test init SimulationCmab.""" - -# random_seed = 1 -# batch_size = 100 -# n_groups = 3 -# n_updates = 10 -# n_jobs = 1 -# actions_ids = ['action A', 'action B', 'action C'] -# n_features = 5 -# X = np.random.rand(batch_size*n_updates, n_features) - -# group = np.random.randint(n_groups, size=batch_size*n_updates) -# cmab = Cmab(n_features=n_features, actions_ids=actions_ids, n_jobs=n_jobs, random_seed=random_seed) -# prob_rewards = pd.DataFrame([[0.05, 0.80, 0.05], -# [0.80, 0.05, 0.05], -# [0.80, 0.05, 0.80]], columns=actions_ids, index=range(n_groups)) -# verbose = True -# save = True -# path = 'tests/simulation/' - -# # init with default params -# sim = SimulationCmab(cmab=cmab, X=X) - -# # init with custom params -# X = pd.DataFrame(np.random.rand(batch_size * n_updates, n_features)) -# sim = SimulationCmab(cmab=cmab, X=X, group=group, batch_size=batch_size, n_updates=n_updates, -# prob_rewards=prob_rewards, save=save, path=path, random_seed=random_seed, verbose=verbose) - -# assert sim._X.shape == (sim._batch_size * sim._n_updates, sim._cmab._n_features), 'X shape mismatch' -# assert sim.results['group'].shape == (sim._batch_size * sim._n_updates,), ' group shape mismatch' -# assert sim.results['group'].isin(range(sim._n_groups)).all(), 'group array should contain only values in ' \ -# '' + str(range(n_groups)) -# assert sim._rewards.shape == (sim._batch_size * sim._n_updates, len(actions_ids)), 'reward shape mismatch' -# assert sim.results.shape == (sim._batch_size * sim._n_updates, sim._cmab._n_features), 'result shape mismatch' - -# assert_frame_equal(sim._X, X) - - -# def test_run(): -# """ Test simulation with cmab model. 
""" - -# random_seed = 2 -# batch_size = 10 -# features_ids = ['feat_1', 'feat_2'] -# n_groups = 2 -# n_updates = 2 -# actions_ids = ['action A', 'action B'] -# prob_rewards = pd.DataFrame([[0.05, 0.80], -# [0.80, 0.05]], columns=actions_ids, index=range(n_groups)) -# cmab = Cmab(n_features=len(features_ids), actions_ids=actions_ids, random_seed=random_seed) -# df = pd.DataFrame([[34.07772868659151, -28.948390811625714, 'action B', 0.0, 1, 0.05, 0.8, 0.75, 0.75], -# [47.602172988242444, -11.585294068594154, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 0.75], -# [58.74075304505904, -99.71942656529076, 'action A', 0.0, 1, 0.8, 0.8, 0.0, 0.75], -# [41.462039348288144, -117.66517424958462, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 1.5], -# [56.18746540687566, -122.02451865370041, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 2.25], -# [29.982587761836534, -114.24870860989691, 'action B', 0.0, 1, 0.05, 0.8, 0.75, 3.0], -# [56.085236090749326, -1.974650230141235, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 3.0], -# [32.42925525229372, -106.92841840939255, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 3.75], -# [31.00949679739198, -42.8284308455658, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 4.5], -# [35.47919944987376, -88.52700687134366, 'action B', 1.0, 0, 0.8, 0.8, 0.0, 4.5], -# [62.6929015285017, -148.44461692725085, 'action B', 1.0, 0, 0.8, 0.8, 0.0, 4.5], -# [39.95202041753999, 49.56228281374906, 'action B', 0.0, 1, 0.05, 0.8, 0.75, 5.25], -# [47.779661185138565, 23.932111189164278, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 5.25], -# [31.077061872610724, 88.69384882684793, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 5.25], -# [56.119706130978265, -138.1918694119457, 'action B', 1.0, 0, 0.8, 0.8, 0.0, 5.25], -# [37.9189897898034, 136.88209858829075, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 5.25], -# [35.18935782070607, -83.10216873048782, 'action B', 0.0, 0, 0.8, 0.8, 0.0, 5.25], -# [2.11516522092686, -0.702259810984084, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 6.0], -# [25.560807764772438, -2.8576525901465555, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 6.75], -# [40.465956204463076, 144.48135008944516, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 6.75]], -# columns=features_ids+['action', 'reward', 'group', 'selected_prob_reward', 'max_prob_reward', -# 'regret', 'cum_regret']) -# X = df[features_ids] -# group = df['group'] -# verbose = True -# save = False -# path = 'tests/simulation/' - -# # init simulation -# sim = SimulationCmab(cmab=cmab, X=X, group=group, batch_size=batch_size, n_updates=n_updates, -# prob_rewards=prob_rewards, save=save, path=path, random_seed=random_seed, verbose=verbose) - -# # start simulation -# sim.run() -# assert_frame_equal(sim.results, df[['action', 'reward', 'group', 'selected_prob_reward', 'max_prob_reward', -# 'regret', 'cum_regret']]) - -# # test functions get -# d = {'group 0': {'action A': 6, 'action B': 4}, -# 'group 1': {'action A': 7, 'action B': 3}} -# assert_equal(sim.get_count_selected_actions(), d) - -# d = {'group 0': {'action A': np.nan, 'action B': 0.75}, -# 'group 1': {'action A': 0.8571428571428571, 'action B': np.nan}} -# assert_equal(sim.get_proportion_positive_reward(), d) diff --git a/tests/test_simulation_smab.py b/tests/test_simulation_smab.py deleted file mode 100644 index a180ef2..0000000 --- a/tests/test_simulation_smab.py +++ /dev/null @@ -1,102 +0,0 @@ -# # MIT License -# # -# # Copyright (c) 2022 Playtika Ltd. 
-# # -# # Permission is hereby granted, free of charge, to any person obtaining a copy -# # of this software and associated documentation files (the "Software"), to deal -# # in the Software without restriction, including without limitation the rights -# # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# # copies of the Software, and to permit persons to whom the Software is -# # furnished to do so, subject to the following conditions: -# # -# # The above copyright notice and this permission notice shall be included in all -# # copies or substantial portions of the Software. -# # -# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# # SOFTWARE. - -# import pandas as pd -# from pandas._testing import assert_frame_equal - -# from pybandits.smab import Smab -# from pybandits.simulation_smab import SimulationSmab - - -# def test_init(): -# random_seed = 1 -# smab = Smab(action_ids=["action A", "action B", "action C"], random_seed=random_seed) -# SimulationSmab(smab=smab) -# SimulationSmab( -# smab=smab, -# n_updates=20, -# batch_size=2000, -# probs_reward={"action A": 0.6, "action B": 0.0, "action C": 1.0}, -# save=True, -# path="folder/", -# random_seed=1, -# verbose=True, -# ) - - -# def test_run(): -# random_seed = 1 -# smab = Smab(action_ids=["action A", "action B", "action C"], random_seed=random_seed) -# sim = SimulationSmab(smab=smab, n_updates=5, batch_size=6, random_seed=random_seed) -# sim.run() - -# X = pd.DataFrame( -# [ -# ["action B", 1.0], -# ["action B", 0.0], -# ["action A", 1.0], -# ["action A", 0.0], -# ["action C", 1.0], -# ["action C", 0.0], -# ["action C", 1.0], -# ["action A", 1.0], -# ["action A", 0.0], -# ["action A", 0.0], -# ["action B", 1.0], -# ["action B", 0.0], -# ["action C", 1.0], -# ["action C", 0.0], -# ["action B", 1.0], -# ["action C", 0.0], -# ["action B", 0.0], -# ["action C", 1.0], -# ["action C", 1.0], -# ["action A", 1.0], -# ["action C", 0.0], -# ["action A", 0.0], -# ["action B", 1.0], -# ["action B", 0.0], -# ["action C", 1.0], -# ["action C", 0.0], -# ["action B", 1.0], -# ["action B", 0.0], -# ["action C", 0.0], -# ["action C", 1.0], -# ], -# columns=["action", "reward"], -# ) -# assert_frame_equal(sim.results, X) - - -# def test_functions_get(): -# random_seed = 1 -# smab = Smab(action_ids=["action A", "action B", "action C"], random_seed=random_seed) -# sim = SimulationSmab(smab=smab, n_updates=5, batch_size=6, random_seed=random_seed) -# sim.run() - -# summary_action = {"action A": 7, "action B": 10, "action C": 13} -# summary_reward = {"action A": 0.42857142857142855, "action B": 0.5, "action C": 0.5384615384615384} - -# _, _ = sim.get_cumulative_proportions()["action"], sim.get_cumulative_proportions()["reward"] - -# assert sim.get_count_selected_actions() == summary_action -# assert sim.get_proportion_positive_reward() == summary_reward diff --git a/tests/test_simulator.py b/tests/test_simulator.py new file mode 100644 index 0000000..eee7b5e --- /dev/null +++ b/tests/test_simulator.py @@ -0,0 +1,67 @@ +# MIT License +# +# Copyright (c) 2022 Playtika Ltd. 
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from random import choices
+from typing import Dict, List, Tuple
+
+import numpy as np
+import pandas as pd
+import pytest
+from pytest_mock import MockerFixture
+
+from pybandits.base import ActionId, BinaryReward
+from pybandits.mab import BaseMab
+from pybandits.simulator import Simulator
+
+
+class DummySimulator(Simulator):
+    def _initialize_results(self):
+        self._results = pd.DataFrame()
+
+    def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]:
+        return choices([0, 1], k=len(actions))
+
+    def _get_batch_step_kwargs_and_metadata(self, batch_index: int) -> Tuple[Dict[str, np.ndarray], Dict[str, List]]:
+        return {}, {}
+
+    def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame:
+        return batch_results
+
+    def _finalize_results(self):
+        pass
+
+
+def test_mismatched_probs_reward_columns(mocker: MockerFixture):
+    def check_value_error(probs_reward):
+        with pytest.raises(ValueError):
+            DummySimulator(mab=mab, probs_reward=probs_reward)
+
+    mab = mocker.Mock(spec=BaseMab)
+    mab.actions = {"a1": mocker.Mock(), "a2": mocker.Mock()}
+    mab.epsilon = 0.0
+    mab.default_action = None
+    probs_reward = pd.DataFrame({"a3": [0.5]})
+    check_value_error(probs_reward)
+    probs_reward = pd.DataFrame({"a1": [0.5], "a2": [2]})
+    check_value_error(probs_reward)
+    probs_reward = pd.DataFrame({"a1": [0.5], "a2": [0.5], "a3": [0.5]})
+    check_value_error(probs_reward)
diff --git a/tests/test_smab_simulator.py b/tests/test_smab_simulator.py
new file mode 100644
index 0000000..912051d
--- /dev/null
+++ b/tests/test_smab_simulator.py
@@ -0,0 +1,119 @@
+# MIT License
+#
+# Copyright (c) 2022 Playtika Ltd.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import os +from tempfile import TemporaryDirectory + +import numpy as np +import pandas as pd +import pytest +from hypothesis import given, settings +from hypothesis import strategies as st +from pytest_mock import MockerFixture + +from pybandits.model import Beta +from pybandits.smab import SmabBernoulli +from pybandits.smab_simulator import SmabSimulator + + +def test_mismatched_probs_reward_columns(mocker: MockerFixture): + smab = mocker.Mock(spec=SmabBernoulli) + smab.actions = {"a1": mocker.Mock(), "a2": mocker.Mock()} + smab.epsilon = 0.0 + smab.default_action = None + probs_reward = pd.DataFrame({"a1": [0.5, 0.5], "a2": [0.5, 0.5]}) + with pytest.raises(ValueError): + SmabSimulator(mab=smab, probs_reward=probs_reward) + + +def test_smab_e2e_simulation_with_default_args(action_ids=["a1", "a2"]): + mab = SmabBernoulli(actions={action_id: Beta() for action_id in action_ids}) + with TemporaryDirectory() as path: + simulator = SmabSimulator(mab=mab, visualize=True, save=True, path=path) + simulator.run() + assert not simulator.results.empty + dir_list = os.listdir(path) + assert "simulation_results.csv" in dir_list + assert "selected_actions_count.csv" in dir_list + assert "positive_reward_proportion.csv" in dir_list + assert "simulation_results.html" in dir_list + + +@settings(deadline=1000) +@given( + st.just(["a1", "a2"]), + st.integers(min_value=1, max_value=10), + st.integers(min_value=1, max_value=10), + st.booleans(), + st.sampled_from([None, 0, 42]), + st.booleans(), + st.booleans(), + st.sampled_from(["", "unit_test"]), +) +def test_smab_e2e_simulation_with_non_default_args( + action_ids, n_updates, batch_size, save, random_seed, verbose, visualize, file_prefix +): + probs_reward = pd.DataFrame(np.random.uniform(0, 1, (1, len(action_ids))), columns=action_ids) + mab = SmabBernoulli.cold_start(action_ids=action_ids) + if visualize and not save: + with pytest.raises(ValueError): + SmabSimulator( + mab=mab, + visualize=visualize, + save=save, + n_updates=n_updates, + batch_size=batch_size, + random_seed=random_seed, + probs_reward=probs_reward, + verbose=verbose, + file_prefix=file_prefix, + ) + else: + with TemporaryDirectory() as path: + simulator = SmabSimulator( + mab=mab, + visualize=visualize, + save=save, + path=path, + n_updates=n_updates, + batch_size=batch_size, + random_seed=random_seed, + probs_reward=probs_reward, + verbose=verbose, + file_prefix=file_prefix, + ) + simulator.run() + if save: + assert not simulator.results.empty + dir_list = os.listdir(path) + if file_prefix: + assert f"{file_prefix}_simulation_results.csv" in dir_list + assert f"{file_prefix}_selected_actions_count.csv" in dir_list + assert f"{file_prefix}_positive_reward_proportion.csv" in dir_list + if visualize: + assert f"{file_prefix}_simulation_results.html" in dir_list + else: + assert "simulation_results.csv" in dir_list + assert "selected_actions_count.csv" in dir_list + assert "positive_reward_proportion.csv" in dir_list + if visualize: + assert "simulation_results.html" in dir_list
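
Usage sketch (illustrative only, not part of the patch): a minimal end-to-end run of the
new simulator API, mirroring the tests above. SmabBernoulli.cold_start, SmabSimulator,
its n_updates/batch_size/probs_reward arguments, and the results property all come from
this diff; the reward probabilities below are arbitrary example values.

    import pandas as pd

    from pybandits.smab import SmabBernoulli
    from pybandits.smab_simulator import SmabSimulator

    # Cold-start a Bernoulli sMAB with two actions and uniform Beta priors.
    mab = SmabBernoulli.cold_start(action_ids=["a1", "a2"])

    # One row of per-action reward probabilities defines the simulated environment.
    probs_reward = pd.DataFrame({"a1": [0.8], "a2": [0.2]})

    # Run 10 batches of 100 samples each; results stay in memory unless save=True.
    simulator = SmabSimulator(mab=mab, probs_reward=probs_reward, n_updates=10, batch_size=100)
    simulator.run()
    print(simulator.results.head())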