From 64913ef70653e9b48341caa14d29d9a9419b97e2 Mon Sep 17 00:00:00 2001 From: Shahar Bar <33932594+shaharbar1@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:58:15 +0200 Subject: [PATCH] Renew simulation framework for multi-armed bandits (#39) ### Changes * Introduced `Simulator` base abstract class for simulating multi-armed bandit environments. * Added `SmabSimulator` class for simulating stochastic multi-armed bandits (sMAB). * Added `CmabSimulator` class for simulating contextual multi-armed bandits (cMAB). * Added a utility function under utils.py for identifying the running code environment. * Updated pyproject.toml to include bokeh dependency for interactive visualization. * Added unit tests for the various simulators to ensure proper functionality. * Removed simulation_plots.py. * Edited pyproject.toml to remove deprecation warnings. * Added model_post_init functionality in PyBanditsBaseModel for pydantic v2 compatibility. * Added concurrent pytest execution on CI/CD. --- .github/workflows/continuous_delivery.yml | 2 +- .github/workflows/continuous_integration.yml | 2 +- pybandits/base.py | 15 + pybandits/cmab_simulator.py | 173 ++++++ pybandits/pydantic_version_compatibility.py | 15 +- pybandits/simulation_cmab.py | 307 ---------- pybandits/simulation_plots.py | 45 -- pybandits/simulation_smab.py | 213 ------- pybandits/simulator.py | 614 +++++++++++++++++++ pybandits/smab_simulator.py | 140 +++++ pybandits/utils.py | 89 ++- pyproject.toml | 15 +- tests/test_cmab.py | 11 +- tests/test_cmab_simulator.py | 167 +++++ tests/test_simulation_cmab.py | 127 ---- tests/test_simulation_smab.py | 102 --- tests/test_simulator.py | 67 ++ tests/test_smab_simulator.py | 119 ++++ 18 files changed, 1416 insertions(+), 807 deletions(-) create mode 100644 pybandits/cmab_simulator.py delete mode 100644 pybandits/simulation_cmab.py delete mode 100644 pybandits/simulation_plots.py delete mode 100644 pybandits/simulation_smab.py create mode 100644 pybandits/simulator.py create mode 100644 pybandits/smab_simulator.py create mode 100644 tests/test_cmab_simulator.py delete mode 100644 tests/test_simulation_cmab.py delete mode 100644 tests/test_simulation_smab.py create mode 100644 tests/test_simulator.py create mode 100644 tests/test_smab_simulator.py diff --git a/.github/workflows/continuous_delivery.yml b/.github/workflows/continuous_delivery.yml index ab2679e..7c2bc7b 100644 --- a/.github/workflows/continuous_delivery.yml +++ b/.github/workflows/continuous_delivery.yml @@ -40,7 +40,7 @@ jobs: poetry run pre-commit run --all-files - name: Run tests run: | - poetry run pytest -vv -k 'not time and not update_parallel' + poetry run pytest -n auto -vv - name: Extract version from pyproject.toml id: extract_version run: | diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index 2853deb..6c3cdac 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -45,4 +45,4 @@ jobs: poetry run pre-commit run --all-files - name: Run tests run: | - poetry run pytest -vv -k 'not time and not update_parallel' + poetry run pytest -n auto -vv diff --git a/pybandits/base.py b/pybandits/base.py index 3b59c74..4cae4ad 100644 --- a/pybandits/base.py +++ b/pybandits/base.py @@ -36,7 +36,12 @@ ActionId = NewType("ActionId", constr(min_length=1)) Float01 = NewType("Float_0_1", confloat(ge=0, le=1)) Probability = NewType("Probability", Float01) +# SmabPredictions is a tuple of two lists: the first list contains the selected action 
ids, +# and the second list contains their associated probabilities SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]]) +# CmabPredictions is a tuple of three lists: the first list contains the selected action ids, +# the second list contains their associated probabilities, +# and the third list contains their associated weighted sums CmabPredictions = NewType( "CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]] ) @@ -59,6 +64,16 @@ class PyBanditsBaseModel(BaseModel, extra="forbid"): BaseModel of the PyBandits library. """ + if pydantic_version == PYDANTIC_VERSION_1: + + def __init__(self, **data): + super().__init__(**data) + + self.model_post_init(None) + + def model_post_init(self, __context: Any) -> None: + pass + def _apply_version_adjusted_method(self, v2_method_name: str, v1_method_name: str, **kwargs) -> Any: """ Apply the method with the given name, adjusting for the pydantic version. diff --git a/pybandits/cmab_simulator.py b/pybandits/cmab_simulator.py new file mode 100644 index 0000000..be3fb4a --- /dev/null +++ b/pybandits/cmab_simulator.py @@ -0,0 +1,173 @@ +# MIT License +# +# Copyright (c) 2022 Playtika Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import random +from typing import Dict, List, Optional, Tuple + +import numpy as np +import pandas as pd + +from pybandits.base import ActionId, BinaryReward +from pybandits.cmab import BaseCmabBernoulli +from pybandits.pydantic_version_compatibility import Field, model_validator +from pybandits.simulator import Simulator + + +class CmabSimulator(Simulator): + """ + Simulate environment for contextual multi-armed bandit models. + + This class simulates information required by the contextual bandit. Generated data are processed by the bandit with + batches of size n>=1. For each batch of samples, actions are recommended by the bandit and corresponding simulated + rewards collected. Bandit policy parameters are then updated based on returned rewards from recommended actions. + + Parameters + ---------- + mab : BaseCmabBernoulli + Contextual multi-armed bandit model + context : np.ndarray of shape (n_samples, n_features) + Context matrix of samples features. + group : Optional[List] with length=n_samples + Group to which each sample belongs. 
Samples that belong to the same group have features that come from the + same distribution and share the same probability of receiving positive/negative feedback from each action. + If not supplied, all samples are assigned to the same group. + """ + + mab: BaseCmabBernoulli = Field(validation_alias="cmab") + context: np.ndarray + group: Optional[List] = None + _base_columns: List[str] = ["batch", "action", "reward", "group"] + + @model_validator(mode="before") + @classmethod + def replace_nulls_and_validate_sizes_and_dtypes(cls, values): + context = values["context"] + batch_size = cls._get_value_with_default("batch_size", values) + n_updates = cls._get_value_with_default("n_updates", values) + group = cls._get_value_with_default("group", values) + + if len(context) != batch_size * n_updates: + raise ValueError("Context length must equal batch_size x n_updates.") + if group is None: + group = len(context) * ["0"] + values["group"] = group + else: + if len(context) != len(group): + raise ValueError("Mismatch between context length and group length") + values["group"] = [str(g) for g in group] + mab_action_ids = list(values["mab"].actions.keys()) + index = list(set(group)) + probs_reward = cls._get_value_with_default("probs_reward", values) + if probs_reward is None: + probs_reward = pd.DataFrame(0.5, index=index, columns=mab_action_ids) + values["probs_reward"] = probs_reward + else: + if probs_reward.shape[0] != len(index): + raise ValueError("Number of probs_reward rows must match the number of groups.") + return values + + def _initialize_results(self): + """ + Initialize the results DataFrame. The results DataFrame is used to store the raw simulation results. + """ + self._results = pd.DataFrame( + columns=["action", "reward", "group", "selected_prob_reward", "max_prob_reward"], + ) + + def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]: + """ + Draw rewards for the selected actions based on metadata according to probs_reward. + + Parameters + ---------- + actions : List[ActionId] + The actions selected by the multi-armed bandit model. + metadata : Dict[str, List] + The metadata for the selected actions; should contain the batch groups association. + + Returns + ------- + reward : List[BinaryReward] + A list of binary rewards. + """ + rewards = [int(random.random() < self.probs_reward.loc[g, a]) for g, a in zip(metadata["group"], actions)] + return rewards + + def _get_batch_step_kwargs_and_metadata( + self, batch_index + ) -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray], Dict[str, List]]: + """ + Extract context required for the cMAB's update and predict functionality, + as well as metadata for sample group. + + Parameters + ---------- + batch_index : int + The index of the batch. + + Returns + ------- + predict_kwargs : Dict[str, np.ndarray] + Dictionary containing the context for the batch. + update_kwargs : Dict[str, np.ndarray] + Dictionary containing the context for the batch. + metadata : Dict[str, List] + Dictionary containing the group information for the batch. + """ + idx_batch_min = batch_index * self.batch_size + idx_batch_max = (batch_index + 1) * self.batch_size + predict_and_update_kwargs = {"context": self.context[idx_batch_min:idx_batch_max]} + metadata = {"group": self.group[idx_batch_min:idx_batch_max]} + return predict_and_update_kwargs, predict_and_update_kwargs, metadata + + def _finalize_step(self, batch_results: pd.DataFrame): + """ + Finalize the step by adding additional information to the batch results. 
+ + Parameters + ---------- + batch_results : pd.DataFrame + raw batch results + + Returns + ------- + batch_results : pd.DataFrame + batch results with added reward probability for selected action and most rewarding action + """ + group_id = batch_results.loc[:, "group"] + action_id = batch_results.loc[:, "action"] + selected_prob_reward = [self.probs_reward.loc[g, a] for g, a in zip(group_id, action_id)] + batch_results.loc[:, "selected_prob_reward"] = selected_prob_reward + max_prob_reward = self.probs_reward.loc[group_id].max(axis=1) + batch_results.loc[:, "max_prob_reward"] = max_prob_reward.tolist() + return batch_results + + def _finalize_results(self): + """ + Finalize the simulation process. Used to add regret and cumulative regret + + Returns + ------- + None + """ + self._results["regret"] = self._results["max_prob_reward"] - self._results["selected_prob_reward"] + self._results["cum_regret"] = self._results["regret"].cumsum() diff --git a/pybandits/pydantic_version_compatibility.py b/pybandits/pydantic_version_compatibility.py index a119264..b032ecd 100644 --- a/pybandits/pydantic_version_compatibility.py +++ b/pybandits/pydantic_version_compatibility.py @@ -27,7 +27,18 @@ from typing import Any, Callable, Dict, Literal, Optional, Union from warnings import warn -from pydantic import BaseModel, Field, NonNegativeFloat, PositiveInt, ValidationError, confloat, conint, constr +from pydantic import ( + BaseModel, + Field, + NonNegativeFloat, + NonNegativeInt, + PositiveInt, + PrivateAttr, + ValidationError, + confloat, + conint, + constr, +) from pydantic.version import VERSION as _VERSION # Define the pydantic versions @@ -258,6 +269,7 @@ def _convert_config_param(config: Dict[str, Any], v2_name: str, v1_name: str) -> "model_validator", "validate_call", "NonNegativeFloat", + "NonNegativeInt", "PositiveInt", "BaseModel", "ValidationError", @@ -265,4 +277,5 @@ def _convert_config_param(config: Dict[str, Any], v2_name: str, v1_name: str) -> "conint", "constr", "Field", + "PrivateAttr", ] diff --git a/pybandits/simulation_cmab.py b/pybandits/simulation_cmab.py deleted file mode 100644 index 1578f32..0000000 --- a/pybandits/simulation_cmab.py +++ /dev/null @@ -1,307 +0,0 @@ -# MIT License -# -# Copyright (c) 2022 Playtika Ltd. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import random -from json import dump - -import numpy as np -import pandas as pd - -from pybandits.core.cmab import Cmab, check_context_matrix - - -class SimulationCmab: - """ - Simulate environment for contextual multi-armed bandit models. - - This class simulates information required by the contextual bandit. Generated data are processed by the bandit with - batches of size n>=1. For each batch of samples, actions are recommended by the bandit and corresponding simulated - rewards collected. Bandit policy parameters are then updated based on returned rewards from recommended actions. - - Parameters - ---------- - cmab : pybandits.core.cmab.Cmab - Contextual multi-armed bandit model - X : array_like of shape (n_samples, n_feature) - Context matrix of samples features. - batch_size: int, default=100 - The number of samples per batch. - n_updates : int, default=10 - The number of updates in the simulation. - group : list int with length=n_samples - Group to which each sample belongs. Samples which belongs to the same group have features that come from the - same distribution and they have the same probability to receive a positive/negative feedback from each action. - prob_rewards : pd.DataFrame of shape (n_groups, n_actions) - Matrix of positive reward probability for each group-action combination. If None all probs are set to 0.5. - save : bool, default=False - Boolean flag to save the results. - path : string, default='' - Path where results are saved if save=True - verbose : bool, default=False - Enable verbose output. If True produce detailed logging information about the simulation. - random_seed : int, default=None - Seed for random state. If specified, the model outputs deterministic results. - """ - - def __init__( - self, - cmab, - X, - batch_size=100, - n_updates=10, - group=None, - prob_rewards=None, - save=False, - path="", - random_seed=None, - verbose=False, - ): - # init cmab - if type(cmab) is not Cmab: - raise TypeError("cmab must be of type pybandits.core.cmab.Cmab") - self._cmab = cmab - - # init batch-size - if type(batch_size) is not int and batch_size <= 0: - raise ValueError("batch_size must be an integer > 0") - self._batch_size = batch_size - - # init n_updates - if type(n_updates) is not int and n_updates <= 0: - raise ValueError("n_updates must be an integer > 0") - self._n_updates = n_updates - - # init X - self._X = check_context_matrix(X, cmab._n_features) - if len(self._X) != batch_size * n_updates: - raise ValueError( - "Mismatch between n_samples samples in the context matrix with batch_size and n_updates. " - "len(X) must be equal to batch_size x n_updates." 
- ) - - # init group - if group is None: - group = len(X) * [0] # if the input argument group is not specified, set all samples group to #0 - if len(group) != len(X): - raise ValueError("The length of X must equal to the length of group") - self._n_groups = len(set(group)) - - # create matrix of probability rewards if None (by default all probs = 0.5) - if prob_rewards is None: - prob_rewards = pd.DataFrame(0.5, index=set(group), columns=cmab._actions_ids) - if prob_rewards.shape[0] != self._n_groups and self._prob_rewards.shape[1] != len(self._cmab._actions_ids): - raise ValueError( - "matrix of probability rewards should have shape ({}, {}), while detected shape is {}".format( - len(group), len(cmab._actions_ids), prob_rewards.shape - ) - ) - - # init prob_rewards, save, path, verbose - if type(path) is not str: - raise TypeError("path must be a string") - if type(save) is not bool: - raise TypeError("save must be boolean (True/False)") - if type(random_seed) is not int and random_seed is not None: - raise TypeError("random_seed must be an integer") - if type(verbose) is not bool: - raise TypeError("verbose must be boolean (True/False)") - self._prob_rewards = prob_rewards - self._save = save - self._path = path - self._verbose = verbose - - # create rewards per each sample given the matrix of probability rewards - random.seed(random_seed) - self._rewards = [ - [1 if random.random() < prob_rewards.iloc[group[i], j] else 0 for j in range(len(cmab._actions_ids))] - for i in range(batch_size * n_updates) - ] - self._rewards = pd.DataFrame(self._rewards, columns=cmab._actions_ids) - - # created DataFrame for simulation results - self.results = pd.DataFrame( - np.nan, - columns=["action", "reward", "group", "selected_prob_reward", "max_prob_reward"], - index=range(batch_size * n_updates), - ) - self.results["group"] = pd.Series(group) - - if self._verbose: - print("Setup simulation completed.") - df = pd.DataFrame( - [ - np.sum(self._rewards.loc[self.results["group"] == i]) / sum(self.results["group"] == i) - for i in range(self._n_groups) - ] - ) - df.index.name = "group" - self._prob_rewards.index.name = "group" - print("Simulated input probability rewards:\n", df, "\n") - - def run(self): - """ - Start simulation process. 
It consists in the following steps: - - - for i=0 to n_updates - - Extract batch[i] of samples from X - - Model recommends the best actions as the action with the highest reward probability to each simulated - sample in batch[i] and collect corresponding simulated rewards - - Model priors are updated using information from recommended actions and returned rewards - """ - - for i in range(self._n_updates): - if self._verbose: - print("Iteration #{}".format(i + 1)) - - # extract simulated data for the current batch and scale the features - idx_batch_min = i * self._batch_size - idx_batch_max = (i + 1) * self._batch_size - 1 - X_batch = self._X.loc[idx_batch_min:idx_batch_max] - - # predict - if self._verbose: - print("Start predict batch {} ...".format(i + 1)) - - actions, _ = self._cmab.fast_predict(X_batch) - - # Get reward - rewards = [self._rewards.loc[j + idx_batch_min, actions[j]] for j in range(self._batch_size)] - - # update cmab - if self._verbose: - print("Start update batch {} ...".format(i + 1), "\n") - self._cmab.update(X=X_batch, actions=actions, rewards=rewards) - - # write in simulation results - self.results.loc[idx_batch_min:idx_batch_max, "action"] = pd.Series( - actions, index=range(idx_batch_min, idx_batch_max + 1) - ) - self.results.loc[idx_batch_min:idx_batch_max, "reward"] = pd.Series( - rewards, index=range(idx_batch_min, idx_batch_max + 1) - ) - - # write for regret analysis: - # 1. extract group information - # 2. reward prob for selected action and - # 3. reward probability from most rewarding action - group_id = self.results.loc[idx_batch_min:idx_batch_max, "group"].tolist() - selected_prob_reward = [ - self._prob_rewards.iloc[group_id[k], self._cmab._actions_ids.index(actions[k])] - for k in range(len(actions)) - ] - self.results.loc[idx_batch_min:idx_batch_max, "selected_prob_reward"] = pd.Series( - selected_prob_reward, index=range(idx_batch_min, idx_batch_max + 1) - ) - max_prob_reward = [self._prob_rewards.iloc[group_id[k],].max() for k in range(len(actions))] - self.results.loc[idx_batch_min:idx_batch_max, "max_prob_reward"] = pd.Series( - max_prob_reward, index=range(idx_batch_min, idx_batch_max + 1) - ) - - # save partial results - if self._save: - self.results.to_csv(self._path + "simulation_results.csv") - - # compute expected cumulative regrets - self.results["regret"] = self.results["max_prob_reward"] - self.results["selected_prob_reward"] - self.results["cum_regret"] = self.results["regret"].cumsum() - - if self._verbose: - self._print_results() - - # store results - if self._save: - if self._verbose: - print("Saving results...") - self._save_results() - - def get_count_selected_actions(self): - """ - Get the proportion of recommended actions per group at the end of the process. - - Returns - ------- - df : pandas DataFrame - Matrix of the proportion of recommended actions per group. - """ - return { - "group " + str(i): (self.results["action"].loc[self.results["group"] == i].value_counts()).to_dict() - for i in range(self._n_groups) - } - - def get_proportion_positive_reward(self): - """ - Get the proportion of positive rewards per group/action at the end of the process. - - Returns - ------- - df : pandas DataFrame - Matrix of the proportion of positive rewards per group/action. 
- """ - return { - "group " + str(i): ( - self.results["action"].loc[(self.results["group"] == i) & (self.results["reward"] == 1)].value_counts() - / self.results["action"].loc[(self.results["group"] == i)].value_counts() - ).to_dict() - for i in range(self._n_groups) - } - - def get_cumulative_proportions(self, path=""): - """ - Plot results of the simulation. It will create two plots per each group which display: - - The cumulated proportion of action - - The cumulated proportion of rewards - - Parameters - ---------- - path: str, default='' - Path in which plot figures are saved. - """ - d = {} - for i in range(self._n_groups): - actions = pd.get_dummies(self.results["action"].loc[self.results["group"] == i]).reset_index(drop=True) - actions_plot = actions.cumsum().div(actions.index.values + 1, axis=0) - - rewards = pd.get_dummies(self.results["action"].loc[self.results["group"] == i]) - rewards.loc[(self.results["group"] == i) & self.results["reward"] == 0] = 0 - rewards.reset_index(inplace=True, drop=True) - rewards_plot = rewards.cumsum().div(actions.cumsum()) - - d["group " + str(i)] = {"action": actions_plot, "reward": rewards_plot} - return d - - def _print_results(self): - """Private function to print results.""" - print("Simulation results (first 10 observations):\n", self.results.head(10), "\n") - print("Count of actions selected by the bandit: \n", self.get_count_selected_actions(), "\n") - print("Observed proportion of positive rewards for each action:\n", self.get_proportion_positive_reward(), "\n") - - def _save_results(self): - """Private function to save results.""" - self.results.to_csv(self._path + "simulation_results.csv") - - f = open("count_selected_actions.json", "w") - dump(self.get_count_selected_actions(), f) - f.close() - - f = open("proportions_of_positive_rewards.json", "w") - dump(self.get_proportion_positive_reward(), f) - f.close() diff --git a/pybandits/simulation_plots.py b/pybandits/simulation_plots.py deleted file mode 100644 index fcd78bb..0000000 --- a/pybandits/simulation_plots.py +++ /dev/null @@ -1,45 +0,0 @@ -# MIT License -# -# Copyright (c) 2022 Playtika Ltd. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import matplotlib.pyplot as plt -import pandas as pd -from plotnine import aes, geom_line, ggplot, ylab - - -def plot_cumulative_proportions(matrix, figsize=None, path="", title=""): - """Plot simulation.""" - fig, ax = plt.subplots(figsize=figsize) - ax.plot(matrix) - ax.set_title(title) - ax.set_xlabel("number of observations") - ax.set_ylim(-0.1, 1.1) - ax.set_ylabel("Cumulative proportion") - ax.legend(matrix.columns) - fig.savefig(path + title) - - return fig, ax - - -def plot_regrets(cum_regret): - """Plot cumulative regrets.""" - regrets = pd.Series(cum_regret).reset_index().rename(columns={"index": "number of observations"}) - return ggplot(regrets, aes(y="cum_regret", x="number of observations")) + geom_line() + ylab("cumulative regret") diff --git a/pybandits/simulation_smab.py b/pybandits/simulation_smab.py deleted file mode 100644 index 1780369..0000000 --- a/pybandits/simulation_smab.py +++ /dev/null @@ -1,213 +0,0 @@ -# MIT License -# -# Copyright (c) 2022 Playtika Ltd. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import random - -import numpy as np -import pandas as pd - -from pybandits.core.smab import Smab - - -class SimulationSmab: - """ - Simulate environment for stochastic multi-armed bandits. - - This class performs simulation of stochastic Multi-Armed Bandits (sMAB). Data are processed in batches of size n>=1. - Per each batch of simulated samples, the sMAB selects one action and collects the corresponding simulated reward for - each sample. Then, prior parameters are updated based on returned rewards from recommended actions. - - Parameters - ---------- - smab : pybandits.core.smab.Smab - Stochastic multi-armed bandit model. - n_updates : int, default=10 - The number of updates (i.e. batches of samples) in the simulation. - batch_size: int, default=100 - The number of samples per batch. - probs_reward : dict, default=None - The reward probability for the different actions. If None probabilities are set to 0.5. - The keys of the dict must match the smab actions_ids, and the values are float in the interval [0, 1]. - e.g. probs_reward={'action A': 0.6, 'action B': 0.8, 'action C': 1.} - save : bool, default=False - Boolean flag to save the results. - path : string, default='' - Path where results are saved if save=True - random_seed : int, default=None - Seed for random state. If specified, the model outputs deterministic results. - verbose : bool, default=False - Enable verbose output. 
If True, detailed logging information about the simulation are provided. - """ - - def __init__( - self, - smab, - n_updates=10, - batch_size=100, - probs_reward=None, - save=False, - path="", - random_seed=None, - verbose=False, - ): - if type(smab) is not Smab: - raise TypeError("smab must be of type pybandits.core.smab.Smab") - if type(n_updates) is not int and n_updates <= 0: - raise ValueError("n_updates must be an integer > 0") - if type(batch_size) is not int and batch_size <= 0: - raise ValueError("batch_size must be an integer > 0") - if type(save) is not bool: - raise TypeError("save must be boolean (True/False)") - if type(path) is not str: - raise TypeError("path must be a string") - if type(random_seed) is not int and random_seed is not None: - raise TypeError("random_seed must be an integer") - if type(verbose) is not bool: - raise TypeError("verbose must be boolean (True/False)") - - if probs_reward is None: - probs_reward = {k: v for (k, v) in zip(smab._actions_ids, len(smab._actions_ids) * [0.5])} - if ( - type(probs_reward) is not dict - or not all(isinstance(x, str) for x in probs_reward.keys()) - or not all(isinstance(x, float) for x in probs_reward.values()) - ): - raise TypeError("probs_reward must be a dict with string as keys and float as values.") - if set(probs_reward.keys()) != set(smab._actions_ids): - raise ValueError("probs_reward dict keys must match smab actions_ids.") - if all(v > 1 for v in probs_reward.values()) or all(v < 0 for v in probs_reward.values()): - raise ValueError("probs_reward values must be in the interval [0, 1].") - - self._smab = smab - self._n_updates = n_updates - self._batch_size = batch_size - self._probs_reward = probs_reward - self._save = save - self._path = path - self._random_seed = random_seed - self._verbose = verbose - - # created DataFrame for simulation results - self.results = pd.DataFrame(np.nan, columns=["action", "reward"], index=range(batch_size * n_updates)) - - def run(self): - """ - Start simulation process. It consists in the following steps: - for i=0 to n_updates - Consider batch[i] of observation - sMAB selects the best action as the action with the highest reward probability to each sample in - batch[i]. 
- Rewards are returned for each recommended action - Prior parameters are updated based on recommended actions and returned rewards - """ - for i in range(self._n_updates): - # select actions for batch #i - actions, _ = self._smab.predict(n_samples=self._batch_size) - - # find min and max indexes for batch #i - idx_batch_min = i * self._batch_size - idx_batch_max = (i + 1) * self._batch_size - 1 - - # write the selected actions for batch #i in the results matrix - self.results.loc[idx_batch_min:idx_batch_max, "action"] = actions - - for a in self._smab._actions_ids: - # simulate the rewards - random.seed(self._random_seed) - rewards = [1 if random.random() < self._probs_reward[a] else 0 for i in range(actions.count(a))] - - # find indexes of the action 'a' in the array 'actions' - idx = [i for i in range(len(actions)) if actions[i] == a] - - # write rewards for batch #i and action 'a' in the result matrix - self.results.loc[[idx_batch_min + i for i in idx], "reward"] = rewards - - # update the stochastic multi-armed bandit model - self._smab.update(action_id=a, n_successes=rewards.count(1), n_failures=rewards.count(0)) - - # print results - if self._verbose: - self._print_results() - - # store results - if self._save: - if self._verbose: - print("Saving results at {}".format(self._path)) - self._save_results() - - def get_count_selected_actions(self): - """ - Get the count of actions selected by the bandit at the end of the process. - - Returns - ------- - dict - Dictionary with keys=action_ids and values=count of recommended actions. - """ - return dict(self.results.action.value_counts()) - - def get_proportion_positive_reward(self): - """ - Get the observed proportion of positive rewards for each action at the end of the simulation process. - - Returns - ------- - dict - Dictionary with keys=action_ids and values=proportion of positive rewards for each action. - """ - d = {} - for a in self._smab._actions_ids: - x = self.results[self.results.action == a] - d[a] = sum(x.reward) / len(x) - return d - - def get_cumulative_proportions(self): - """ - Get (i) the cumulative action proportions and (ii) the cumulative reward proportions per action. 
- - Returns - ------- - dict - Dictionary with keys=(actions, reward) and - values=(cumulative action proportions, cumulative reward proportions per action) - """ - actions = pd.get_dummies(self.results["action"]).reset_index(drop=True) - actions_plot = actions.cumsum().div(actions.index.values + 1, axis=0) - - rewards = pd.get_dummies(self.results["action"]) - rewards.loc[self.results["reward"] == 0] = 0 - rewards.reset_index(inplace=True, drop=True) - rewards_plot = rewards.cumsum().div(actions.cumsum()) - - return {"action": actions_plot, "reward": rewards_plot} - - def _print_results(self): - """Private function to print results.""" - print("Simulation results (first 10 observations):\n", self.results.head(10), "\n") - print("Count of actions selected by the bandit: \n", self.get_count_selected_actions(), "\n") - print("Observed proportion of positive rewards for each action:\n", self.get_proportion_positive_reward(), "\n") - - def _save_results(self): - """Private function to save results.""" - self.results.to_csv("simulation_results.csv", index=False) - with open(self._path + "summary.txt", "w") as f: - f.write(str(self.get_count_selected_actions()) + "\n" + str(self.get_proportion_positive_reward())) diff --git a/pybandits/simulator.py b/pybandits/simulator.py new file mode 100644 index 0000000..c9f143a --- /dev/null +++ b/pybandits/simulator.py @@ -0,0 +1,614 @@ +# MIT License +# +# Copyright (c) 2022 Playtika Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import os.path +import random +from abc import ABC, abstractmethod +from functools import cached_property +from typing import Any, Dict, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd +from bokeh.core.enums import Palette +from bokeh.layouts import layout +from bokeh.models import ColumnDataSource, HoverTool, Legend, Plot, TabPanel +from bokeh.palettes import Category10, Turbo256 +from bokeh.plotting import figure +from loguru import logger + +from pybandits.base import ActionId, BinaryReward, PyBanditsBaseModel +from pybandits.mab import BaseMab +from pybandits.pydantic_version_compatibility import ( + PYDANTIC_VERSION_1, + PYDANTIC_VERSION_2, + NonNegativeInt, + PositiveInt, + PrivateAttr, + field_validator, + model_validator, + pydantic_version, +) +from pybandits.utils import in_jupyter_notebook, visualize_via_bokeh + + +class Simulator(PyBanditsBaseModel, ABC): + """ + Simulate environment for multi-armed bandits. + + This class performs simulation of Multi-Armed Bandits (MAB). 
Data are processed in batches of size n>=1. + For each batch of simulated samples, the MAB selects one action and collects the corresponding simulated reward for + each sample. Then, prior parameters are updated based on returned rewards from recommended actions. + + Parameters + ---------- + mab : BaseMab + MAB model. + n_updates : PositiveInt, defaults to 10 + The number of updates (i.e. batches of samples) in the simulation. + batch_size : PositiveInt, defaults to 100 + The number of samples per batch. + probs_reward : Optional[pd.DataFrame], default=None + The reward probability for the different actions. If None, probabilities are set to 0.5. + The column names must match the mab action ids, and the values must be floats in the interval [0, 1]. + e.g. probs_reward=pd.DataFrame({"a1 A": [0.6], "a2 B": [0.5], "a3": [0.8]}). + Note that currently only single-objective reward is supported. + save : bool, defaults to False + Boolean flag to save the results. + path : string, default to '' + Path where results are saved if save=True + file_prefix : string, default to '' + Prefix for the file name where results are saved. + random_seed : int, default=None + Seed for random state. If specified, the model outputs deterministic results. + verbose : bool, default=False + Enable verbose output. If True, detailed logging information about the simulation is provided. + visualize : bool, default=False + Enable visualization of the simulation results. + """ + + mab: BaseMab + n_updates: PositiveInt = 10 + batch_size: PositiveInt = 100 + probs_reward: Optional[pd.DataFrame] = None + save: bool = False + path: str = "" + file_prefix: str = "" + random_seed: Optional[NonNegativeInt] = None + verbose: bool = False + visualize: bool = False + _results: pd.DataFrame = PrivateAttr() + _base_columns: List[str] = PrivateAttr() + _cumulative_col_prefix: str = "cum" + # Define dash patterns, markers, and colors for lines + _dash_patterns = ["solid", "dashed", "dotted"] + _markers = ["circle", "square", "triangle", "diamond", "star"] + + ############################################ Instance Input Validators ############################################# + + if pydantic_version == PYDANTIC_VERSION_1: + + class Config: + arbitrary_types_allowed = True + allow_population_by_field_name = True + + elif pydantic_version == PYDANTIC_VERSION_2: + model_config = {"arbitrary_types_allowed": True, "populate_by_name": True} + else: + raise ValueError(f"Unsupported pydantic version: {pydantic_version}") + + @field_validator("probs_reward", mode="before") + @classmethod + def validate_probs_reward_values(cls, value): + if value is not None: + if not all(value.dtypes.apply(lambda x: x.kind == "f")): + raise ValueError("probs_reward values must be float.") + if not value.applymap(lambda x: 0 <= x <= 1).all().all(): + raise ValueError("probs_reward values must be in the interval [0, 1].") + return value + + @field_validator("file_prefix", mode="before") + def maybe_alter_file_prefix(cls, value): + return f"{value}_" if value else "" + + @model_validator(mode="before") + @classmethod + def validate_probs_reward_columns(cls, values): + if "probs_reward" in values and values["probs_reward"] is not None: + mab_action_ids = list(values["mab"].actions.keys()) + if set(values["probs_reward"].columns) != set(mab_action_ids): + raise ValueError("probs_reward columns must match mab actions ids.") + if values["probs_reward"].shape[1] != len(mab_action_ids): + raise ValueError("The number of probs_reward columns must match the number of MAB 
actions.") + return values + + @model_validator(mode="before") + @classmethod + def validate_visualize_without_save(cls, values): + visualize = cls._get_value_with_default("visualize", values) + save = cls._get_value_with_default("save", values) + if visualize and not save and not in_jupyter_notebook(): + raise ValueError("Visualize cannot be enabled without saving the results if shell is not Jupyter.") + return values + + #################################################################################################################### + + def model_post_init(self, __context: Any) -> None: + # set random seed for reproducibility + random.seed(self.random_seed) + np.random.default_rng(self.random_seed) + self._initialize_results() + + @abstractmethod + def _initialize_results(self): + """ + Initialize the results DataFrame. The results DataFrame is used to store the raw simulation results. + """ + pass + + @property + def results(self): + return self._results + + def run(self): + """ + Start simulation process. It consists in the following steps: + for i=0 to n_updates + Consider batch[i] of observation + mab selects the best action as the action with the highest reward probability to each sample in + batch[i]. + Rewards are returned for each recommended action + Prior parameters are updated based on recommended actions and returned rewards + """ + for batch_index in range(self.n_updates): + predict_kwargs, update_kwargs, metadata = self._get_batch_step_kwargs_and_metadata(batch_index) + self._step(batch_index, metadata, predict_kwargs, update_kwargs) + + self._finalize_results() + + # print results + if self.verbose: + self._print_results() + + if self.visualize: + self._visualize_results() + + # store results + if self.save: + if self.verbose: + logger.info(f"Saving results at {self.path}") + self._save_results() + + def _step( + self, + batch_index: int, + metadata: Dict[str, List], + predict_kwargs: Dict[str, Union[int, np.ndarray]], + update_kwargs: Dict[str, np.ndarray], + ): + """ + Perform a step of the simulation process. It consists in the following steps: + - select actions for batch via mab.predict + - draw rewards for the selected actions based on metadata according to probs_reward + - write the selected actions for batch #i in the results matrix + - update the mab model with the selected actions and the corresponding rewards via mab.update + + Parameters + ---------- + batch_index : int + The index of the batch. + metadata : Dict[str, List] + The metadata for the selected actions. + predict_kwargs : Dict[str, Union[int, np.ndarray]] + Dictionary containing the keyword arguments for the batch used in mab.predict. + update_kwargs : Dict[str, np.ndarray] + Dictionary containing the keyword arguments for the batch used in mab.update. 
+ """ + # select actions for batch #index + predictions = self.mab.predict(**predict_kwargs) + actions = predictions[0] # location 0 is the actions for both SmabPredictions and CmabPredictions + rewards = self._draw_rewards(actions, metadata) + # write the selected actions for batch #i in the results matrix + batch_results = pd.DataFrame({"action": actions, "reward": rewards, "batch": batch_index, **metadata}) + batch_results = self._finalize_step(batch_results) + if not all(col in batch_results.columns for col in self._base_columns): + raise ValueError(f"The batch results must contain the {self._base_columns} columns") + self._results = pd.concat((self._results, batch_results), ignore_index=True) + self.mab.update(actions=actions, rewards=rewards, **update_kwargs) + + @abstractmethod + def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]: + """ + Draw rewards for the selected actions based on metadata according to probs_reward. + + Parameters + ---------- + actions : List[ActionId] + The actions selected by the multi-armed bandit model. + metadata : Dict[str, List] + The metadata for the selected actions. + + Returns + ------- + reward : List[BinaryReward] + A list of binary rewards. + """ + pass + + @abstractmethod + def _get_batch_step_kwargs_and_metadata( + self, batch_index: int + ) -> Tuple[Dict[str, Union[int, np.ndarray]], Dict[str, np.ndarray], Dict[str, List]]: + """ + Extract kwargs required for the MAB's update and predict functionality, + as well as metadata for sample association. + + Parameters + ---------- + batch_index : int + The index of the batch. + + Returns + ------- + predict_kwargs : Dict[str, Union[int, np.ndarray]] + Dictionary containing the keyword arguments for the batch used in mab.predict. + update_kwargs : Dict[str, Any] + Dictionary containing the keyword arguments for the batch used in mab.update. + metadata : Dict[str, List] + Dictionary containing the association information for the batch. + """ + pass + + @abstractmethod + def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame: + """ + Finalize the step by adding additional information to the batch results. + + Parameters + ---------- + batch_results : pd.DataFrame + raw batch results + + Returns + ------- + batch_results : pd.DataFrame + batch results with added columns + """ + pass + + @abstractmethod + def _finalize_results(self): + """ + Finalize the simulation process. It can be used to add additional information to the results. + + Returns + ------- + None + """ + pass + + @cached_property + def _action_ids(self) -> List[ActionId]: + """ + Get the list of actions. + + Returns + ------- + List[ActionId] + The list of actions + """ + return sorted(list(self.mab.actions.keys())) + + @cached_property + def _cumulative_actions_cols(self) -> List[str]: + """ + Get the list of cumulative actions columns. + + Returns + ------- + : List[str] + The list of cumulative actions columns + """ + return [f"{self._cumulative_col_prefix}_{action}" for action in self._action_ids] + + @property + def _colors(self) -> Palette: + """ + Get the palette of colors. + + Returns + ------- + : Palette + Palette of colors + """ + n_actions = len(self._action_ids) + category10_keys = Category10.keys() + return Category10[max(n_actions, min(category10_keys))] if n_actions <= max(category10_keys) else Turbo256 + + @classmethod + def _impute_missing_counts(cls, df, action_ids): + """ + Impute missing counts for actions in the data frame. 
+ + Parameters + ---------- + df : pd.DataFrame + Data frame with counts of actions selected by the bandit. + action_ids : List[ActionId] + List of action ids. + + Returns + ------- + df : pd.DataFrame + Data frame with imputed missing counts for actions. + """ + for action_id in action_ids: + if action_id not in df.columns: + df[action_id] = 0 + return df + + @property + def selected_actions_count(self) -> pd.DataFrame: + """ + Get the count of actions selected by the bandit on each batch and + at the end of the simulation process. + + Returns + ------- + counts_df : pd.DataFrame + Data frame with batch serial number as index (or total for all batches), actions as columns, + and count of recommended actions as values + """ + groupby_cols = [col for col in self._base_columns if col not in ["reward", "action"]] + counts_df = self._results.groupby(groupby_cols)["action"].value_counts().unstack(fill_value=0).reset_index() + action_ids = self._action_ids + counts_df = self._impute_missing_counts(counts_df, action_ids) + reordered_cols = groupby_cols + action_ids + counts_df = counts_df[reordered_cols] + cumulative_actions_cols = self._cumulative_actions_cols + groupby_cols.remove("batch") + counts_df[cumulative_actions_cols] = ( + counts_df.groupby(groupby_cols)[action_ids].cumsum() if groupby_cols else counts_df[action_ids].cumsum() + ) + if groupby_cols: + grouped_counts_df = self._results.groupby(groupby_cols)["action"].value_counts().unstack().fillna(0) + grouped_counts_df = self._impute_missing_counts(grouped_counts_df, action_ids) + grouped_counts_df = grouped_counts_df.assign(batch="total").set_index(["batch"], append=True).reset_index() + grouped_counts_df[cumulative_actions_cols] = grouped_counts_df[action_ids] + else: + grouped_counts_df = pd.DataFrame() + total_counts_df = counts_df.sum(axis=0).to_frame().T + total_counts_df = ( + total_counts_df.assign(batch="total", **{col: "total" for col in groupby_cols}) + .set_index(["batch"], drop=True) + .reset_index() + ) + total_counts_df[cumulative_actions_cols] = total_counts_df[action_ids] + counts_df = pd.concat((counts_df, grouped_counts_df, total_counts_df), axis=0, ignore_index=True).set_index( + groupby_cols + ["batch"], drop=True + ) + return counts_df + + @property + def positive_reward_proportion(self) -> pd.DataFrame: + """ + Get the observed proportion of positive rewards for each action at the end of the simulation process. 
+ + Returns + ------- + proportion_df : pd.DataFrame + Data frame with actions as index, and proportion of positive rewards as values + """ + groupby_cols = [col for col in self._base_columns if col not in ["reward", "batch"]] + proportion_df = self._results.groupby(groupby_cols)["reward"].mean().to_frame(name="proportion") + return proportion_df + + def _print_results(self): + """Private function to print results.""" + logger.info(f"Simulation results (first 10 observations):\n{self._results.head(10)}\n") + logger.info(f"Count of actions selected by the bandit:\n{self.selected_actions_count.iloc[-1]}\n") + logger.info(f"Observed proportion of positive rewards for each action:\n{self.positive_reward_proportion}\n") + + def _save_results(self): + """Private function to save results.""" + self._results.to_csv(self._get_save_path("simulation_results.csv"), index=False) + self.selected_actions_count.to_csv(self._get_save_path("selected_actions_count.csv"), index=True) + self.positive_reward_proportion.to_csv(self._get_save_path("positive_reward_proportion.csv"), index=True) + + def _get_save_path(self, file_name: str) -> str: + """ + Private function to get the save path. + + Parameters + ---------- + file_name : str + The file name. + + Returns + ------- + full_path : str + The full path to save the file with attached path and name prefix. + """ + full_path = os.path.join(self.path, f"{self.file_prefix}{file_name}") + return full_path + + def _visualize_results(self): + """Private function to visualize results.""" + action_ids = self._action_ids + cumulative_actions_cols = self._cumulative_actions_cols + selected_actions_count = self.selected_actions_count + selected_actions_rate = 100 * pd.merge( + selected_actions_count[action_ids].div(selected_actions_count[action_ids].sum(axis=1), axis=0), + selected_actions_count[cumulative_actions_cols].div( + selected_actions_count[cumulative_actions_cols].sum(axis=1), axis=0 + ), + left_index=True, + right_index=True, + ) + step_actions_rate = selected_actions_rate[(selected_actions_rate.reset_index().batch != "total").values] + step_actions_rate = ( + step_actions_rate.unstack(level=list(range(step_actions_rate.index.nlevels))) + .to_frame("value") + .reset_index() + ) + groupby_cols = [col for col in self._base_columns if col not in ["reward", "batch", "action"]] + grouped_df = ( + step_actions_rate.groupby(groupby_cols if len(groupby_cols) > 1 else groupby_cols[0]) + if groupby_cols + else [("", step_actions_rate)] + ) + + # plot using bokeh + tabs = [] + for group_name, rates_df in grouped_df: + if len(groupby_cols) == 1: + group_name = (group_name,) + elif len(groupby_cols) == 0: + group_name = tuple() + overall_actions_rate = selected_actions_rate.loc[group_name + ("total",)].to_frame("total").reset_index() + overall_actions_rate = overall_actions_rate[overall_actions_rate["action"].isin(action_ids)] + + # rate vs step line plot + step_legend_items = [] + fig_steps = figure( + title="Selected actions rate across steps", + x_axis_label="Batch index", + y_axis_label="Rate [%]", + ) + for i, action in enumerate(action_ids): + if action not in sorted(rates_df.action.unique()): + continue + self._add_line_to_figure(fig_steps, step_legend_items, rates_df, i, action) + + self._add_legend_to_figure(step_legend_items, fig_steps) + fig_steps.add_tools(HoverTool(tooltips=[("batch", "@batch"), ("action", "@action"), ("value", "@value")])) + + # Overall selected actions bars plot + fig_overall = figure( + title="Overall selected actions rate", + 
x_axis_label="Action", + y_axis_label="Rate [%]", + x_range=overall_actions_rate["action"], + ) + fig_overall.vbar(x="action", top="total", width=0.9, source=ColumnDataSource(overall_actions_rate)) + fig_overall.xgrid.grid_line_color = None + fig_overall.add_tools(HoverTool(tooltips=[("action", "@action"), ("rate", "@total")])) + + # cumulative rate vs step line plot + cum_legend_items = [] + fig_cumulative_steps = figure( + title="Cumulative selected actions rate across steps", + x_axis_label="Batch index", + y_axis_label="Rate [%]", + ) + for i, (action, cum_action) in enumerate(zip(action_ids, cumulative_actions_cols)): + if action not in rates_df.action.unique(): + continue + self._add_line_to_figure(fig_cumulative_steps, cum_legend_items, rates_df, i, action, cum_action) + + self._add_legend_to_figure(cum_legend_items, fig_cumulative_steps) + fig_cumulative_steps.add_tools( + HoverTool(tooltips=[("batch", "@batch"), ("action", "@action"), ("value", "@value")]) + ) + + tabs.append( + TabPanel( + child=layout(children=[[fig_steps, fig_overall], [fig_cumulative_steps]]), + title=f"{'_'.join([str(name_part) for name_part in group_name])}", + ) + ) + visualize_via_bokeh(self._get_save_path("simulation_results.html"), tabs) + + def _add_line_to_figure( + self, + fig: Plot, + legend_items: List[Tuple[str, List]], + df: pd.DataFrame, + index: int, + action: ActionId, + action_data_source_id: Optional[str] = None, + ): + """ + Add a line corresponding to action based on filtering df using action_data_source_id to the figure. + + Parameters + ---------- + fig : Plot + Bokeh figure for which a line should be added. + legend_items : List[Tuple[str, List] + List of legend elements, given by tuples of name and associated plot members. + df : DataFrame + Data frame to filter for line data. + index : int + Line serial number. + action : ActionId + Subjected action. + action_data_source_id : Optional[str], resorts to action if not specified + Corresponding value to action to filter df by. + """ + + action_data_source_id = action_data_source_id or action + + dash_pattern = self._get_modulus_element(index, self._dash_patterns) + marker = self._get_modulus_element(index, self._markers) + color = self._get_modulus_element(index, self._colors) + + action_data = df[df.action == action_data_source_id] + action_source = ColumnDataSource(action_data) + line = fig.line("batch", "value", source=action_source, line_width=2, color=color, line_dash=dash_pattern) + scatter = fig.scatter("batch", "value", source=action_source, size=8, color=color, marker=marker) + legend_items.append((action, [line, scatter])) + + @staticmethod + def _add_legend_to_figure(legend_items: List[Tuple[str, List]], fig: Plot): + """ + Add legend with the legend items to fig. + + Parameters + ---------- + legend_items : List[Tuple[str, List] + List of legend elements, given by tuples of name and associated plot members. + fig : Plot + Bokeh figure for which a legend should be added. + """ + legend = Legend(items=legend_items) + legend.title = "Actions" + legend.location = "right" + legend.click_policy = "hide" + fig.add_layout(legend, "right") + + @staticmethod + def _get_modulus_element(index: int, elements: List): + """ + Get the element of the list at the index modulo the length of the list. + + Parameters + ---------- + index : int + Required index + elements : List + List of elements. 
+ + Returns + ------- + Element of the list at the index modulo the length of the list + """ + return elements[index % len(elements)] diff --git a/pybandits/smab_simulator.py b/pybandits/smab_simulator.py new file mode 100644 index 0000000..400fd1b --- /dev/null +++ b/pybandits/smab_simulator.py @@ -0,0 +1,140 @@ +# MIT License +# +# Copyright (c) 2022 Playtika Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import random +from typing import Dict, List, Tuple + +import numpy as np +import pandas as pd + +from pybandits.base import ActionId, BinaryReward +from pybandits.pydantic_version_compatibility import Field, model_validator +from pybandits.simulator import Simulator +from pybandits.smab import BaseSmabBernoulli + + +class SmabSimulator(Simulator): + """ + Simulate environment for stochastic multi-armed bandits. + + This class performs simulation of stochastic Multi-Armed Bandits (sMAB). Data are processed in batches of size n>=1. + For each batch of simulated samples, the MAB selects one action and collects the corresponding simulated reward for + each sample. Then, prior parameters are updated based on returned rewards from recommended actions. + + Parameters + ---------- + mab : BaseSmabBernoulli + sMAB model. + """ + + mab: BaseSmabBernoulli = Field(validation_alias="smab") + _base_columns: List[str] = ["batch", "action", "reward"] + + @model_validator(mode="before") + @classmethod + def replace_null_and_validate_probs_reward(cls, values): + mab_action_ids = list(values["mab"].actions.keys()) + probs_reward = cls._get_value_with_default("probs_reward", values) + if probs_reward is None: + probs_reward = pd.DataFrame(0.5, index=[0], columns=mab_action_ids) + values["probs_reward"] = probs_reward + else: + if len(probs_reward) != 1: + raise ValueError("probs_reward must have exactly one row.") + return values + + def _initialize_results(self): + """ + Initialize the results DataFrame. The results DataFrame is used to store the raw simulation results. + """ + self._results = pd.DataFrame(columns=["batch", "action", "reward"]) + + def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]: + """ + Draw rewards for the selected actions according to probs_reward. + + Parameters + ---------- + actions : List[ActionId] + The actions selected by the multi-armed bandit model. + metadata : Dict[str, List] + The metadata for the selected actions. Not used in this implementation. 
+
+        Returns
+        -------
+        rewards : List[BinaryReward]
+            A list of binary rewards.
+        """
+        rewards = [int(random.random() < self.probs_reward.loc[0, a]) for a in actions]
+        return rewards
+
+    def _get_batch_step_kwargs_and_metadata(
+        self, batch_index: int
+    ) -> Tuple[Dict[str, int], Dict[str, np.ndarray], Dict[str, List]]:
+        """
+        Extract the keyword arguments required for the sMAB's update and predict functionality,
+        as well as the metadata for the sample group.
+
+        Parameters
+        ----------
+        batch_index : int
+            The index of the batch.
+
+        Returns
+        -------
+        predict_kwargs : Dict[str, int]
+            Dictionary containing the number of samples for sMAB prediction.
+        update_kwargs : Dict[str, np.ndarray]
+            Empty dictionary; the sMAB update requires no extra arguments.
+        metadata : Dict[str, List]
+            Empty dictionary; the sMAB generates no metadata.
+        """
+        predict_kwargs = {"n_samples": self.batch_size}
+        update_kwargs = {}
+        metadata = {}
+        return predict_kwargs, update_kwargs, metadata
+
+    def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame:
+        """
+        Finalize the step by adding additional information to the batch results.
+
+        Parameters
+        ----------
+        batch_results : pd.DataFrame
+            Raw batch results.
+
+        Returns
+        -------
+        batch_results : pd.DataFrame
+            The same raw batch results, unchanged.
+        """
+        return batch_results
+
+    def _finalize_results(self):
+        """
+        Finalize the simulation process. It can be used to add additional information to the results.
+
+        Returns
+        -------
+        None
+        """
+        pass
diff --git a/pybandits/utils.py b/pybandits/utils.py
index d0577b5..3d8a62f 100644
--- a/pybandits/utils.py
+++ b/pybandits/utils.py
@@ -1,5 +1,31 @@
+# MIT License
+#
+# Copyright (c) 2022 Playtika Ltd.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
 import json
-from typing import Any, Callable, Dict, List, Union
+from typing import Any, Callable, Dict, List, Optional, Union
+
+from bokeh.io import curdoc, output_file, output_notebook, save, show
+from bokeh.models import InlineStyleSheet, TabPanel, Tabs
+from IPython import get_ipython
 
 from pybandits.pydantic_version_compatibility import validate_call
 
@@ -42,3 +68,64 @@ def extract_argument_names_from_function(function_handle: Callable, is_class_met
     start_index = int(is_class_method)
     argument_names = function_handle.__code__.co_varnames[start_index : function_handle.__code__.co_argcount]
     return argument_names
+
+
+def in_jupyter_notebook() -> bool:
+    """
+    Check if the code is running in a Jupyter notebook.
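+    The check relies on the IPython shell class name, so other notebook frontends may not be detected.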
+
+    Reference: https://stackoverflow.com/a/39662359
+
+    Returns
+    -------
+    bool
+        True if the code is running in a Jupyter notebook, False otherwise.
+    """
+
+    try:
+        shell = get_ipython().__class__.__name__
+
+        return shell == "ZMQInteractiveShell"
+
+    except NameError:
+        return False  # Probably standard Python interpreter
+
+
+def visualize_via_bokeh(output_path: Optional[str], tabs: List[TabPanel]):
+    """
+    Visualize output to either a Jupyter notebook or an HTML file.
+
+    Parameters
+    ----------
+    output_path : Optional[str]
+        Path to the output file. Required if not running in a Jupyter notebook.
+    tabs : List[TabPanel]
+        List of TabPanel objects to visualize.
+    """
+
+    if in_jupyter_notebook():
+        output_notebook()
+    else:
+        if output_path is None:
+            raise ValueError("output_path is required when not running in a Jupyter notebook.")
+        output_file(output_path)
+
+    # Inline stylesheet that lets the tab headers wrap instead of overflowing
+    tabs_css = """
+    :host(.bk-Tabs) .bk-header {
+        flex-wrap: wrap !important;
+    }
+    """
+
+    tabs_stylesheet = InlineStyleSheet(css=tabs_css)
+    curdoc().title = "Visual report"
+    styled_tabs = Tabs(tabs=tabs, stylesheets=[tabs_stylesheet], sizing_mode="stretch_both")
+    if in_jupyter_notebook():
+        show(styled_tabs)
+    else:
+        save(styled_tabs)
diff --git a/pyproject.toml b/pyproject.toml
index 91bf9fa..fb3e0e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pybandits"
-version = "1.0.2"
+version = "1.1.0"
 description = "Python Multi-Armed Bandit Library"
 authors = [
     "Dario d'Andrea ",
@@ -16,10 +16,12 @@ readme = "README.md"
 python = ">=3.8.1,<3.12"
 loguru = "^0.6"
 numpy = "^1.23"
-pydantic = ">=1.10"
+pydantic = ">=1.10.0,<3.0.0"
 scipy = "^1.9"
 pymc = "^5.3"
 scikit-learn = "^1.1"
+bokeh = "^3.1"
+
 
 [tool.poetry.group.dev.dependencies]
 hypothesis = "^6.68.2"
@@ -34,8 +36,10 @@ ipykernel = "^6.21.3"
 jupyterlab = "^3.6.1"
 pytest-cov = "^4.0.0"
 pytest_mock = "^3.14.0"
+pytest-xdist = "^3.6.1"
 ruff = "^0.5.6"
+
 
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
@@ -46,14 +50,13 @@ plugins = "pydantic.mypy"
 
 [tool.ruff]
 line-length = 120
 extend-include = ["*.ipynb"]
-extend-ignore = ["E203"]
+
 
 # pylint configuration incorporated in Ruff
 [tool.ruff.lint]
 # Enable the isort rules.
extend-select = ["I"] - -[tool.ruff.per-file-ignores] +extend-ignore = ["E203"] # disable check for: # D100: Missing docstring in public module (equivalent to C0114) # D101: Missing docstring in public class (equivalent to C0115) @@ -64,4 +67,4 @@ extend-select = ["I"] # D106: Missing docstring in public nested class (equivalent to C0115) # Missing : Too few public methods (equivalent to R0903) # PLR0913: Too many arguments (equivalent to R0913) -"*.py" = ["D100", "D101", "D102", "D103", "D104", "D105", "D106", "PLR0913"] +per-file-ignores = { "*.py" = ["D100", "D101", "D102", "D103", "D104", "D105", "D106", "PLR0913"] } diff --git a/tests/test_cmab.py b/tests/test_cmab.py index fdf2173..208f381 100644 --- a/tests/test_cmab.py +++ b/tests/test_cmab.py @@ -27,12 +27,17 @@ import pytest from hypothesis import given, settings from hypothesis import strategies as st -from pydantic import NonNegativeFloat, ValidationError from pybandits.base import Float01 from pybandits.cmab import CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC from pybandits.model import BayesianLogisticRegression, BayesianLogisticRegressionCC, StudentT, UpdateMethods -from pybandits.pydantic_version_compatibility import PYDANTIC_VERSION_1, PYDANTIC_VERSION_2, pydantic_version +from pybandits.pydantic_version_compatibility import ( + PYDANTIC_VERSION_1, + PYDANTIC_VERSION_2, + NonNegativeFloat, + ValidationError, + pydantic_version, +) from pybandits.strategy import BestActionIdentificationBandit, ClassicBandit, CostControlBandit from pybandits.utils import to_serializable_dict from tests.test_utils import is_serializable @@ -783,7 +788,7 @@ def test_cmab_cc_predict(n_samples, n_features): assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples -@settings(deadline=10000) +@settings(deadline=None) @given(st.just(100), st.just(3), st.sampled_from(literal_update_methods)) def test_cmab_cc_update(n_samples, n_features, update_method): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() diff --git a/tests/test_cmab_simulator.py b/tests/test_cmab_simulator.py new file mode 100644 index 0000000..cfb8633 --- /dev/null +++ b/tests/test_cmab_simulator.py @@ -0,0 +1,167 @@ +# MIT License +# +# Copyright (c) 2022 Playtika Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
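+
+# The tests below cover CmabSimulator: validation of probs_reward against mismatched
+# group/column shapes, and end-to-end simulation runs asserting that the expected
+# CSV and HTML artifacts are written to disk.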
+ +import os +from tempfile import TemporaryDirectory + +import numpy as np +import pandas as pd +import pytest +from hypothesis import given, settings +from hypothesis import strategies as st +from pytest_mock import MockerFixture + +from pybandits.cmab import CmabBernoulli +from pybandits.cmab_simulator import CmabSimulator + + +def test_mismatched_probs_reward_columns(mocker: MockerFixture, groups=[0, 1]): + def check_value_error(probs_reward, context): + with pytest.raises(ValueError): + CmabSimulator(mab=cmab, probs_reward=probs_reward, groups=groups, context=context) + + num_groups = len(groups) + cmab = mocker.Mock(spec=CmabBernoulli) + cmab.actions = {"a1": mocker.Mock(), "a2": mocker.Mock()} + cmab.epsilon = 0.0 + cmab.default_action = None + context = pd.DataFrame({"a1": [0.5] * num_groups, "a2": [0.5] * num_groups}) + probs_reward = pd.DataFrame({"a1": [0.5], "a2": [0.5]}) + check_value_error(probs_reward, context) + probs_reward = pd.DataFrame({"a1": [0.5] * num_groups, "a2": [0.5] * num_groups}) + check_value_error(probs_reward, context[:1]) + + +def test_cmab_e2e_simulation_with_default_arguments( + action_ids=["a1", "a2"], n_features=3, n_updates=2, batch_size=10, num_groups=2 +): + mab = CmabBernoulli.cold_start(action_ids=action_ids, n_features=n_features) + base_groups = list(range(num_groups)) + group = base_groups * (n_updates * batch_size // num_groups) + base_groups[: (n_updates * batch_size % num_groups)] + context = ( + np.repeat(np.arange(3).reshape(1, -1), n_updates * batch_size, axis=0).T * (np.array(group) - np.mean(group)) + ).T + with TemporaryDirectory() as path: + simulator = CmabSimulator( + mab=mab, + visualize=True, + save=True, + path=path, + group=[str(g) for g in group], + batch_size=batch_size, + n_updates=n_updates, + context=context, + ) + simulator.run() + assert not simulator.results.empty + dir_list = os.listdir(path) + assert "simulation_results.csv" in dir_list + assert "selected_actions_count.csv" in dir_list + assert "positive_reward_proportion.csv" in dir_list + assert "simulation_results.html" in dir_list + + +@settings(deadline=None) +@given( + st.just(["a1", "a2"]), + st.just(3), + st.integers(min_value=1, max_value=3), + st.integers(min_value=1, max_value=10), + st.booleans(), + st.sampled_from([None, 0, 42]), + st.booleans(), + st.booleans(), + st.sampled_from(["", "unit_test"]), + st.integers(min_value=1, max_value=3), +) +def test_cmab_e2e_simulation_with_non_default_args( + action_ids, + n_features, + n_updates, + batch_size, + save, + random_seed, + verbose, + visualize, + file_prefix, + num_groups, +): + base_groups = list(range(num_groups)) + group = base_groups * (n_updates * batch_size // num_groups) + base_groups[: (n_updates * batch_size % num_groups)] + effective_base_groups = sorted(set(group)) + context = ( + np.repeat(np.arange(n_features).reshape(1, -1), n_updates * batch_size, axis=0).T + * (np.array(group) - np.mean(group)) + ).T + probs_reward = pd.DataFrame( + np.random.uniform(0, 1, (len(effective_base_groups), len(action_ids))), + columns=action_ids, + index=[str(g) for g in effective_base_groups], + ) + mab = CmabBernoulli.cold_start(action_ids=action_ids, n_features=n_features, update_method="VI") + if visualize and not save: + with pytest.raises(ValueError): + CmabSimulator( + mab=mab, + visualize=visualize, + save=save, + group=[str(g) for g in group], + n_updates=n_updates, + batch_size=batch_size, + random_seed=random_seed, + probs_reward=probs_reward, + verbose=verbose, + file_prefix=file_prefix, + 
context=context, + ) + else: + with TemporaryDirectory() as path: + simulator = CmabSimulator( + mab=mab, + visualize=visualize, + save=save, + path=path, + group=[str(g) for g in group], + n_updates=n_updates, + batch_size=batch_size, + random_seed=random_seed, + probs_reward=probs_reward, + verbose=verbose, + file_prefix=file_prefix, + context=context, + ) + simulator.run() + if save: + assert not simulator.results.empty + dir_list = os.listdir(path) + if file_prefix: + assert f"{file_prefix}_simulation_results.csv" in dir_list + assert f"{file_prefix}_selected_actions_count.csv" in dir_list + assert f"{file_prefix}_positive_reward_proportion.csv" in dir_list + if visualize: + assert f"{file_prefix}_simulation_results.html" in dir_list + else: + assert "simulation_results.csv" in dir_list + assert "selected_actions_count.csv" in dir_list + assert "positive_reward_proportion.csv" in dir_list + if visualize: + assert "simulation_results.html" in dir_list diff --git a/tests/test_simulation_cmab.py b/tests/test_simulation_cmab.py deleted file mode 100644 index c01d964..0000000 --- a/tests/test_simulation_cmab.py +++ /dev/null @@ -1,127 +0,0 @@ -# # MIT License -# # -# # Copyright (c) 2022 Playtika Ltd. -# # -# # Permission is hereby granted, free of charge, to any person obtaining a copy -# # of this software and associated documentation files (the "Software"), to deal -# # in the Software without restriction, including without limitation the rights -# # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# # copies of the Software, and to permit persons to whom the Software is -# # furnished to do so, subject to the following conditions: -# # -# # The above copyright notice and this permission notice shall be included in all -# # copies or substantial portions of the Software. -# # -# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# # SOFTWARE. 
- -# import numpy as np -# import pandas as pd -# from numpy.testing import assert_equal -# from pandas._testing import assert_frame_equal - -# from pybandits.cmab import Cmab -# from pybandits.simulation_cmab import SimulationCmab - - -# def test_init(): -# """Test init SimulationCmab.""" - -# random_seed = 1 -# batch_size = 100 -# n_groups = 3 -# n_updates = 10 -# n_jobs = 1 -# actions_ids = ['action A', 'action B', 'action C'] -# n_features = 5 -# X = np.random.rand(batch_size*n_updates, n_features) - -# group = np.random.randint(n_groups, size=batch_size*n_updates) -# cmab = Cmab(n_features=n_features, actions_ids=actions_ids, n_jobs=n_jobs, random_seed=random_seed) -# prob_rewards = pd.DataFrame([[0.05, 0.80, 0.05], -# [0.80, 0.05, 0.05], -# [0.80, 0.05, 0.80]], columns=actions_ids, index=range(n_groups)) -# verbose = True -# save = True -# path = 'tests/simulation/' - -# # init with default params -# sim = SimulationCmab(cmab=cmab, X=X) - -# # init with custom params -# X = pd.DataFrame(np.random.rand(batch_size * n_updates, n_features)) -# sim = SimulationCmab(cmab=cmab, X=X, group=group, batch_size=batch_size, n_updates=n_updates, -# prob_rewards=prob_rewards, save=save, path=path, random_seed=random_seed, verbose=verbose) - -# assert sim._X.shape == (sim._batch_size * sim._n_updates, sim._cmab._n_features), 'X shape mismatch' -# assert sim.results['group'].shape == (sim._batch_size * sim._n_updates,), ' group shape mismatch' -# assert sim.results['group'].isin(range(sim._n_groups)).all(), 'group array should contain only values in ' \ -# '' + str(range(n_groups)) -# assert sim._rewards.shape == (sim._batch_size * sim._n_updates, len(actions_ids)), 'reward shape mismatch' -# assert sim.results.shape == (sim._batch_size * sim._n_updates, sim._cmab._n_features), 'result shape mismatch' - -# assert_frame_equal(sim._X, X) - - -# def test_run(): -# """ Test simulation with cmab model. 
""" - -# random_seed = 2 -# batch_size = 10 -# features_ids = ['feat_1', 'feat_2'] -# n_groups = 2 -# n_updates = 2 -# actions_ids = ['action A', 'action B'] -# prob_rewards = pd.DataFrame([[0.05, 0.80], -# [0.80, 0.05]], columns=actions_ids, index=range(n_groups)) -# cmab = Cmab(n_features=len(features_ids), actions_ids=actions_ids, random_seed=random_seed) -# df = pd.DataFrame([[34.07772868659151, -28.948390811625714, 'action B', 0.0, 1, 0.05, 0.8, 0.75, 0.75], -# [47.602172988242444, -11.585294068594154, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 0.75], -# [58.74075304505904, -99.71942656529076, 'action A', 0.0, 1, 0.8, 0.8, 0.0, 0.75], -# [41.462039348288144, -117.66517424958462, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 1.5], -# [56.18746540687566, -122.02451865370041, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 2.25], -# [29.982587761836534, -114.24870860989691, 'action B', 0.0, 1, 0.05, 0.8, 0.75, 3.0], -# [56.085236090749326, -1.974650230141235, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 3.0], -# [32.42925525229372, -106.92841840939255, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 3.75], -# [31.00949679739198, -42.8284308455658, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 4.5], -# [35.47919944987376, -88.52700687134366, 'action B', 1.0, 0, 0.8, 0.8, 0.0, 4.5], -# [62.6929015285017, -148.44461692725085, 'action B', 1.0, 0, 0.8, 0.8, 0.0, 4.5], -# [39.95202041753999, 49.56228281374906, 'action B', 0.0, 1, 0.05, 0.8, 0.75, 5.25], -# [47.779661185138565, 23.932111189164278, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 5.25], -# [31.077061872610724, 88.69384882684793, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 5.25], -# [56.119706130978265, -138.1918694119457, 'action B', 1.0, 0, 0.8, 0.8, 0.0, 5.25], -# [37.9189897898034, 136.88209858829075, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 5.25], -# [35.18935782070607, -83.10216873048782, 'action B', 0.0, 0, 0.8, 0.8, 0.0, 5.25], -# [2.11516522092686, -0.702259810984084, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 6.0], -# [25.560807764772438, -2.8576525901465555, 'action A', 0.0, 0, 0.05, 0.8, 0.75, 6.75], -# [40.465956204463076, 144.48135008944516, 'action A', 1.0, 1, 0.8, 0.8, 0.0, 6.75]], -# columns=features_ids+['action', 'reward', 'group', 'selected_prob_reward', 'max_prob_reward', -# 'regret', 'cum_regret']) -# X = df[features_ids] -# group = df['group'] -# verbose = True -# save = False -# path = 'tests/simulation/' - -# # init simulation -# sim = SimulationCmab(cmab=cmab, X=X, group=group, batch_size=batch_size, n_updates=n_updates, -# prob_rewards=prob_rewards, save=save, path=path, random_seed=random_seed, verbose=verbose) - -# # start simulation -# sim.run() -# assert_frame_equal(sim.results, df[['action', 'reward', 'group', 'selected_prob_reward', 'max_prob_reward', -# 'regret', 'cum_regret']]) - -# # test functions get -# d = {'group 0': {'action A': 6, 'action B': 4}, -# 'group 1': {'action A': 7, 'action B': 3}} -# assert_equal(sim.get_count_selected_actions(), d) - -# d = {'group 0': {'action A': np.nan, 'action B': 0.75}, -# 'group 1': {'action A': 0.8571428571428571, 'action B': np.nan}} -# assert_equal(sim.get_proportion_positive_reward(), d) diff --git a/tests/test_simulation_smab.py b/tests/test_simulation_smab.py deleted file mode 100644 index a180ef2..0000000 --- a/tests/test_simulation_smab.py +++ /dev/null @@ -1,102 +0,0 @@ -# # MIT License -# # -# # Copyright (c) 2022 Playtika Ltd. 
-# # -# # Permission is hereby granted, free of charge, to any person obtaining a copy -# # of this software and associated documentation files (the "Software"), to deal -# # in the Software without restriction, including without limitation the rights -# # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# # copies of the Software, and to permit persons to whom the Software is -# # furnished to do so, subject to the following conditions: -# # -# # The above copyright notice and this permission notice shall be included in all -# # copies or substantial portions of the Software. -# # -# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# # SOFTWARE. - -# import pandas as pd -# from pandas._testing import assert_frame_equal - -# from pybandits.smab import Smab -# from pybandits.simulation_smab import SimulationSmab - - -# def test_init(): -# random_seed = 1 -# smab = Smab(action_ids=["action A", "action B", "action C"], random_seed=random_seed) -# SimulationSmab(smab=smab) -# SimulationSmab( -# smab=smab, -# n_updates=20, -# batch_size=2000, -# probs_reward={"action A": 0.6, "action B": 0.0, "action C": 1.0}, -# save=True, -# path="folder/", -# random_seed=1, -# verbose=True, -# ) - - -# def test_run(): -# random_seed = 1 -# smab = Smab(action_ids=["action A", "action B", "action C"], random_seed=random_seed) -# sim = SimulationSmab(smab=smab, n_updates=5, batch_size=6, random_seed=random_seed) -# sim.run() - -# X = pd.DataFrame( -# [ -# ["action B", 1.0], -# ["action B", 0.0], -# ["action A", 1.0], -# ["action A", 0.0], -# ["action C", 1.0], -# ["action C", 0.0], -# ["action C", 1.0], -# ["action A", 1.0], -# ["action A", 0.0], -# ["action A", 0.0], -# ["action B", 1.0], -# ["action B", 0.0], -# ["action C", 1.0], -# ["action C", 0.0], -# ["action B", 1.0], -# ["action C", 0.0], -# ["action B", 0.0], -# ["action C", 1.0], -# ["action C", 1.0], -# ["action A", 1.0], -# ["action C", 0.0], -# ["action A", 0.0], -# ["action B", 1.0], -# ["action B", 0.0], -# ["action C", 1.0], -# ["action C", 0.0], -# ["action B", 1.0], -# ["action B", 0.0], -# ["action C", 0.0], -# ["action C", 1.0], -# ], -# columns=["action", "reward"], -# ) -# assert_frame_equal(sim.results, X) - - -# def test_functions_get(): -# random_seed = 1 -# smab = Smab(action_ids=["action A", "action B", "action C"], random_seed=random_seed) -# sim = SimulationSmab(smab=smab, n_updates=5, batch_size=6, random_seed=random_seed) -# sim.run() - -# summary_action = {"action A": 7, "action B": 10, "action C": 13} -# summary_reward = {"action A": 0.42857142857142855, "action B": 0.5, "action C": 0.5384615384615384} - -# _, _ = sim.get_cumulative_proportions()["action"], sim.get_cumulative_proportions()["reward"] - -# assert sim.get_count_selected_actions() == summary_action -# assert sim.get_proportion_positive_reward() == summary_reward diff --git a/tests/test_simulator.py b/tests/test_simulator.py new file mode 100644 index 0000000..eee7b5e --- /dev/null +++ b/tests/test_simulator.py @@ -0,0 +1,67 @@ +# MIT License +# +# Copyright (c) 2022 Playtika Ltd. 
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from random import choices
+from typing import Dict, List, Tuple
+
+import numpy as np
+import pandas as pd
+import pytest
+from pytest_mock import MockerFixture
+
+from pybandits.base import ActionId, BinaryReward
+from pybandits.mab import BaseMab
+from pybandits.simulator import Simulator
+
+
+class DummySimulator(Simulator):
+    def _initialize_results(self):
+        self._results = pd.DataFrame()
+
+    def _draw_rewards(self, actions: List[ActionId], metadata: Dict[str, List]) -> List[BinaryReward]:
+        return choices([0, 1], k=len(actions))
+
+    def _get_batch_step_kwargs_and_metadata(self, batch_index: int) -> Tuple[Dict[str, np.ndarray], Dict[str, List]]:
+        return {}, {}
+
+    def _finalize_step(self, batch_results: pd.DataFrame) -> pd.DataFrame:
+        return batch_results
+
+    def _finalize_results(self):
+        pass
+
+
+def test_mismatched_probs_reward_columns(mocker: MockerFixture):
+    def check_value_error(probs_reward):
+        with pytest.raises(ValueError):
+            DummySimulator(mab=mab, probs_reward=probs_reward)
+
+    mab = mocker.Mock(spec=BaseMab)
+    mab.actions = {"a1": mocker.Mock(), "a2": mocker.Mock()}
+    mab.epsilon = 0.0
+    mab.default_action = None
+    probs_reward = pd.DataFrame({"a3": [0.5]})
+    check_value_error(probs_reward)
+    probs_reward = pd.DataFrame({"a1": [0.5], "a2": [2]})
+    check_value_error(probs_reward)
+    probs_reward = pd.DataFrame({"a1": [0.5], "a2": [0.5], "a3": [0.5]})
+    check_value_error(probs_reward)
diff --git a/tests/test_smab_simulator.py b/tests/test_smab_simulator.py
new file mode 100644
index 0000000..912051d
--- /dev/null
+++ b/tests/test_smab_simulator.py
@@ -0,0 +1,119 @@
+# MIT License
+#
+# Copyright (c) 2022 Playtika Ltd.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import os +from tempfile import TemporaryDirectory + +import numpy as np +import pandas as pd +import pytest +from hypothesis import given, settings +from hypothesis import strategies as st +from pytest_mock import MockerFixture + +from pybandits.model import Beta +from pybandits.smab import SmabBernoulli +from pybandits.smab_simulator import SmabSimulator + + +def test_mismatched_probs_reward_columns(mocker: MockerFixture): + smab = mocker.Mock(spec=SmabBernoulli) + smab.actions = {"a1": mocker.Mock(), "a2": mocker.Mock()} + smab.epsilon = 0.0 + smab.default_action = None + probs_reward = pd.DataFrame({"a1": [0.5, 0.5], "a2": [0.5, 0.5]}) + with pytest.raises(ValueError): + SmabSimulator(mab=smab, probs_reward=probs_reward) + + +def test_smab_e2e_simulation_with_default_args(action_ids=["a1", "a2"]): + mab = SmabBernoulli(actions={action_id: Beta() for action_id in action_ids}) + with TemporaryDirectory() as path: + simulator = SmabSimulator(mab=mab, visualize=True, save=True, path=path) + simulator.run() + assert not simulator.results.empty + dir_list = os.listdir(path) + assert "simulation_results.csv" in dir_list + assert "selected_actions_count.csv" in dir_list + assert "positive_reward_proportion.csv" in dir_list + assert "simulation_results.html" in dir_list + + +@settings(deadline=1000) +@given( + st.just(["a1", "a2"]), + st.integers(min_value=1, max_value=10), + st.integers(min_value=1, max_value=10), + st.booleans(), + st.sampled_from([None, 0, 42]), + st.booleans(), + st.booleans(), + st.sampled_from(["", "unit_test"]), +) +def test_smab_e2e_simulation_with_non_default_args( + action_ids, n_updates, batch_size, save, random_seed, verbose, visualize, file_prefix +): + probs_reward = pd.DataFrame(np.random.uniform(0, 1, (1, len(action_ids))), columns=action_ids) + mab = SmabBernoulli.cold_start(action_ids=action_ids) + if visualize and not save: + with pytest.raises(ValueError): + SmabSimulator( + mab=mab, + visualize=visualize, + save=save, + n_updates=n_updates, + batch_size=batch_size, + random_seed=random_seed, + probs_reward=probs_reward, + verbose=verbose, + file_prefix=file_prefix, + ) + else: + with TemporaryDirectory() as path: + simulator = SmabSimulator( + mab=mab, + visualize=visualize, + save=save, + path=path, + n_updates=n_updates, + batch_size=batch_size, + random_seed=random_seed, + probs_reward=probs_reward, + verbose=verbose, + file_prefix=file_prefix, + ) + simulator.run() + if save: + assert not simulator.results.empty + dir_list = os.listdir(path) + if file_prefix: + assert f"{file_prefix}_simulation_results.csv" in dir_list + assert f"{file_prefix}_selected_actions_count.csv" in dir_list + assert f"{file_prefix}_positive_reward_proportion.csv" in dir_list + if visualize: + assert f"{file_prefix}_simulation_results.html" in dir_list + else: + assert "simulation_results.csv" in dir_list + assert "selected_actions_count.csv" in dir_list + assert "positive_reward_proportion.csv" in dir_list + if visualize: + assert "simulation_results.html" in dir_list
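
Usage sketch (illustrative only, not part of the patch): a minimal end-to-end run of the
new simulator API, mirroring the tests above. SmabBernoulli.cold_start, SmabSimulator,
its n_updates/batch_size/probs_reward arguments, and the results property all come from
this diff; the reward probabilities below are arbitrary example values.

    import pandas as pd

    from pybandits.smab import SmabBernoulli
    from pybandits.smab_simulator import SmabSimulator

    # Cold-start a Bernoulli sMAB with two actions and uniform Beta priors.
    mab = SmabBernoulli.cold_start(action_ids=["a1", "a2"])

    # One row of per-action reward probabilities defines the simulated environment.
    probs_reward = pd.DataFrame({"a1": [0.8], "a2": [0.2]})

    # Run 10 batches of 100 samples each; results stay in memory unless save=True.
    simulator = SmabSimulator(mab=mab, probs_reward=probs_reward, n_updates=10, batch_size=100)
    simulator.run()
    print(simulator.results.head())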