diff --git a/.github/workflows/continuous_delivery.yml b/.github/workflows/continuous_delivery.yml
index ab2679e..4059d01 100644
--- a/.github/workflows/continuous_delivery.yml
+++ b/.github/workflows/continuous_delivery.yml
@@ -29,10 +29,23 @@ jobs:
export PATH="$HOME/.poetry/bin:$PATH"
- name: Backup pyproject.toml
run: cp pyproject.toml pyproject.toml.bak
+ - name: Change pydantic version
+ run: |
+ poetry add pydantic@${{ matrix.pydantic-version }} --lock
+ - name: Cache Poetry virtualenv and dependencies
+ uses: actions/cache@v4
+ with:
+ path: |
+ ~/.cache/pypoetry
+ ~/.local/share/pypoetry/virtualenvs
+ key: ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-${{ hashFiles('poetry.lock') }}
+ restore-keys: |
+ ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-
- name: Install project dependencies with Poetry
run: |
- poetry add pydantic@${{ matrix.pydantic-version }}
poetry install
+ - name: Restore pyproject.toml
+ run: |
mv pyproject.toml.bak pyproject.toml
- name: Style check
run: |
diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml
index 2853deb..3b9de13 100644
--- a/.github/workflows/continuous_integration.yml
+++ b/.github/workflows/continuous_integration.yml
@@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: [ "3.8", "3.9", "3.10" ]
+ python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
pydantic-version: [ "1.10.*", "2.*" ]
steps:
@@ -35,9 +35,20 @@ jobs:
run: |
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
+ - name: Change pydantic version
+ run: |
+ poetry add pydantic@${{ matrix.pydantic-version }} --lock
+ - name: Cache Poetry virtualenv and dependencies
+ uses: actions/cache@v4
+ with:
+ path: |
+ ~/.cache/pypoetry
+ ~/.local/share/pypoetry/virtualenvs
+ key: ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-${{ hashFiles('poetry.lock') }}
+ restore-keys: |
+ ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-
- name: Install project dependencies with Poetry
run: |
- poetry add pydantic@${{ matrix.pydantic-version }}
poetry install
- name: Style check
run: |
@@ -45,4 +56,8 @@ jobs:
poetry run pre-commit run --all-files
- name: Run tests
run: |
+ START_TIME=$(date +%s)
poetry run pytest -vv -k 'not time and not update_parallel'
+ END_TIME=$(date +%s)
+ DURATION=$((END_TIME - START_TIME))
+ echo "Tests completed in $DURATION seconds."
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c185f1b..d0b0d5f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
types_or: [ python, pyi, jupyter ]
require_serial: true
- - repo: https://github.com/fastai/nbdev
- rev: 2.3.11
+ - repo: https://github.com/kynan/nbstripout
+ rev: 0.7.1
hooks:
- - id: nbdev_clean
+ - id: nbstripout
diff --git a/docs/src/tutorials/cmab.ipynb b/docs/src/tutorials/cmab.ipynb
index 07b74fa..cb83a7e 100644
--- a/docs/src/tutorials/cmab.ipynb
+++ b/docs/src/tutorials/cmab.ipynb
@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {
"pycharm": {
"is_executing": false
@@ -56,31 +56,13 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {
"pycharm": {
"is_executing": false
}
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "X: context matrix of shape (n_samples, n_features)\n",
- "[[-0.53211475 -0.40592956 0.05892565 -0.88067628 -0.84061481]\n",
- " [-0.95680954 -0.00540581 0.09148556 -0.82021004 -0.63425381]\n",
- " [-0.87792928 -0.51881823 -0.51767022 -0.05385187 -0.64499044]\n",
- " [-0.10569516 0.30847784 -0.353929 -0.94831998 -0.52175713]\n",
- " [-0.05088401 0.17155683 -0.4322128 -0.07509104 -0.78919832]\n",
- " [-0.88604157 0.55037109 0.42634479 -0.87179776 -0.69767766]\n",
- " [-0.0022063 0.99304089 0.76398198 -0.87343131 -0.12363411]\n",
- " [ 0.36371019 0.6660538 0.17177652 -0.08891719 -0.91070485]\n",
- " [-0.1056742 -0.72879406 -0.69367421 -0.8684397 0.70903817]\n",
- " [-0.15422305 0.31069811 -0.47487951 0.00853137 0.23793364]]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# context\n",
"n_samples = 1000\n",
@@ -92,7 +74,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -109,7 +91,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -126,18 +108,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Recommended action: ['action C' 'action C' 'action B' 'action B' 'action C' 'action C'\n",
- " 'action B' 'action C' 'action B' 'action C']\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# predict action\n",
"pred_actions, _ = cmab.predict(X)\n",
@@ -153,17 +126,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Simulated rewards: [1 0 0 0 0 0 0 0 1 1]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# simulate reward from environment\n",
"simulated_rewards = np.random.randint(2, size=n_samples)\n",
@@ -179,31 +144,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 5 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# update model\n",
"cmab.update(X, actions=pred_actions, rewards=simulated_rewards)"
diff --git a/docs/src/tutorials/simulation_cmab.ipynb b/docs/src/tutorials/simulation_cmab.ipynb
index 1ce2423..5a972ee 100644
--- a/docs/src/tutorials/simulation_cmab.ipynb
+++ b/docs/src/tutorials/simulation_cmab.ipynb
@@ -16,7 +16,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -38,7 +38,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -61,7 +61,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -80,77 +80,9 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Probability of positive reward for each group/action:\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " action A | \n",
- " action B | \n",
- " action C | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0.05 | \n",
- " 0.80 | \n",
- " 0.05 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0.80 | \n",
- " 0.05 | \n",
- " 0.05 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0.80 | \n",
- " 0.05 | \n",
- " 0.80 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " action A action B action C\n",
- "0 0.05 0.80 0.05\n",
- "1 0.80 0.05 0.05\n",
- "2 0.80 0.05 0.80"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# init probability of rewards from the environment\n",
"prob_rewards = pd.DataFrame(\n",
@@ -171,7 +103,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -181,24 +113,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Setup simulation completed.\n",
- "Simulated input probability rewards:\n",
- " action A action B action C\n",
- "group \n",
- "0 0.041176 0.835294 0.052941\n",
- "1 0.819277 0.036145 0.054217\n",
- "2 0.786585 0.042683 0.817073 \n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# init simulation\n",
"sim = SimulationCmab(\n",
@@ -222,205 +139,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Iteration #1\n",
- "Start predict batch 1 ...\n",
- "Start update batch 1 ... \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 11 seconds.\n",
- "The number of effective samples is smaller than 25% for some parameters.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 10 seconds.\n",
- "The number of effective samples is smaller than 25% for some parameters.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 4 seconds.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Iteration #2\n",
- "Start predict batch 2 ...\n",
- "Start update batch 2 ... \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 9 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 5 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Iteration #3\n",
- "Start predict batch 3 ...\n",
- "Start update batch 3 ... \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 9 seconds.\n",
- "The number of effective samples is smaller than 25% for some parameters.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 4 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Iteration #4\n",
- "Start predict batch 4 ...\n",
- "Start update batch 4 ... \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 4 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Iteration #5\n",
- "Start predict batch 5 ...\n",
- "Start update batch 5 ... \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 4 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Simulation results (first 10 observations):\n",
- " action reward group selected_prob_reward max_prob_reward regret \\\n",
- "0 action C 0.0 1 0.05 0.8 0.75 \n",
- "1 action C 1.0 2 0.80 0.8 0.00 \n",
- "2 action B 1.0 0 0.80 0.8 0.00 \n",
- "3 action C 0.0 1 0.05 0.8 0.75 \n",
- "4 action C 0.0 1 0.05 0.8 0.75 \n",
- "5 action B 1.0 0 0.80 0.8 0.00 \n",
- "6 action A 0.0 0 0.05 0.8 0.75 \n",
- "7 action C 0.0 2 0.80 0.8 0.00 \n",
- "8 action C 0.0 1 0.05 0.8 0.75 \n",
- "9 action C 1.0 2 0.80 0.8 0.00 \n",
- "\n",
- " cum_regret \n",
- "0 0.75 \n",
- "1 0.75 \n",
- "2 0.75 \n",
- "3 1.50 \n",
- "4 2.25 \n",
- "5 2.25 \n",
- "6 3.00 \n",
- "7 3.00 \n",
- "8 3.75 \n",
- "9 3.75 \n",
- "\n",
- "Count of actions selected by the bandit: \n",
- " {'group 0': {'action B': 85, 'action A': 53, 'action C': 32}, 'group 1': {'action A': 109, 'action C': 31, 'action B': 26}, 'group 2': {'action A': 70, 'action C': 59, 'action B': 35}} \n",
- "\n",
- "Observed proportion of positive rewards for each action:\n",
- " {'group 0': {'action B': 0.788235294117647, 'action A': 0.03773584905660377, 'action C': 0.03125}, 'group 1': {'action A': 0.7981651376146789, 'action B': 0.07692307692307693, 'action C': 0.03225806451612903}, 'group 2': {'action A': 0.7142857142857143, 'action C': 0.8305084745762712, 'action B': 0.02857142857142857}} \n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"sim.run()"
]
diff --git a/docs/src/tutorials/simulation_smab.ipynb b/docs/src/tutorials/simulation_smab.ipynb
index df7d1e8..e15d57f 100644
--- a/docs/src/tutorials/simulation_smab.ipynb
+++ b/docs/src/tutorials/simulation_smab.ipynb
@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -45,7 +45,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -64,7 +64,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -95,35 +95,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Simulation results (first 10 observations):\n",
- " action reward\n",
- "0 Action B 0.0\n",
- "1 Action C 1.0\n",
- "2 Action C 0.0\n",
- "3 Action A 1.0\n",
- "4 Action B 1.0\n",
- "5 Action C 1.0\n",
- "6 Action A 1.0\n",
- "7 Action A 1.0\n",
- "8 Action B 0.0\n",
- "9 Action B 0.0 \n",
- "\n",
- "Count of actions selected by the bandit: \n",
- " {'Action C': 38670, 'Action B': 683, 'Action A': 647} \n",
- "\n",
- "Observed proportion of positive rewards for each action:\n",
- " {'Action A': 0.6120556414219475, 'Action B': 0.4978038067349927, 'Action C': 0.7995603827256271} \n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# run simulation\n",
"sim.run()"
diff --git a/docs/src/tutorials/smab.ipynb b/docs/src/tutorials/smab.ipynb
index ed119e1..c4bc60c 100644
--- a/docs/src/tutorials/smab.ipynb
+++ b/docs/src/tutorials/smab.ipynb
@@ -32,7 +32,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -50,7 +50,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -62,7 +62,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -81,7 +81,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -98,17 +98,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Recommended action: ['Action C', 'Action C', 'Action C', 'Action B', 'Action B', 'Action C', 'Action B', 'Action C', 'Action A', 'Action B']\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# predict actions\n",
"pred_actions, _ = smab.predict(n_samples=1000)\n",
@@ -124,19 +116,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Action A: n_successes=285, n_failures=31\n",
- "Action B: n_successes=123, n_failures=210\n",
- "Action C: n_successes=261, n_failures=90\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# simulate rewards from environment\n",
"n_successes, n_failures = {}, {}\n",
@@ -155,7 +137,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
diff --git a/docs/tutorials/cmab.ipynb b/docs/tutorials/cmab.ipynb
index 07b74fa..cb83a7e 100644
--- a/docs/tutorials/cmab.ipynb
+++ b/docs/tutorials/cmab.ipynb
@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {
"pycharm": {
"is_executing": false
@@ -56,31 +56,13 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {
"pycharm": {
"is_executing": false
}
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "X: context matrix of shape (n_samples, n_features)\n",
- "[[-0.53211475 -0.40592956 0.05892565 -0.88067628 -0.84061481]\n",
- " [-0.95680954 -0.00540581 0.09148556 -0.82021004 -0.63425381]\n",
- " [-0.87792928 -0.51881823 -0.51767022 -0.05385187 -0.64499044]\n",
- " [-0.10569516 0.30847784 -0.353929 -0.94831998 -0.52175713]\n",
- " [-0.05088401 0.17155683 -0.4322128 -0.07509104 -0.78919832]\n",
- " [-0.88604157 0.55037109 0.42634479 -0.87179776 -0.69767766]\n",
- " [-0.0022063 0.99304089 0.76398198 -0.87343131 -0.12363411]\n",
- " [ 0.36371019 0.6660538 0.17177652 -0.08891719 -0.91070485]\n",
- " [-0.1056742 -0.72879406 -0.69367421 -0.8684397 0.70903817]\n",
- " [-0.15422305 0.31069811 -0.47487951 0.00853137 0.23793364]]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# context\n",
"n_samples = 1000\n",
@@ -92,7 +74,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -109,7 +91,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -126,18 +108,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Recommended action: ['action C' 'action C' 'action B' 'action B' 'action C' 'action C'\n",
- " 'action B' 'action C' 'action B' 'action C']\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# predict action\n",
"pred_actions, _ = cmab.predict(X)\n",
@@ -153,17 +126,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Simulated rewards: [1 0 0 0 0 0 0 0 1 1]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# simulate reward from environment\n",
"simulated_rewards = np.random.randint(2, size=n_samples)\n",
@@ -179,31 +144,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 5 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# update model\n",
"cmab.update(X, actions=pred_actions, rewards=simulated_rewards)"
diff --git a/docs/tutorials/mab.ipynb b/docs/tutorials/mab.ipynb
index 22c5666..d139501 100644
--- a/docs/tutorials/mab.ipynb
+++ b/docs/tutorials/mab.ipynb
@@ -3,7 +3,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "e2595bf3-9767-4338-9a51-ce706dc306cf",
+ "id": "0",
"metadata": {},
"source": [
"# Stochastic Bernoulli Bandit"
@@ -11,8 +11,8 @@
},
{
"cell_type": "code",
- "execution_count": 1,
- "id": "8f8462e5-f38e-4b04-9002-07ababe3ee0c",
+ "execution_count": null,
+ "id": "1",
"metadata": {},
"outputs": [],
"source": [
@@ -25,21 +25,10 @@
},
{
"cell_type": "code",
- "execution_count": 2,
- "id": "75d6f625",
+ "execution_count": null,
+ "id": "2",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'%.2f'"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# print 2 decimal places in the notebook\n",
"%precision %.2f"
@@ -48,7 +37,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "b6b37329-6a3b-4f2a-87a5-e0dcbbb1bb69",
+ "id": "3",
"metadata": {},
"source": [
"## 1. Initialization\n",
@@ -58,7 +47,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "2ca215bf-6321-4819-a539-ebf1f378436a",
+ "id": "4",
"metadata": {},
"source": [
"### 1.1 Initialize via class constructor\n",
@@ -68,8 +57,8 @@
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "701111ff-b659-49b7-8cf5-8349536b4cd8",
+ "execution_count": null,
+ "id": "5",
"metadata": {},
"outputs": [],
"source": [
@@ -84,38 +73,10 @@
},
{
"cell_type": "code",
- "execution_count": 4,
- "id": "55112a02-8df2-4895-9414-ddabbfc8ecac",
+ "execution_count": null,
+ "id": "6",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "SmabBernoulli(\n",
- " actions={\n",
- " 'a1': Beta(n_successes=1, n_failures=1),\n",
- " 'a2': Beta(n_successes=1, n_failures=1),\n",
- " 'a3': Beta(n_successes=1, n_failures=1)\n",
- " },\n",
- " strategy=ClassicBandit()\n",
- ")\n",
- "
\n"
- ],
- "text/plain": [
- "\u001b[1;35mSmabBernoulli\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mactions\u001b[0m=\u001b[1m{\u001b[0m\n",
- " \u001b[32m'a1'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a2'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a3'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m\n",
- " \u001b[1m}\u001b[0m,\n",
- " \u001b[33mstrategy\u001b[0m=\u001b[1;35mClassicBandit\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\n",
- "\u001b[1m)\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"print(mab)"
]
@@ -123,7 +84,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "f2ee7bdc-3881-47a5-b7d4-84862f70e643",
+ "id": "7",
"metadata": {},
"source": [
"### 1.2 Initialize via utility function (for cold start)"
@@ -132,7 +93,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "564914fd-73cc-4854-8ec7-548970f794a6",
+ "id": "8",
"metadata": {},
"source": [
"You can initialize the bandit via the utility function `SmabBernoulliMOCC.cold_start()`. This is particulary useful in a cold start setting when there is no prior knowledge on the Beta distruibutions. In this case for all Betas `n_successes` and `n_failures` are set to `1`."
@@ -140,8 +101,8 @@
},
{
"cell_type": "code",
- "execution_count": 5,
- "id": "dbfb0ddd-4c16-441f-8c68-16020e425d57",
+ "execution_count": null,
+ "id": "9",
"metadata": {},
"outputs": [],
"source": [
@@ -151,38 +112,10 @@
},
{
"cell_type": "code",
- "execution_count": 6,
- "id": "fcc3649c-d08c-46db-a534-f61d97962c99",
+ "execution_count": null,
+ "id": "10",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "SmabBernoulli(\n",
- " actions={\n",
- " 'a1': Beta(n_successes=1, n_failures=1),\n",
- " 'a3': Beta(n_successes=1, n_failures=1),\n",
- " 'a2': Beta(n_successes=1, n_failures=1)\n",
- " },\n",
- " strategy=ClassicBandit()\n",
- ")\n",
- "
\n"
- ],
- "text/plain": [
- "\u001b[1;35mSmabBernoulli\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mactions\u001b[0m=\u001b[1m{\u001b[0m\n",
- " \u001b[32m'a1'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a3'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a2'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m\n",
- " \u001b[1m}\u001b[0m,\n",
- " \u001b[33mstrategy\u001b[0m=\u001b[1;35mClassicBandit\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\n",
- "\u001b[1m)\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"print(mab)"
]
@@ -190,7 +123,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "aa91a5ed-83cc-4016-aa3e-17b8a102bb77",
+ "id": "11",
"metadata": {},
"source": [
"## 2. Function `predict()`"
@@ -198,46 +131,18 @@
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "a735c03d-cde4-4147-a50d-4b82dd9c1792",
+ "execution_count": null,
+ "id": "12",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Help on method predict in module pybandits.smab:\n",
- "\n",
- "predict(n_samples: pydantic.types.PositiveInt = 1, forbidden_actions: Optional[Set[pybandits.base.ActionId]] = None) -> Tuple[List[pybandits.base.ActionId], List[Dict[pybandits.base.ActionId, pybandits.base.Probability]]] method of pybandits.smab.SmabBernoulli instance\n",
- " Predict actions.\n",
- " \n",
- " Parameters\n",
- " ----------\n",
- " n_samples : int > 0, default=1\n",
- " Number of samples to predict.\n",
- " forbidden_actions : Optional[Set[ActionId]], default=None\n",
- " Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only\n",
- " consider the remaining allowed_actions. By default, the model considers all actions as allowed_actions.\n",
- " Note that: actions = allowed_actions U forbidden_actions.\n",
- " \n",
- " Returns\n",
- " -------\n",
- " actions: List[ActionId] of shape (n_samples,)\n",
- " The actions selected by the multi-armed bandit model.\n",
- " probs: List[Dict[ActionId, Probability]] of shape (n_samples,)\n",
- " The probabilities of getting a positive reward for each action.\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"help(mab.predict)"
]
},
{
"cell_type": "code",
- "execution_count": 8,
- "id": "f3d9cb8b-7d9b-437b-bbc2-e7a55475a1fb",
+ "execution_count": null,
+ "id": "13",
"metadata": {},
"outputs": [],
"source": [
@@ -247,54 +152,28 @@
},
{
"cell_type": "code",
- "execution_count": 9,
- "id": "a9284b11-05ba-4cda-9597-b69e6d7632a3",
+ "execution_count": null,
+ "id": "14",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['a3', 'a1', 'a3', 'a1', 'a3']"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"actions"
]
},
{
"cell_type": "code",
- "execution_count": 10,
- "id": "84cdbed4-9aa5-42e1-84db-1f8f72c52d93",
+ "execution_count": null,
+ "id": "15",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[{'a1': 0.68, 'a3': 0.77, 'a2': 0.51},\n",
- " {'a1': 0.85, 'a3': 0.18, 'a2': 0.82},\n",
- " {'a1': 0.68, 'a3': 0.82, 'a2': 0.42},\n",
- " {'a1': 0.98, 'a3': 0.72, 'a2': 0.22},\n",
- " {'a1': 0.72, 'a3': 0.83, 'a2': 0.13}]"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"probs"
]
},
{
"cell_type": "code",
- "execution_count": 11,
- "id": "bfc53fc8-b1bf-42ea-907a-fa5fb7173199",
+ "execution_count": null,
+ "id": "16",
"metadata": {},
"outputs": [],
"source": [
@@ -304,46 +183,20 @@
},
{
"cell_type": "code",
- "execution_count": 12,
- "id": "696d58f4-ca5f-41d4-983f-bc7a5351ab28",
+ "execution_count": null,
+ "id": "17",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['a2', 'a2', 'a2', 'a3', 'a2']"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"actions"
]
},
{
"cell_type": "code",
- "execution_count": 13,
- "id": "f5826785-a5c6-4c06-9bab-9f05134e783e",
+ "execution_count": null,
+ "id": "18",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[{'a3': 0.71, 'a2': 0.86},\n",
- " {'a3': 0.51, 'a2': 0.55},\n",
- " {'a3': 0.42, 'a2': 0.87},\n",
- " {'a3': 0.89, 'a2': 0.52},\n",
- " {'a3': 0.41, 'a2': 0.42}]"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"probs"
]
@@ -351,7 +204,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "d89f7199-bec3-407d-92a9-bdf917c13de6",
+ "id": "19",
"metadata": {},
"source": [
"## 3. Function `update()`"
@@ -359,42 +212,18 @@
},
{
"cell_type": "code",
- "execution_count": 14,
- "id": "140eb2fc-3659-4c13-86d1-ec5a575c79c1",
+ "execution_count": null,
+ "id": "20",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Help on method update in module pybandits.smab:\n",
- "\n",
- "update(actions: List[pybandits.base.ActionId], rewards: List[pybandits.base.BinaryReward]) method of pybandits.smab.SmabBernoulli instance\n",
- " Update the stochastic Bernoulli bandit given the list of selected actions and their corresponding binary\n",
- " rewards.\n",
- " \n",
- " Parameters\n",
- " ----------\n",
- " actions : List[ActionId] of shape (n_samples,), e.g. ['a1', 'a2', 'a3', 'a4', 'a5']\n",
- " The selected action for each sample.\n",
- " rewards : List[Union[BinaryReward, List[BinaryReward]]] of shape (n_samples, n_objectives)\n",
- " The binary reward for each sample.\n",
- " If strategy is not MultiObjectiveBandit, rewards should be a list, e.g.\n",
- " rewards = [1, 0, 1, 1, 1, ...]\n",
- " If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):\n",
- " rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"help(mab.update)"
]
},
{
"cell_type": "code",
- "execution_count": 15,
- "id": "2526ed6d-82d4-4485-bc6e-b5cb53dd78a5",
+ "execution_count": null,
+ "id": "21",
"metadata": {},
"outputs": [],
"source": [
@@ -404,38 +233,10 @@
},
{
"cell_type": "code",
- "execution_count": 16,
- "id": "3bd0ab45-94e8-415b-adea-a089c54f6274",
+ "execution_count": null,
+ "id": "22",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "SmabBernoulli(\n",
- " actions={\n",
- " 'a1': Beta(n_successes=1, n_failures=1),\n",
- " 'a3': Beta(n_successes=2, n_failures=1),\n",
- " 'a2': Beta(n_successes=3, n_failures=3)\n",
- " },\n",
- " strategy=ClassicBandit()\n",
- ")\n",
- "
\n"
- ],
- "text/plain": [
- "\u001b[1;35mSmabBernoulli\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mactions\u001b[0m=\u001b[1m{\u001b[0m\n",
- " \u001b[32m'a1'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a3'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m2\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a2'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m3\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m3\u001b[0m\u001b[1m)\u001b[0m\n",
- " \u001b[1m}\u001b[0m,\n",
- " \u001b[33mstrategy\u001b[0m=\u001b[1;35mClassicBandit\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\n",
- "\u001b[1m)\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# update\n",
"mab.update(actions=actions, rewards=rewards)\n",
@@ -445,7 +246,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "9823d84c-862b-4bb6-ab36-024f34460595",
+ "id": "23",
"metadata": {},
"source": [
"## 4. Example of usage\n",
@@ -455,8 +256,8 @@
},
{
"cell_type": "code",
- "execution_count": 17,
- "id": "a785463d-d710-4844-80bf-42c09b0e0b45",
+ "execution_count": null,
+ "id": "24",
"metadata": {},
"outputs": [],
"source": [
@@ -476,38 +277,10 @@
},
{
"cell_type": "code",
- "execution_count": 18,
- "id": "034add3d-e6f3-471c-b8b9-30c286faf2cc",
+ "execution_count": null,
+ "id": "25",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "SmabBernoulli(\n",
- " actions={\n",
- " 'a1': Beta(n_successes=337, n_failures=369),\n",
- " 'a3': Beta(n_successes=4448, n_failures=4315),\n",
- " 'a2': Beta(n_successes=246, n_failures=296)\n",
- " },\n",
- " strategy=ClassicBandit()\n",
- ")\n",
- "
\n"
- ],
- "text/plain": [
- "\u001b[1;35mSmabBernoulli\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mactions\u001b[0m=\u001b[1m{\u001b[0m\n",
- " \u001b[32m'a1'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m337\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m369\u001b[0m\u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a3'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m4448\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m4315\u001b[0m\u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a2'\u001b[0m: \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m246\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m296\u001b[0m\u001b[1m)\u001b[0m\n",
- " \u001b[1m}\u001b[0m,\n",
- " \u001b[33mstrategy\u001b[0m=\u001b[1;35mClassicBandit\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\n",
- "\u001b[1m)\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"print(mab)"
]
diff --git a/docs/tutorials/simulation_cmab.ipynb b/docs/tutorials/simulation_cmab.ipynb
index 1ce2423..5a972ee 100644
--- a/docs/tutorials/simulation_cmab.ipynb
+++ b/docs/tutorials/simulation_cmab.ipynb
@@ -16,7 +16,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -38,7 +38,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -61,7 +61,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -80,77 +80,9 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Probability of positive reward for each group/action:\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " action A | \n",
- " action B | \n",
- " action C | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0.05 | \n",
- " 0.80 | \n",
- " 0.05 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0.80 | \n",
- " 0.05 | \n",
- " 0.05 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0.80 | \n",
- " 0.05 | \n",
- " 0.80 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " action A action B action C\n",
- "0 0.05 0.80 0.05\n",
- "1 0.80 0.05 0.05\n",
- "2 0.80 0.05 0.80"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# init probability of rewards from the environment\n",
"prob_rewards = pd.DataFrame(\n",
@@ -171,7 +103,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -181,24 +113,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Setup simulation completed.\n",
- "Simulated input probability rewards:\n",
- " action A action B action C\n",
- "group \n",
- "0 0.041176 0.835294 0.052941\n",
- "1 0.819277 0.036145 0.054217\n",
- "2 0.786585 0.042683 0.817073 \n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# init simulation\n",
"sim = SimulationCmab(\n",
@@ -222,205 +139,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Iteration #1\n",
- "Start predict batch 1 ...\n",
- "Start update batch 1 ... \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 11 seconds.\n",
- "The number of effective samples is smaller than 25% for some parameters.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 10 seconds.\n",
- "The number of effective samples is smaller than 25% for some parameters.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 4 seconds.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Iteration #2\n",
- "Start predict batch 2 ...\n",
- "Start update batch 2 ... \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 9 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 5 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Iteration #3\n",
- "Start predict batch 3 ...\n",
- "Start update batch 3 ... \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 9 seconds.\n",
- "The number of effective samples is smaller than 25% for some parameters.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 4 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Iteration #4\n",
- "Start predict batch 4 ...\n",
- "Start update batch 4 ... \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 4 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Iteration #5\n",
- "Start predict batch 5 ...\n",
- "Start update batch 5 ... \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 4 seconds.\n",
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using adapt_diag...\n",
- "Sequential sampling (2 chains in 1 job)\n",
- "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
- "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Simulation results (first 10 observations):\n",
- " action reward group selected_prob_reward max_prob_reward regret \\\n",
- "0 action C 0.0 1 0.05 0.8 0.75 \n",
- "1 action C 1.0 2 0.80 0.8 0.00 \n",
- "2 action B 1.0 0 0.80 0.8 0.00 \n",
- "3 action C 0.0 1 0.05 0.8 0.75 \n",
- "4 action C 0.0 1 0.05 0.8 0.75 \n",
- "5 action B 1.0 0 0.80 0.8 0.00 \n",
- "6 action A 0.0 0 0.05 0.8 0.75 \n",
- "7 action C 0.0 2 0.80 0.8 0.00 \n",
- "8 action C 0.0 1 0.05 0.8 0.75 \n",
- "9 action C 1.0 2 0.80 0.8 0.00 \n",
- "\n",
- " cum_regret \n",
- "0 0.75 \n",
- "1 0.75 \n",
- "2 0.75 \n",
- "3 1.50 \n",
- "4 2.25 \n",
- "5 2.25 \n",
- "6 3.00 \n",
- "7 3.00 \n",
- "8 3.75 \n",
- "9 3.75 \n",
- "\n",
- "Count of actions selected by the bandit: \n",
- " {'group 0': {'action B': 85, 'action A': 53, 'action C': 32}, 'group 1': {'action A': 109, 'action C': 31, 'action B': 26}, 'group 2': {'action A': 70, 'action C': 59, 'action B': 35}} \n",
- "\n",
- "Observed proportion of positive rewards for each action:\n",
- " {'group 0': {'action B': 0.788235294117647, 'action A': 0.03773584905660377, 'action C': 0.03125}, 'group 1': {'action A': 0.7981651376146789, 'action B': 0.07692307692307693, 'action C': 0.03225806451612903}, 'group 2': {'action A': 0.7142857142857143, 'action C': 0.8305084745762712, 'action B': 0.02857142857142857}} \n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"sim.run()"
]
diff --git a/docs/tutorials/simulation_smab.ipynb b/docs/tutorials/simulation_smab.ipynb
index df7d1e8..e15d57f 100644
--- a/docs/tutorials/simulation_smab.ipynb
+++ b/docs/tutorials/simulation_smab.ipynb
@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -45,7 +45,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -64,7 +64,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -95,35 +95,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Simulation results (first 10 observations):\n",
- " action reward\n",
- "0 Action B 0.0\n",
- "1 Action C 1.0\n",
- "2 Action C 0.0\n",
- "3 Action A 1.0\n",
- "4 Action B 1.0\n",
- "5 Action C 1.0\n",
- "6 Action A 1.0\n",
- "7 Action A 1.0\n",
- "8 Action B 0.0\n",
- "9 Action B 0.0 \n",
- "\n",
- "Count of actions selected by the bandit: \n",
- " {'Action C': 38670, 'Action B': 683, 'Action A': 647} \n",
- "\n",
- "Observed proportion of positive rewards for each action:\n",
- " {'Action A': 0.6120556414219475, 'Action B': 0.4978038067349927, 'Action C': 0.7995603827256271} \n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# run simulation\n",
"sim.run()"
diff --git a/docs/tutorials/smab.ipynb b/docs/tutorials/smab.ipynb
index ed119e1..c4bc60c 100644
--- a/docs/tutorials/smab.ipynb
+++ b/docs/tutorials/smab.ipynb
@@ -32,7 +32,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -50,7 +50,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -62,7 +62,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -81,7 +81,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -98,17 +98,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Recommended action: ['Action C', 'Action C', 'Action C', 'Action B', 'Action B', 'Action C', 'Action B', 'Action C', 'Action A', 'Action B']\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# predict actions\n",
"pred_actions, _ = smab.predict(n_samples=1000)\n",
@@ -124,19 +116,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Action A: n_successes=285, n_failures=31\n",
- "Action B: n_successes=123, n_failures=210\n",
- "Action C: n_successes=261, n_failures=90\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# simulate rewards from environment\n",
"n_successes, n_failures = {}, {}\n",
@@ -155,7 +137,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
diff --git a/docs/tutorials/smab_mo_cc.ipynb b/docs/tutorials/smab_mo_cc.ipynb
index 880654c..f92e04d 100644
--- a/docs/tutorials/smab_mo_cc.ipynb
+++ b/docs/tutorials/smab_mo_cc.ipynb
@@ -3,7 +3,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "e2595bf3-9767-4338-9a51-ce706dc306cf",
+ "id": "0",
"metadata": {},
"source": [
"# Stochastic Bernoulli Bandit (Multi-Objective with Cost-Control)"
@@ -11,8 +11,8 @@
},
{
"cell_type": "code",
- "execution_count": 1,
- "id": "8f8462e5-f38e-4b04-9002-07ababe3ee0c",
+ "execution_count": null,
+ "id": "1",
"metadata": {},
"outputs": [],
"source": [
@@ -25,21 +25,10 @@
},
{
"cell_type": "code",
- "execution_count": 2,
- "id": "75d6f625",
+ "execution_count": null,
+ "id": "2",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'%.2f'"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# print 2 decimal places in the notebook\n",
"%precision %.2f"
@@ -47,7 +36,7 @@
},
{
"cell_type": "markdown",
- "id": "b6b37329-6a3b-4f2a-87a5-e0dcbbb1bb69",
+ "id": "3",
"metadata": {},
"source": [
"## 1. Initialization\n",
@@ -57,7 +46,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "2ca215bf-6321-4819-a539-ebf1f378436a",
+ "id": "4",
"metadata": {},
"source": [
"### 1.1 Initialize via class constructor\n",
@@ -67,8 +56,8 @@
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "701111ff-b659-49b7-8cf5-8349536b4cd8",
+ "execution_count": null,
+ "id": "5",
"metadata": {},
"outputs": [],
"source": [
@@ -83,63 +72,17 @@
},
{
"cell_type": "code",
- "execution_count": 4,
- "id": "55112a02-8df2-4895-9414-ddabbfc8ecac",
+ "execution_count": null,
+ "id": "6",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "SmabBernoulliMOCC(\n",
- " actions={\n",
- " 'a1': BetaMOCC(\n",
- " counters=[Beta(n_successes=1, n_failures=1), Beta(n_successes=1, n_failures=1)],\n",
- " cost=30.0\n",
- " ),\n",
- " 'a2': BetaMOCC(\n",
- " counters=[Beta(n_successes=1, n_failures=1), Beta(n_successes=1, n_failures=1)],\n",
- " cost=10.0\n",
- " ),\n",
- " 'a3': BetaMOCC(\n",
- " counters=[Beta(n_successes=1, n_failures=1), Beta(n_successes=1, n_failures=1)],\n",
- " cost=20.0\n",
- " )\n",
- " },\n",
- " strategy=MultiObjectiveCostControlBandit()\n",
- ")\n",
- "
\n"
- ],
- "text/plain": [
- "\u001b[1;35mSmabBernoulliMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mactions\u001b[0m=\u001b[1m{\u001b[0m\n",
- " \u001b[32m'a1'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m30\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a2'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m10\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a3'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m20\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m\n",
- " \u001b[1m}\u001b[0m,\n",
- " \u001b[33mstrategy\u001b[0m=\u001b[1;35mMultiObjectiveCostControlBandit\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\n",
- "\u001b[1m)\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"print(mab)"
]
},
{
"cell_type": "markdown",
- "id": "f2ee7bdc-3881-47a5-b7d4-84862f70e643",
+ "id": "7",
"metadata": {},
"source": [
"### 1.2 Initialize via utility function (for cold start)"
@@ -148,7 +91,7 @@
{
"attachments": {},
"cell_type": "markdown",
- "id": "564914fd-73cc-4854-8ec7-548970f794a6",
+ "id": "8",
"metadata": {},
"source": [
"You can initialize the bandit via the utility function `SmabBernoulliMOCC.cold_start()`. This is particulary useful in a cold start setting when there is no prior knowledge on the Beta distruibutions. In this case for all Betas `n_successes` and `n_failures` are set to `1`."
@@ -156,8 +99,8 @@
},
{
"cell_type": "code",
- "execution_count": 5,
- "id": "dbfb0ddd-4c16-441f-8c68-16020e425d57",
+ "execution_count": null,
+ "id": "9",
"metadata": {},
"outputs": [],
"source": [
@@ -170,63 +113,17 @@
},
{
"cell_type": "code",
- "execution_count": 6,
- "id": "fcc3649c-d08c-46db-a534-f61d97962c99",
+ "execution_count": null,
+ "id": "10",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "SmabBernoulliMOCC(\n",
- " actions={\n",
- " 'a1': BetaMOCC(\n",
- " counters=[Beta(n_successes=1, n_failures=1), Beta(n_successes=1, n_failures=1)],\n",
- " cost=30.0\n",
- " ),\n",
- " 'a2': BetaMOCC(\n",
- " counters=[Beta(n_successes=1, n_failures=1), Beta(n_successes=1, n_failures=1)],\n",
- " cost=10.0\n",
- " ),\n",
- " 'a3': BetaMOCC(\n",
- " counters=[Beta(n_successes=1, n_failures=1), Beta(n_successes=1, n_failures=1)],\n",
- " cost=20.0\n",
- " )\n",
- " },\n",
- " strategy=MultiObjectiveCostControlBandit()\n",
- ")\n",
-      "\n"
- ],
- "text/plain": [
- "\u001b[1;35mSmabBernoulliMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mactions\u001b[0m=\u001b[1m{\u001b[0m\n",
- " \u001b[32m'a1'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m30\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a2'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m10\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a3'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m20\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m\n",
- " \u001b[1m}\u001b[0m,\n",
- " \u001b[33mstrategy\u001b[0m=\u001b[1;35mMultiObjectiveCostControlBandit\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\n",
- "\u001b[1m)\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"print(mab)"
]
},
{
"cell_type": "markdown",
- "id": "aa91a5ed-83cc-4016-aa3e-17b8a102bb77",
+ "id": "11",
"metadata": {},
"source": [
"## 2. Function `predict()`"
@@ -234,46 +131,18 @@
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "a735c03d-cde4-4147-a50d-4b82dd9c1792",
+ "execution_count": null,
+ "id": "12",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Help on method predict in module pybandits.smab:\n",
- "\n",
- "predict(n_samples: pydantic.types.PositiveInt = 1, forbidden_actions: Optional[Set[pybandits.base.ActionId]] = None) -> Tuple[List[pybandits.base.ActionId], List[Dict[pybandits.base.ActionId, pybandits.base.Probability]]] method of pybandits.smab.SmabBernoulliMOCC instance\n",
- " Predict actions.\n",
- " \n",
- " Parameters\n",
- " ----------\n",
- " n_samples : int > 0, default=1\n",
- " Number of samples to predict.\n",
- " forbidden_actions : Optional[Set[ActionId]], default=None\n",
- " Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only\n",
- " consider the remaining allowed_actions. By default, the model considers all actions as allowed_actions.\n",
- " Note that: actions = allowed_actions U forbidden_actions.\n",
- " \n",
- " Returns\n",
- " -------\n",
- " actions: List[ActionId] of shape (n_samples,)\n",
- " The actions selected by the multi-armed bandit model.\n",
- " probs: List[Dict[ActionId, Probability]] of shape (n_samples,)\n",
- " The probabilities of getting a positive reward for each action.\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"help(mab.predict)"
]
},
{
"cell_type": "code",
- "execution_count": 8,
- "id": "f3d9cb8b-7d9b-437b-bbc2-e7a55475a1fb",
+ "execution_count": null,
+ "id": "13",
"metadata": {},
"outputs": [],
"source": [
@@ -283,54 +152,28 @@
},
{
"cell_type": "code",
- "execution_count": 9,
- "id": "a9284b11-05ba-4cda-9597-b69e6d7632a3",
+ "execution_count": null,
+ "id": "14",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['a3', 'a3', 'a2', 'a3', 'a2']"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"actions"
]
},
{
"cell_type": "code",
- "execution_count": 10,
- "id": "84cdbed4-9aa5-42e1-84db-1f8f72c52d93",
+ "execution_count": null,
+ "id": "15",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[{'a1': [0.75, 0.55], 'a2': [0.78, 0.29], 'a3': [0.79, 0.83]},\n",
- " {'a1': [0.95, 0.28], 'a2': [0.22, 0.23], 'a3': [0.99, 0.95]},\n",
- " {'a1': [0.22, 0.64], 'a2': [0.62, 0.50], 'a3': [0.30, 0.12]},\n",
- " {'a1': [0.19, 0.79], 'a2': [0.02, 0.70], 'a3': [0.27, 0.72]},\n",
- " {'a1': [0.38, 0.03], 'a2': [0.80, 0.55], 'a3': [0.79, 0.04]}]"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"probs"
]
},
{
"cell_type": "code",
- "execution_count": 11,
- "id": "bfc53fc8-b1bf-42ea-907a-fa5fb7173199",
+ "execution_count": null,
+ "id": "16",
"metadata": {},
"outputs": [],
"source": [
@@ -340,53 +183,27 @@
},
{
"cell_type": "code",
- "execution_count": 12,
- "id": "696d58f4-ca5f-41d4-983f-bc7a5351ab28",
+ "execution_count": null,
+ "id": "17",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['a3', 'a2', 'a2', 'a2', 'a2']"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"actions"
]
},
{
"cell_type": "code",
- "execution_count": 13,
- "id": "f5826785-a5c6-4c06-9bab-9f05134e783e",
+ "execution_count": null,
+ "id": "18",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[{'a2': [0.12, 0.45], 'a3': [0.38, 0.90]},\n",
- " {'a2': [0.10, 0.96], 'a3': [0.58, 0.20]},\n",
- " {'a2': [0.92, 0.85], 'a3': [0.31, 0.65]},\n",
- " {'a2': [0.60, 0.04], 'a3': [0.45, 0.97]},\n",
- " {'a2': [0.87, 0.51], 'a3': [0.74, 0.35]}]"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"probs"
]
},
{
"cell_type": "markdown",
- "id": "d89f7199-bec3-407d-92a9-bdf917c13de6",
+ "id": "19",
"metadata": {},
"source": [
"## 3. Function `update()`"
@@ -394,42 +211,18 @@
},
{
"cell_type": "code",
- "execution_count": 14,
- "id": "140eb2fc-3659-4c13-86d1-ec5a575c79c1",
+ "execution_count": null,
+ "id": "20",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Help on method update in module pybandits.smab:\n",
- "\n",
- "update(actions: List[pybandits.base.ActionId], rewards: List[List[pybandits.base.BinaryReward]]) method of pybandits.smab.SmabBernoulliMOCC instance\n",
- " Update the stochastic Bernoulli bandit given the list of selected actions and their corresponding binary\n",
- " rewards.\n",
- " \n",
- " Parameters\n",
- " ----------\n",
- " actions : List[ActionId] of shape (n_samples,), e.g. ['a1', 'a2', 'a3', 'a4', 'a5']\n",
- " The selected action for each sample.\n",
- " rewards : List[Union[BinaryReward, List[BinaryReward]]] of shape (n_samples, n_objectives)\n",
- " The binary reward for each sample.\n",
- " If strategy is not MultiObjectiveBandit, rewards should be a list, e.g.\n",
- " rewards = [1, 0, 1, 1, 1, ...]\n",
- " If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):\n",
- " rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"help(mab.update)"
]
},
{
"cell_type": "code",
- "execution_count": 15,
- "id": "2526ed6d-82d4-4485-bc6e-b5cb53dd78a5",
+ "execution_count": null,
+ "id": "21",
"metadata": {},
"outputs": [],
"source": [
@@ -439,56 +232,10 @@
},
{
"cell_type": "code",
- "execution_count": 16,
- "id": "3bd0ab45-94e8-415b-adea-a089c54f6274",
+ "execution_count": null,
+ "id": "22",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "SmabBernoulliMOCC(\n",
- " actions={\n",
- " 'a1': BetaMOCC(\n",
- " counters=[Beta(n_successes=1, n_failures=1), Beta(n_successes=1, n_failures=1)],\n",
- " cost=30.0\n",
- " ),\n",
- " 'a2': BetaMOCC(\n",
- " counters=[Beta(n_successes=7, n_failures=3), Beta(n_successes=7, n_failures=3)],\n",
- " cost=10.0\n",
- " ),\n",
- " 'a3': BetaMOCC(\n",
- " counters=[Beta(n_successes=3, n_failures=1), Beta(n_successes=3, n_failures=1)],\n",
- " cost=20.0\n",
- " )\n",
- " },\n",
- " strategy=MultiObjectiveCostControlBandit()\n",
- ")\n",
-      "\n"
- ],
- "text/plain": [
- "\u001b[1;35mSmabBernoulliMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mactions\u001b[0m=\u001b[1m{\u001b[0m\n",
- " \u001b[32m'a1'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m30\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a2'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m7\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m3\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m7\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m3\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m10\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a3'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m3\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m3\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m20\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m\n",
- " \u001b[1m}\u001b[0m,\n",
- " \u001b[33mstrategy\u001b[0m=\u001b[1;35mMultiObjectiveCostControlBandit\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\n",
- "\u001b[1m)\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# update\n",
"mab.update(actions=actions, rewards=rewards)\n",
@@ -497,7 +244,7 @@
},
{
"cell_type": "markdown",
- "id": "9823d84c-862b-4bb6-ab36-024f34460595",
+ "id": "23",
"metadata": {},
"source": [
"## 4. Example of usage\n",
@@ -507,8 +254,8 @@
},
{
"cell_type": "code",
- "execution_count": 17,
- "id": "a785463d-d710-4844-80bf-42c09b0e0b45",
+ "execution_count": null,
+ "id": "24",
"metadata": {},
"outputs": [],
"source": [
@@ -531,56 +278,10 @@
},
{
"cell_type": "code",
- "execution_count": 18,
- "id": "034add3d-e6f3-471c-b8b9-30c286faf2cc",
+ "execution_count": null,
+ "id": "25",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "SmabBernoulliMOCC(\n",
- " actions={\n",
- " 'a1': BetaMOCC(\n",
- " counters=[Beta(n_successes=450, n_failures=488), Beta(n_successes=450, n_failures=488)],\n",
- " cost=30.0\n",
- " ),\n",
- " 'a2': BetaMOCC(\n",
- " counters=[Beta(n_successes=8541, n_failures=8325), Beta(n_successes=8541, n_failures=8325)],\n",
- " cost=10.0\n",
- " ),\n",
- " 'a3': BetaMOCC(\n",
- " counters=[Beta(n_successes=1110, n_failures=1102), Beta(n_successes=1110, n_failures=1102)],\n",
- " cost=20.0\n",
- " )\n",
- " },\n",
- " strategy=MultiObjectiveCostControlBandit()\n",
- ")\n",
-      "\n"
- ],
- "text/plain": [
- "\u001b[1;35mSmabBernoulliMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mactions\u001b[0m=\u001b[1m{\u001b[0m\n",
- " \u001b[32m'a1'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m450\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m488\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m450\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m488\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m30\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a2'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m8541\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m8325\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m8541\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m8325\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m10\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m,\n",
- " \u001b[32m'a3'\u001b[0m: \u001b[1;35mBetaMOCC\u001b[0m\u001b[1m(\u001b[0m\n",
- " \u001b[33mcounters\u001b[0m=\u001b[1m[\u001b[0m\u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1110\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1102\u001b[0m\u001b[1m)\u001b[0m, \u001b[1;35mBeta\u001b[0m\u001b[1m(\u001b[0m\u001b[33mn_successes\u001b[0m=\u001b[1;36m1110\u001b[0m, \u001b[33mn_failures\u001b[0m=\u001b[1;36m1102\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n",
- " \u001b[33mcost\u001b[0m=\u001b[1;36m20\u001b[0m\u001b[1;36m.0\u001b[0m\n",
- " \u001b[1m)\u001b[0m\n",
- " \u001b[1m}\u001b[0m,\n",
- " \u001b[33mstrategy\u001b[0m=\u001b[1;35mMultiObjectiveCostControlBandit\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\n",
- "\u001b[1m)\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"print(mab)"
]
diff --git a/pybandits/offline_policy_estimator.py b/pybandits/offline_policy_estimator.py
new file mode 100644
index 0000000..94a7817
--- /dev/null
+++ b/pybandits/offline_policy_estimator.py
@@ -0,0 +1,807 @@
+"""
+Comprehensive Offline Policy Evaluation (OPE) estimators.
+
+This module provides a complete set of estimators for OPE.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any, Callable, Dict, Optional, Tuple, Type
+
+import numpy as np
+from scipy.stats import bootstrap
+
+from pybandits.base import Float01, PyBanditsBaseModel
+from pybandits.pydantic_version_compatibility import (
+ PYDANTIC_VERSION_1,
+ PYDANTIC_VERSION_2,
+ NonNegativeFloat,
+ PositiveFloat,
+ PositiveInt,
+ PrivateAttr,
+ pydantic_version,
+ validate_call,
+)
+
+
+class BaseOfflinePolicyEstimator(PyBanditsBaseModel, ABC):
+ """Base class for all OPE estimators.
+
+ This class defines the interface for all OPE estimators and provides a common method for estimating the policy value.
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+ """
+
+ alpha: Float01 = 0.05
+ n_bootstrap_samples: int = 10000
+ random_state: Optional[int] = None
+ _name: str = PrivateAttr()
+
+ @classmethod
+ def _check_array(
+ cls,
+ name: str,
+ data: Dict[str, np.ndarray],
+ ndim: PositiveInt,
+ dtype: type,
+ n_samples: PositiveInt,
+ n_actions: Optional[PositiveInt] = None,
+ ):
+ if name in data:
+ array = data[name]
+ if array.ndim != ndim:
+ raise ValueError(f"{name} must be a {ndim}D array.")
+ if array.shape[0] != n_samples:
+ raise ValueError(f"action and {name} must have the same length.")
+ if array.dtype != dtype:
+ raise ValueError(f"{name} must be a {dtype} array")
+ if ndim > 1:
+ if array.shape[1] != n_actions:
+ raise ValueError(f"{name} must have the same number of actions as the action array.")
+
+ @classmethod
+ def _check_sum(cls, name: str, data: Dict[str, np.ndarray]):
+ if name in data:
+ array = data[name]
+ if not array.sum(axis=-1).all():
+ raise ValueError(f"{name} must have at least one non-zero element on each column.")
+
+ @classmethod
+ def _check_inputs(cls, action: np.ndarray, **kwargs):
+ """
+ Check the inputs for the estimator.
+
+ Parameters
+ ----------
+ action : np.ndarray
+ Array of actions taken.
+ """
+ if action.ndim != 1:
+ raise ValueError("action must be a 1D array.")
+ if action.dtype != int:
+ raise ValueError("action must be an integer array.")
+ n_samples = action.shape[0]
+ n_actions = np.unique(action).shape[0]
+
+ for name, dtype in zip(["reward", "propensity_score", "expected_importance_weight"], [int, float, float]):
+ cls._check_array(name, kwargs, 1, dtype, n_samples)
+
+ for name in ["estimated_policy", "expected_reward"]:
+ cls._check_array(name, kwargs, 2, float, n_samples, n_actions)
+
+ for name in ["propensity_score", "estimated_policy", "expected_importance_weight"]:
+ cls._check_sum(name, kwargs)
+
+ @validate_call(config=dict(arbitrary_types_allowed=True))
+ def estimate_policy_value_with_confidence_interval(self, **kwargs) -> Tuple[float, float, float, float]:
+ """
+ Estimate the policy value with a confidence interval.
+
+ Parameters
+ ----------
+        **kwargs : np.ndarray
+            Arrays required by the specific estimator, e.g. action, reward, propensity_score, estimated_policy.
+
+ Returns
+ -------
+ Tuple[float, float, float, float]
+            Estimated policy value, lower and upper bounds of the confidence interval, and bootstrap standard error.
+ """
+ self._check_inputs(**kwargs)
+ sample_reward = self.estimate_sample_rewards(**kwargs)
+ estimated_policy_value = sample_reward.mean()
+ bootstrap_result = bootstrap(
+ data=(sample_reward,),
+ statistic=np.mean,
+ confidence_level=1 - self.alpha,
+ n_resamples=self.n_bootstrap_samples,
+ random_state=self.random_state,
+ )
+ low, high = bootstrap_result.confidence_interval
+ std = bootstrap_result.standard_error
+ return estimated_policy_value, low, high, std
+
+ @abstractmethod
+ def estimate_sample_rewards(self, **kwargs) -> np.ndarray:
+ """
+ Estimate sample rewards.
+
+ Returns
+ -------
+ np.ndarray
+ Estimated sample rewards.
+ """
+ pass
+
+ @property
+ def name(self) -> str:
+ """
+ Get the name of the estimator.
+
+ Returns
+ -------
+ str
+ Name of the estimator.
+ """
+ return self._name
+
+
+class ReplayMethod(BaseOfflinePolicyEstimator):
+ """
+ Replay Method estimator.
+
+ This estimator is a simple baseline that estimates the policy value by averaging the rewards of the matched samples.
+
+ Reference: Unbiased Offline Evaluation of Contextual-bandit-based News Article Recommendation Algorithms (Li, Chu, Langford, and Wang, 2011)
+ https://arxiv.org/pdf/1003.5956
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+
+ """
+
+ _name = "rep"
+
+ def estimate_sample_rewards(
+ self, action: np.ndarray, reward: np.ndarray, estimated_policy: np.ndarray, **kwargs
+ ) -> np.ndarray:
+ """
+ Estimate the sample rewards.
+
+ Parameters
+ ----------
+ action : np.ndarray
+ Array of actions taken.
+ reward : np.ndarray
+ Array of rewards corresponding to each action.
+ estimated_policy : np.ndarray
+ Array of action distributions.
+
+ Returns
+ -------
+ sample_reward : np.ndarray
+ Estimated sample rewards.
+ """
+ n_samples = action.shape[0]
+ matched_evaluation_policy = estimated_policy[np.arange(n_samples), action]
+ matched_action = matched_evaluation_policy == 1
+ sample_reward = (
+ reward * matched_action / matched_action.mean() if matched_action.any() else np.zeros_like(matched_action)
+ )
+ return sample_reward
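+
+# Illustrative sketch (comments only, hypothetical numbers): with logged action = [0, 1], reward = [1, 0]
+# and a deterministic evaluation policy estimated_policy = [[1.0, 0.0], [1.0, 0.0]], only the first round
+# matches the policy's choice, so matched_action = [True, False] with mean 0.5. The per-sample rewards
+# become [1 / 0.5, 0] = [2.0, 0.0], whose mean (1.0) equals the average reward over the matched rounds,
+# which is the Replay Method estimate of the policy value.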
+
+
+class GeneralizedInverseProbabilityWeighting(BaseOfflinePolicyEstimator, ABC):
+ """
+ Abstract generalization of the Inverse Probability Weighting (IPW) estimator.
+
+ Reference: Learning from Logged Implicit Exploration Data (Strehl, Langford, Li, and Kakade, 2010)
+ https://arxiv.org/pdf/1003.0120
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+ """
+
+ @abstractmethod
+ def _get_importance_weights(self, **kwargs) -> np.ndarray:
+ """
+ Get the importance weights.
+
+ Returns
+ -------
+ np.ndarray
+ Array of importance weights.
+ """
+ pass
+
+ def estimate_sample_rewards(self, reward: np.ndarray, shrinkage_method: Optional[Callable], **kwargs) -> np.ndarray:
+ """
+ Estimate the sample rewards.
+
+ Parameters
+ ----------
+ reward : np.ndarray
+ Array of rewards corresponding to each action.
+ shrinkage_method : Optional[Callable]
+ Shrinkage method for the importance weights.
+
+ Returns
+ -------
+ sample_reward : np.ndarray
+ Estimated sample rewards.
+ """
+ importance_weight = self._get_importance_weights(**kwargs)
+ importance_weight = shrinkage_method(importance_weight) if shrinkage_method is not None else importance_weight
+ sample_reward = reward * importance_weight
+ return sample_reward
+
+
+class InverseProbabilityWeighting(GeneralizedInverseProbabilityWeighting):
+ """
+    Inverse Probability Weighting (IPW) estimator.
+
+ Reference: Learning from Logged Implicit Exploration Data (Strehl, Langford, Li, and Kakade, 2010)
+ https://arxiv.org/pdf/1003.0120
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+ """
+
+ _name = "ipw"
+
+ def estimate_sample_rewards(
+ self,
+ action: np.ndarray,
+ reward: np.ndarray,
+ propensity_score: np.ndarray,
+ estimated_policy: np.ndarray,
+ shrinkage_method: Optional[Callable] = None,
+ **kwargs,
+ ) -> np.ndarray:
+ """
+ Estimate the sample rewards.
+
+ Parameters
+ ----------
+ action : np.ndarray
+ Array of actions taken.
+ reward : np.ndarray
+ Array of rewards corresponding to each action.
+ propensity_score : np.ndarray
+ Array of propensity scores.
+ estimated_policy : np.ndarray
+ Array of action distributions.
+
+ Returns
+ -------
+ sample_reward : np.ndarray
+ Estimated sample rewards.
+ """
+ return super().estimate_sample_rewards(
+ reward=reward,
+ action=action,
+ propensity_score=propensity_score,
+ estimated_policy=estimated_policy,
+ shrinkage_method=shrinkage_method,
+ )
+
+ def _get_importance_weights(
+ self,
+ action: np.ndarray,
+ propensity_score: np.ndarray,
+ estimated_policy: np.ndarray,
+ **kwargs,
+ ) -> np.ndarray:
+ """
+ Get the importance weights.
+
+ Parameters
+ ----------
+ action : np.ndarray
+ Array of actions taken
+ propensity_score : np.ndarray
+ Array of propensity scores.
+ estimated_policy : np.ndarray
+ Array of action distributions.
+
+ Returns
+ -------
+ importance_weight : np.ndarray
+ Array of importance weights.
+ """
+ n_samples = action.shape[0]
+ importance_weight = estimated_policy[np.arange(n_samples), action] / propensity_score
+ return importance_weight
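+
+# Illustrative sketch (hypothetical numbers): if, for one logged round, the evaluation policy assigns
+# probability 0.8 to the logged action while the logging policy's propensity score is 0.4, the
+# importance weight is 0.8 / 0.4 = 2.0, so a logged reward of 1 contributes a per-sample reward of 2.0.
+# The IPW policy value is the mean of these weighted rewards across all logged rounds.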
+
+
+class SelfNormalizedInverseProbabilityWeighting(InverseProbabilityWeighting):
+ """
+ Self-Normalized Inverse Propensity Score (SNIPS) estimator.
+
+ Reference: The Self-normalized Estimator for Counterfactual Learning (Swaminathan and Joachims, 2015)
+ https://papers.nips.cc/paper_files/paper/2015/file/39027dfad5138c9ca0c474d71db915c3-Paper.pdf
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+ """
+
+ _name = "snips"
+
+ def estimate_sample_rewards(
+ self,
+ action: np.ndarray,
+ reward: np.ndarray,
+ propensity_score: np.ndarray,
+ estimated_policy: np.ndarray,
+ shrinkage_method: Optional[Callable] = None,
+ **kwargs,
+ ) -> np.ndarray:
+ """
+ Estimate the sample rewards.
+
+ Parameters
+ ----------
+ action : np.ndarray
+ Array of actions taken.
+ reward : np.ndarray
+ Array of rewards corresponding to each action.
+ propensity_score : np.ndarray
+ Array of propensity scores.
+ estimated_policy : np.ndarray
+ Array of action distributions.
+ shrinkage_method : Optional[Callable]
+ Shrinkage method for the importance weights.
+
+ Returns
+ -------
+ sample_reward : np.ndarray
+ Estimated sample rewards.
+ """
+
+ def self_normalized_shrink_weights(importance_weight: np.ndarray) -> np.ndarray:
+ importance_weight = (
+ shrinkage_method(importance_weight) if shrinkage_method is not None else importance_weight
+ )
+ return importance_weight / importance_weight.mean()
+
+ sample_reward = super().estimate_sample_rewards(
+ action=action,
+ reward=reward,
+ propensity_score=propensity_score,
+ estimated_policy=estimated_policy,
+ shrinkage_method=self_normalized_shrink_weights,
+ )
+ return sample_reward
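+
+# Illustrative sketch (hypothetical numbers): with importance weights [2.0, 0.5] the mean weight is 1.25,
+# so the self-normalized weights become [1.6, 0.4]. Dividing by the mean weight keeps the estimate a
+# weighted average of observed rewards (hence bounded by the largest observed reward), at the cost of a
+# small bias.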
+
+
+class DirectMethod(BaseOfflinePolicyEstimator):
+ """
+ Direct Method (DM) estimator.
+
+    This estimator uses the evaluation policy and the estimated expected rewards to estimate the sample rewards.
+
+ Reference: The Offset Tree for Learning with Partial Labels (Beygelzimer and Langford, 2009)
+ https://arxiv.org/pdf/0812.4044
+
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+ """
+
+ _name = "dm"
+
+ def estimate_sample_rewards(
+ self,
+ estimated_policy: np.ndarray,
+ expected_reward: np.ndarray,
+ **kwargs,
+ ) -> np.ndarray:
+ """
+ Estimate the sample rewards.
+
+ Parameters
+ ----------
+ estimated_policy : np.ndarray
+ Array of action distributions.
+ expected_reward : np.ndarray
+ Array of expected rewards.
+
+ Returns
+ -------
+ sample_reward : np.ndarray
+ Estimated sample rewards.
+ """
+ n_samples = expected_reward.shape[0]
+ base_expected_reward = expected_reward[np.arange(n_samples), :]
+ evaluation_policy = estimated_policy[np.arange(n_samples), :]
+ expected_reward = np.average(
+ base_expected_reward,
+ weights=evaluation_policy,
+ axis=1,
+ )
+ return expected_reward
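+
+# Illustrative sketch (hypothetical numbers): for a round with estimated expected rewards q_hat = [0.2, 0.6]
+# and evaluation-policy probabilities pi_e = [0.3, 0.7], the per-sample DM value is
+# 0.3 * 0.2 + 0.7 * 0.6 = 0.48; np.average with weights=pi_e computes exactly this weighted mean.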
+
+
+class GeneralizedDoublyRobust(BaseOfflinePolicyEstimator, ABC):
+ """
+ Abstract generalization of the Doubly Robust (DR) estimator.
+
+    Reference: Doubly Robust Policy Evaluation and Optimization (Dudík, Erhan, Langford, and Li, 2014)
+ https://arxiv.org/pdf/1503.02834
+
+ More Robust Doubly Robust Off-policy Evaluation (Farajtabar, Chow, and Ghavamzadeh, 2018)
+ https://arxiv.org/pdf/1802.03493
+
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+ """
+
+ _alternative_method_cls: Type[InverseProbabilityWeighting]
+ _dm: DirectMethod = PrivateAttr()
+ _other_method: BaseOfflinePolicyEstimator = PrivateAttr()
+
+ if pydantic_version == PYDANTIC_VERSION_1:
+
+ def __init__(self, **data):
+ super().__init__(**data)
+ self._dm = DirectMethod(
+ alpha=self.alpha, n_bootstrap_samples=self.n_bootstrap_samples, random_state=self.random_state
+ )
+ self._other_method = self._alternative_method_cls(
+ alpha=self.alpha, n_bootstrap_samples=self.n_bootstrap_samples, random_state=self.random_state
+ )
+
+ elif pydantic_version == PYDANTIC_VERSION_2:
+
+ def model_post_init(self, __context: Any) -> None:
+ self._dm = DirectMethod(
+ alpha=self.alpha, n_bootstrap_samples=self.n_bootstrap_samples, random_state=self.random_state
+ )
+ self._other_method = self._alternative_method_cls(
+ alpha=self.alpha, n_bootstrap_samples=self.n_bootstrap_samples, random_state=self.random_state
+ )
+
+ else:
+ raise ValueError(f"Unsupported pydantic version: {pydantic_version}")
+
+ def estimate_sample_rewards(
+ self,
+ action: np.ndarray,
+ reward: np.ndarray,
+ propensity_score: np.ndarray,
+ estimated_policy: np.ndarray,
+ expected_reward: np.ndarray,
+ **kwargs,
+ ) -> np.ndarray:
+ """
+ Estimate the sample rewards.
+
+ Parameters
+ ----------
+ action : np.ndarray
+ Array of actions taken.
+ reward : np.ndarray
+ Array of rewards corresponding to each action.
+ propensity_score : np.ndarray
+ Array of propensity scores.
+ estimated_policy : np.ndarray
+ Array of action distributions.
+ expected_reward : np.ndarray
+ Array of expected rewards.
+
+ Returns
+ -------
+ sample_reward : np.ndarray
+ Estimated rewards.
+ """
+ dm_sample_reward = self._dm.estimate_sample_rewards(
+ action=action, estimated_policy=estimated_policy, expected_reward=expected_reward
+ )
+ other_sample_reward = self._other_method.estimate_sample_rewards(
+ action=action,
+ reward=reward - dm_sample_reward,
+ propensity_score=propensity_score,
+ estimated_policy=estimated_policy,
+ shrinkage_method=self._shrink_weights,
+ )
+ sample_reward = dm_sample_reward + other_sample_reward
+ return sample_reward
+
+ def _shrink_weights(self, importance_weight: np.ndarray) -> np.ndarray:
+ return importance_weight
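+
+# Illustrative sketch (hypothetical numbers): as implemented above, the per-sample DR value is the DM term
+# plus the importance-weighted residual of the observed reward with respect to that DM term. With a DM term
+# of 0.48, an importance weight of 2.0 and an observed reward of 1, the per-sample value is
+# 0.48 + 2.0 * (1 - 0.48) = 1.52. Subclasses vary the correction term via _alternative_method_cls and
+# _shrink_weights.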
+
+
+class DoublyRobust(GeneralizedDoublyRobust):
+ """
+ Doubly Robust (DR) estimator.
+
+    Reference: Doubly Robust Policy Evaluation and Optimization (Dudík, Erhan, Langford, and Li, 2014)
+ https://arxiv.org/pdf/1503.02834
+
+ More Robust Doubly Robust Off-policy Evaluation (Farajtabar, Chow, and Ghavamzadeh, 2018)
+ https://arxiv.org/pdf/1802.03493
+
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+ """
+
+ _name = "dr"
+ _alternative_method_cls = InverseProbabilityWeighting
+
+
+class SelfNormalizedDoublyRobust(GeneralizedDoublyRobust):
+ """
+ Self-Normalized Doubly Robust (SNDR) estimator.
+
+    This estimator uses self-normalized importance weights in the DR correction term.
+
+ Reference: Intrinsically Efficient, Stable, and Bounded Off-Policy Evaluation for Reinforcement Learning (Kallus and Uehara, 2019)
+ https://arxiv.org/pdf/1906.03735
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+ """
+
+ _name = "sndr"
+ _alternative_method_cls = SelfNormalizedInverseProbabilityWeighting
+
+
+class SwitchDoublyRobust(DoublyRobust):
+ """
+ Switch Doubly Robust (Switch-DR) estimator.
+
+    This estimator uses a switching rule on the importance weight: when the weight exceeds a threshold, it drops the correction term and falls back to the DM term.
+
+ Reference: Optimal and Adaptive Off-policy Evaluation in Contextual Bandits (Wang, Agarwal, and Dudik, 2017)
+ https://arxiv.org/pdf/1507.02646
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : Optional[int], default=None
+ Random seed for bootstrap sampling.
+    switch_threshold : float, default=inf
+        Importance weight threshold above which the DR correction is dropped and only the DM term is kept.
+        The default of infinity keeps the full DR correction for every sample.
+ """
+
+ _name = "switch-dr"
+ switch_threshold: float = float("inf")
+
+    def _shrink_weights(self, importance_weight: np.ndarray) -> np.ndarray:
+        # keep the DR correction only where the importance weight does not exceed the threshold
+        switch_indicator = importance_weight <= self.switch_threshold
+        return switch_indicator * importance_weight
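+
+# Illustrative sketch (hypothetical numbers): with switch_threshold=5, an importance weight of 2 is kept
+# as-is while a weight of 10 is zeroed out, so that round contributes only its DM term. The default
+# threshold of infinity keeps every weight, recovering the plain DR estimator.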
+
+
+class DoublyRobustWithOptimisticShrinkage(DoublyRobust):
+ """
+ Optimistic version of DRos estimator.
+
+ This estimator uses a shrinkage factor to shrink the importance weight in the native DR.
+
+ Reference: Doubly Robust Off-Policy Evaluation with Shrinkage (Su, Dimakopoulou, Krishnamurthy, and Dudik, 2020)
+ https://arxiv.org/pdf/1907.09623
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+ shrinkage_factor : float, default=0.0
+ Shrinkage factor for the importance weights.
+ If set to 0 or infinity, the estimator is equivalent to the native DM or DR estimators, respectively.
+ """
+
+ shrinkage_factor: NonNegativeFloat = 0.0
+ _name = "dros-opt"
+
+ def _shrink_weights(self, importance_weight: np.ndarray) -> np.ndarray:
+ if self.shrinkage_factor == 0:
+ return np.zeros_like(importance_weight)
+ elif self.shrinkage_factor == float("inf"):
+ return importance_weight
+ return self.shrinkage_factor * importance_weight / (importance_weight**2 + self.shrinkage_factor)
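+
+# Illustrative sketch (hypothetical numbers): with shrinkage_factor=1.0, an importance weight of 3 is
+# shrunk to 1.0 * 3 / (3**2 + 1.0) = 0.3, while the limits 0 and infinity recover the DM and DR
+# estimators, respectively, as handled by the special cases above.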
+
+
+class DoublyRobustWithPessimisticShrinkage(DoublyRobust):
+ """
+ Pessimistic version of DRos estimator.
+
+ This estimator uses a shrinkage factor to shrink the importance weight in the native DR.
+
+ Reference: Doubly Robust Off-Policy Evaluation with Shrinkage (Su, Dimakopoulou, Krishnamurthy, and Dudik, 2020)
+ https://arxiv.org/pdf/1907.09623
+
+ Parameters
+ ----------
+ alpha : Float01, default=0.05
+ Significance level for confidence interval estimation.
+ n_bootstrap_samples : int, default=10000
+ Number of bootstrap samples for confidence interval estimation.
+ random_state : int, default=None
+ Random seed for bootstrap sampling.
+    shrinkage_factor : float, default=inf
+        Clipping threshold for the importance weights; weights larger than this value are capped at it.
+ """
+
+ _name = "dros-pess"
+ shrinkage_factor: PositiveFloat = float("inf")
+
+ def _shrink_weights(self, importance_weight: np.ndarray) -> np.ndarray:
+ importance_weight = np.minimum(self.shrinkage_factor, importance_weight)
+ return importance_weight
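+
+# Illustrative sketch (hypothetical numbers): with shrinkage_factor=5.0, an importance weight of 3 is left
+# unchanged while a weight of 10 is clipped to 5.0; the default of infinity leaves all weights unclipped,
+# i.e. plain DR.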
+
+
+class SubGaussianInverseProbabilityWeighting(InverseProbabilityWeighting):
+ """
+    SubGaussian Inverse Probability Weighting estimator.
+
+ Reference: Subgaussian and Differentiable Importance Sampling for Off-Policy Evaluation and Learning (Metelli, Russo, and Restelli, 2021)
+ https://proceedings.neurips.cc/paper_files/paper/2021/file/4476b929e30dd0c4e8bdbcc82c6ba23a-Paper.pdf
+
+ Parameters
+ ----------
+    alpha : Float01, default=0.05
+        Significance level for confidence interval estimation.
+    n_bootstrap_samples : int, default=10000
+        Number of bootstrap samples for confidence interval estimation.
+    random_state : int, default=None
+        Random seed for bootstrap sampling.
+    shrinkage_factor : Float01, default=0.0
+ Shrinkage factor for the importance weights.
+
+ """
+
+ _name = "sg-ipw"
+ shrinkage_factor: Float01 = 0.0
+
+ def _shrink_weights(self, importance_weight: np.ndarray) -> np.ndarray:
+ return importance_weight / (1 - self.shrinkage_factor + self.shrinkage_factor * importance_weight)
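+
+# Illustrative sketch (hypothetical numbers): with shrinkage_factor=0.1, an importance weight of 10 is
+# shrunk to 10 / (1 - 0.1 + 0.1 * 10) = 10 / 1.9 ~= 5.26; shrinkage_factor=0 leaves weights untouched and
+# shrinkage_factor=1 maps every weight to 1.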
+
+
+class SubGaussianDoublyRobust(GeneralizedDoublyRobust):
+ """
+ SubGaussian Doubly Robust estimator.
+
+ Reference: Subgaussian and Differentiable Importance Sampling for Off-Policy Evaluation and Learning (Metelli, Russo, and Restelli, 2021)
+ https://proceedings.neurips.cc/paper_files/paper/2021/file/4476b929e30dd0c4e8bdbcc82c6ba23a-Paper.pdf
+
+ Parameters
+ ----------
+    alpha : Float01, default=0.05
+        Significance level for confidence interval estimation.
+    n_bootstrap_samples : int, default=10000
+        Number of bootstrap samples for confidence interval estimation.
+    random_state : int, default=None
+        Random seed for bootstrap sampling.
+ """
+
+ _name = "sg-dr"
+ _alternative_method_cls = SubGaussianInverseProbabilityWeighting
+
+
+class BalancedInverseProbabilityWeighting(GeneralizedInverseProbabilityWeighting):
+ """
+    Balanced Inverse Probability Weighting estimator.
+
+ Reference: Balanced Off-Policy Evaluation in General Action Spaces (Sondhi, Arbour, and Dimmery, 2020)
+ https://arxiv.org/pdf/1906.03694
+
+
+ Parameters
+ ----------
+    alpha : Float01, default=0.05
+        Significance level for confidence interval estimation.
+    n_bootstrap_samples : int, default=10000
+        Number of bootstrap samples for confidence interval estimation.
+    random_state : int, default=None
+        Random seed for bootstrap sampling.
+
+ """
+
+ _name = "b-ipw"
+
+ def _get_importance_weights(self, expected_importance_weight: np.ndarray, **kwargs) -> np.ndarray:
+ """
+ Get the importance weights.
+
+ Parameters
+ ----------
+ expected_importance_weight : np.ndarray
+ Array of expected importance weights.
+
+ Returns
+ -------
+ expected_importance_weight : np.ndarray
+ Array of expected importance weights.
+ """
+ return expected_importance_weight
+
+ def estimate_sample_rewards(
+ self,
+ reward: np.ndarray,
+ expected_importance_weight: np.ndarray,
+ **kwargs,
+ ) -> np.ndarray:
+ """
+ Estimate the sample rewards.
+
+ Parameters
+ ----------
+ reward : np.ndarray
+ Array of rewards corresponding to each action.
+ expected_importance_weight : np.ndarray
+ Array of expected importance weights.
+
+ Returns
+ -------
+ sample_reward : np.ndarray
+ Estimated rewards.
+ """
+ return super().estimate_sample_rewards(
+ reward=reward, expected_importance_weight=expected_importance_weight, shrinkage_method=None
+ )
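+
+# Illustrative sketch: unlike the plain IPW estimator, the balanced variant weights each observed reward by
+# a pre-computed expected importance weight (supplied via expected_importance_weight) instead of the ratio
+# estimated_policy / propensity_score, which can be more stable when propensity scores are small or poorly
+# estimated.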
diff --git a/pybandits/offline_policy_evaluator.py b/pybandits/offline_policy_evaluator.py
new file mode 100644
index 0000000..97d7f4b
--- /dev/null
+++ b/pybandits/offline_policy_evaluator.py
@@ -0,0 +1,1127 @@
+import os
+from copy import deepcopy
+from functools import partial
+from itertools import product
+from math import floor
+from multiprocessing import Pool, cpu_count
+from sys import version_info
+from typing import Any, Dict, List, Literal, Optional, Union
+
+import numpy as np
+import optuna
+import pandas as pd
+from bokeh.models import ColumnDataSource, TabPanel
+from bokeh.plotting import figure
+from loguru import logger
+from optuna import Trial
+from sklearn.base import ClassifierMixin, TransformerMixin
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import cross_val_score
+from sklearn.neural_network import MLPClassifier
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from tqdm import tqdm
+
+from pybandits.pydantic_version_compatibility import (
+ PYDANTIC_VERSION_1,
+ PYDANTIC_VERSION_2,
+ NonNegativeInt,
+ PositiveInt,
+ PrivateAttr,
+ field_validator,
+ model_validator,
+ pydantic_version,
+ validate_call,
+)
+
+if version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+
+from pybandits import offline_policy_estimator
+from pybandits.base import ActionId, Float01, PyBanditsBaseModel
+from pybandits.mab import BaseMab
+from pybandits.offline_policy_estimator import BaseOfflinePolicyEstimator
+from pybandits.utils import (
+ extract_argument_names_from_function,
+ get_non_abstract_classes,
+ in_jupyter_notebook,
+ visualize_via_bokeh,
+)
+
+optuna.logging.enable_propagation() # Propagate logs to the root logger.
+optuna.logging.disable_default_handler() # Stop showing logs in sys.stderr.
+
+
+class _FunctionEstimator(PyBanditsBaseModel, ClassifierMixin, arbitrary_types_allowed=True):
+ """
+ This class provides functionality for model optimization using hyperparameter tuning via Optuna,
+ and prediction with optimized or default machine learning models.
+ It is used to estimate the propensity score and expected reward.
+
+
+ Parameters
+ ----------
+    estimator_type : Literal["logreg", "gbm", "rf", "mlp"]
+ The model type to optimize.
+
+ fast_fit : bool
+ Whether to use the default parameter set for the model.
+
+ action_one_hot_encoder : OneHotEncoder
+ Fitted one hot encoder for action encoding.
+
+ n_trials : int
+ Number of trials for the Optuna optimization process.
+
+ verbose : bool
+ Whether to log detailed information during the optimization process.
+
+ study_name : Optional[str]
+ Name of the study to be created by Optuna.
+
+ multi_action_prediction : bool
+        Whether to predict for all actions or only for the logged action.
+
+ """
+
+ estimator_type: Literal["logreg", "gbm", "rf", "mlp"]
+ fast_fit: bool
+ action_one_hot_encoder: OneHotEncoder = OneHotEncoder(sparse=False)
+ n_trials: int
+ verbose: bool
+ study_name: Optional[str] = None
+ multi_action_prediction: bool
+ _model: Union[LogisticRegression, GradientBoostingClassifier, RandomForestClassifier, MLPClassifier] = PrivateAttr()
+ _model_mapping = {
+ "mlp": MLPClassifier,
+ "rf": RandomForestClassifier,
+ "logreg": LogisticRegression,
+ "gbm": GradientBoostingClassifier,
+ }
+
+ def _pre_process(self, batch: Dict[str, Any]) -> np.ndarray:
+ """
+ Preprocess the feature vectors to be used for regression model training.
+        This method concatenates the context features with the one-hot encoded actions.
+
+ Parameters
+ ----------
+ batch : Dict[str, Any]
+ The batch of data containing context, action, and action context.
+
+ Returns
+ -------
+ np.ndarray
+            A concatenated array of context features and one-hot encoded actions, of shape (n_rounds, n_context_features + n_actions).
+ """
+ context = batch["context"]
+ action = batch["action_ids"]
+ return np.concatenate([context, self.action_one_hot_encoder.transform(action.reshape((-1, 1)))], axis=1)
+
+ def _sample_parameter_space(self, trial: Trial) -> Dict[str, Union[str, int, float]]:
+ """
+ Define the hyperparameter search space for a given model type in Optuna.
+
+ The search space is dynamically selected based on the model type being optimized.
+
+ Parameters
+ ----------
+ trial : optuna.Trial
+ A single trial in the Optuna optimization process.
+
+ Returns
+ -------
+ dict
+ A dictionary representing the search space for the model's hyperparameters.
+ """
+
+ if self.estimator_type == "mlp":
+ return {
+ "hidden_layer_sizes": 2 ** trial.suggest_int("hidden_layer_sizes", 2, 6),
+ "activation": trial.suggest_categorical("activation", ["relu", "logistic", "tanh"]),
+ "solver": trial.suggest_categorical("solver", ["lbfgs", "sgd", "adam"]),
+ "alpha": np.sqrt(10) ** -trial.suggest_int("learning_rate_init", 0, 10),
+ "max_iter": 1000,
+ "learning_rate_init": np.sqrt(10) ** -trial.suggest_int("learning_rate_init", 0, 6),
+ }
+ elif self.estimator_type == "rf":
+ return {
+ "max_depth": trial.suggest_int("max_depth", 2, 5),
+ "criterion": trial.suggest_categorical("criterion", ["gini", "entropy"]),
+ "max_features": trial.suggest_int("max_features", 1, 3),
+ "n_estimators": trial.suggest_int("n_estimators", 10, 50),
+ "n_jobs": -1,
+ }
+ elif self.estimator_type == "logreg":
+ return {
+ "tol": trial.suggest_float("tol", 0.00001, 0.0001),
+ "C": trial.suggest_float("C", 0.05, 3),
+ "solver": trial.suggest_categorical("solver", ["newton-cg", "lbfgs", "liblinear", "sag", "saga"]),
+ "max_iter": 1000,
+ "n_jobs": -1,
+ }
+ elif self.estimator_type == "gbm":
+ return {
+ "n_estimators": trial.suggest_int("n_estimators", 10, 100),
+ "learning_rate": np.sqrt(10) ** -trial.suggest_int("learning_rate_init", 0, 6),
+ "max_depth": trial.suggest_int("max_depth", 2, 10),
+ }
+
+ def _objective(self, trial: Trial, feature_set: np.ndarray, label: np.ndarray) -> float:
+ """
+ Objective function for Optuna optimization.
+
+ This function trains a model using cross-validation and returns the negative accuracy
+ to be minimized.
+
+ Parameters
+ ----------
+ trial : Trial
+ A single trial in the Optuna optimization process.
+
+ feature_set : np.ndarray
+ The training dataset, containing context and encoded actions.
+
+ label : np.ndarray
+ The labels for the dataset.
+
+ Returns
+ -------
+ score : float
+ The score to be maximized by Optuna.
+ """
+ params = self._sample_parameter_space(trial)
+ model = self._model_mapping[self.estimator_type](**params)
+ score = cross_val_score(model, feature_set, label).mean()
+ trial.set_user_attr("model_params", params)
+
+ return score
+
+ def _optimize(self, feature_set: np.ndarray, label: np.ndarray, study: optuna.Study) -> dict:
+ """
+ Optimize the model's hyperparameters using Optuna.
+
+ Parameters
+ ----------
+ feature_set : np.ndarray
+            The preprocessed training feature matrix (context features and one-hot encoded actions).
+
+        label : np.ndarray
+            The labels for the dataset.
+
+ study : optuna.Study
+ The Optuna study object to store optimization results.
+
+ Returns
+ -------
+ best_params : dict
+ The best set of hyperparameters found by Optuna.
+ """
+
+ study.optimize(lambda trial: self._objective(trial, feature_set, label), n_trials=self.n_trials)
+
+ best_params = study.best_trial.user_attrs["model_params"]
+ if self.verbose:
+ logger.info(f"Optuna best model with optimized parameters for {self.estimator_type}:\n {best_params}")
+
+ return best_params
+
+ @validate_call(config=dict(arbitrary_types_allowed=True))
+ def fit(self, X: dict, y: np.ndarray) -> Self:
+ """
+ Fit the model using the given dataset X and labels y.
+
+ Parameters
+ ----------
+ X : dict
+ The dataset containing 'context' and 'action_ids' keys.
+ y : np.ndarray
+ The labels for the dataset.
+
+ Returns
+ -------
+ self : _FunctionEstimator
+ The fitted model.
+ """
+ feature_set = self._pre_process(X)
+ if self.fast_fit:
+ model_parameters = {}
+ else:
+ pruner = optuna.pruners.MedianPruner()
+ sampler = optuna.samplers.TPESampler(multivariate=True, group=True)
+ study = optuna.create_study(
+ direction="maximize", study_name=self.study_name, pruner=pruner, sampler=sampler
+ )
+ model_parameters = self._optimize(feature_set, y, study)
+
+ model = self._model_mapping[self.estimator_type](**model_parameters)
+ model.fit(feature_set, y)
+ self._model = model
+ return self
+
+ @validate_call
+ def predict(self, X: dict) -> np.ndarray:
+ """
+ Predict the labels for the given dataset X.
+
+ Parameters
+ ----------
+ X : dict
+ The dataset containing 'context' and 'action_ids' keys.
+
+ Returns
+ -------
+ prediction : np.ndarray
+ The predicted labels for the dataset.
+ """
+ if not self._model:
+ raise AttributeError("Model has not been fitted yet.")
+
+ if self.multi_action_prediction:
+ specific_action_X = X.copy()
+ prediction = np.empty((X["n_rounds"], len(X["unique_actions"])))
+ for action_index, action in enumerate(X["unique_actions"]):
+ specific_action_X["action_ids"] = np.array([action] * X["n_rounds"])
+ specific_action_feature_set = self._pre_process(specific_action_X)
+ specific_action_prediction = self._model.predict_proba(specific_action_feature_set)[:, 1]
+ prediction[:, action_index] = specific_action_prediction
+ else:
+ feature_set = self._pre_process(X)
+ prediction = self._model.predict_proba(feature_set)[:, 1]
+ return prediction
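+
+# Minimal usage sketch (comments only; the dictionary keys mirror those read by _pre_process and predict
+# above, while the concrete values are hypothetical):
+#   encoder = OneHotEncoder(sparse=False).fit(np.array([["a1"], ["a2"]]))
+#   estimator = _FunctionEstimator(
+#       estimator_type="logreg", fast_fit=True, n_trials=10, verbose=False,
+#       multi_action_prediction=True, action_one_hot_encoder=encoder,
+#   )
+#   X = {"context": context_matrix, "action_ids": logged_action_ids,
+#        "n_rounds": len(logged_action_ids), "unique_actions": ["a1", "a2"]}
+#   estimator.fit(X, y=binary_rewards).predict(X)  # -> array of shape (n_rounds, n_actions)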
+
+
+class OfflinePolicyEvaluator(PyBanditsBaseModel, arbitrary_types_allowed=True):
+ """
+ Class to conduct OPE with multiple OPE estimators
+
+ Reference: Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation
+ https://arxiv.org/abs/2008.07146 https://github.com/st-tech/zr-obp
+
+ Parameters
+ ----------
+ logged_data : pd.DataFrame
+ Logging data set
+ split_prop: Float01
+ Proportion of dataset used as training set
+ propensity_score_model_type: Literal["logreg", "gbm", "rf", "mlp", "batch_empirical", "empirical", "propensity_score"]
+        Method used to compute or estimate the propensity score pi_b of the logging (behavioral) policy.
+ expected_reward_model_type: Literal["logreg", "gbm", "rf", "mlp"]
+        Method used to estimate the expected reward for each action a in the training set.
+    importance_weights_model_type: Literal["logreg", "gbm", "rf", "mlp"]
+        Method used to estimate the expected importance weights.
+    scaler: Optional[Union[TransformerMixin, Dict[str, TransformerMixin]]]
+        Optional scaler, or per-feature dictionary of scalers, applied to the contextual features.
+ n_trials : Optional[int]
+ Number of trials for the Optuna optimization process.
+ fast_fit : bool
+ Whether to use the default parameter set for the function estimator models.
+ ope_estimators: Optional[List[BaseOfflinePolicyEstimator]]
+ List of OPE estimators used to evaluate the policy value of evaluation policy.
+        All available estimators are used if not specified.
+ batch_feature: str
+ Column name for batch as available in logged_data
+ action_feature: str
+ Column name for action as available in logged_data
+ reward_feature: Union[str, List[str]]
+ Column name for reward as available in logged_data
+ contextual_features: Optional[List[str]]
+ Column names for contextual features as available in logged_data
+ cost_feature: Optional[str]
+ Column name for cost as available in logged_data; used for bandit with cost control
+ group_feature: Optional[str]
+ Column name for group definition feature as available in logged_data; available from simulated data
+ to define samples with similar contextual profile
+ true_reward_feature: Optional[Union[str, List[str]]]
+        Column names for the reward probability distribution features as available in simulated logged_data; used to compute the ground truth.
+ propensity_score_feature : Optional[str]
+ Column name for propensity score as available in logged_data; used for evaluation of the policy value
+ verbose : bool
+ Whether to log detailed information during the optimization process.
+ """
+
+ logged_data: pd.DataFrame
+ split_prop: Float01
+ propensity_score_model_type: Literal[
+ "logreg", "gbm", "rf", "mlp", "batch_empirical", "empirical", "propensity_score"
+ ]
+ expected_reward_model_type: Literal["logreg", "gbm", "rf", "mlp"]
+ importance_weights_model_type: Literal["logreg", "gbm", "rf", "mlp"]
+ scaler: Optional[Union[TransformerMixin, Dict[str, TransformerMixin]]] = None
+ n_trials: Optional[int] = 100
+ fast_fit: bool = False
+    ope_estimators: Optional[List[BaseOfflinePolicyEstimator]] = None
+ batch_feature: str
+ action_feature: str
+ reward_feature: Union[str, List[str]]
+ contextual_features: Optional[List[str]] = None
+ cost_feature: Optional[str] = None
+ group_feature: Optional[str] = None
+ true_reward_feature: Optional[Union[str, List[str]]] = None
+ propensity_score_feature: Optional[str] = None
+ verbose: bool = False
+ _train_data: Optional[Dict[str, Any]] = PrivateAttr()
+ _test_data: Optional[Dict[str, Any]] = PrivateAttr()
+ _estimated_expected_reward: Optional[Dict[str, np.ndarray]] = PrivateAttr(default=None)
+ _action_one_hot_encoder = OneHotEncoder(sparse=False)
+ _propensity_score_epsilon = 1e-08
+
+ @field_validator("split_prop", mode="before")
+ @classmethod
+ def check_split_prop(cls, value):
+ if value == 0 or value == 1:
+ raise ValueError("split_prop should be strictly between 0 and 1")
+ return value
+
+ @field_validator("ope_estimators", mode="before")
+ @classmethod
+ def populate_ope_metrics(cls, value):
+ return (
+ value if value is not None else [class_() for class_ in get_non_abstract_classes(offline_policy_estimator)]
+ )
+
+ @model_validator(mode="before")
+ @classmethod
+ def check_batch_feature(cls, values):
+ if values["batch_feature"] not in values["logged_data"]:
+ raise AttributeError("Batch feature missing from logged data.")
+ if not (
+ pd.api.types.is_datetime64_ns_dtype(values["logged_data"][values["batch_feature"]])
+ or pd.api.types.is_integer_dtype(values["logged_data"][values["batch_feature"]])
+ ):
+ raise TypeError(f"Column {values['batch_feature']} should be either date or int type")
+ return values
+
+ @model_validator(mode="before")
+ @classmethod
+ def check_action_feature(cls, values):
+ if values["action_feature"] not in values["logged_data"]:
+ raise AttributeError("Action feature missing from logged data.")
+ return values
+
+ @model_validator(mode="before")
+ @classmethod
+ def check_propensity_score_estimation_method(cls, values):
+ if values["propensity_score_model_type"] == "propensity_score":
+ if cls._get_value_with_default("propensity_score_feature", values) is None:
+ raise ValueError(
+ "Propensity score feature should be defined when using it as propensity_score_model_type"
+ )
+ return values
+
+ @model_validator(mode="before")
+ @classmethod
+ def check_reward_features(cls, values):
+ reward_feature = values["reward_feature"]
+ reward_feature = reward_feature if isinstance(reward_feature, list) else [reward_feature]
+ if not all([reward in values["logged_data"] for reward in reward_feature]):
+ raise AttributeError("Reward feature missing from logged data.")
+ values["reward_feature"] = reward_feature
+ if "true_reward_feature" in values:
+ true_reward_feature = values["true_reward_feature"]
+ true_reward_feature = (
+ true_reward_feature
+ if isinstance(true_reward_feature, list)
+ else [true_reward_feature]
+ if true_reward_feature is not None
+ else None
+ )
+ if not all([true_reward in values["logged_data"] for true_reward in true_reward_feature]):
+ raise AttributeError("True reward feature missing from logged data.")
+ if len(reward_feature) != len(true_reward_feature):
+ raise ValueError("Reward and true reward features should have the same length")
+ values["true_reward_feature"] = true_reward_feature
+
+ return values
+
+ @model_validator(mode="before")
+ @classmethod
+ def check_optional_scalar_features(cls, values):
+ for feature in [
+ "cost_feature",
+ "group_feature",
+ "propensity_score_feature",
+ ]:
+ value = cls._get_value_with_default(feature, values)
+ if value is not None and value not in values["logged_data"]:
+ raise AttributeError(f"{feature} missing from logged data.")
+ return values
+
+ @model_validator(mode="before")
+ @classmethod
+ def check_contextual_features(cls, values):
+ value = cls._get_value_with_default("contextual_features", values)
+ if value is not None and not set(value).issubset(values["logged_data"].columns):
+ raise AttributeError("contextual_features missing from logged data.")
+ return values
+
+ @model_validator(mode="before")
+ @classmethod
+ def check_model_optimization(cls, values):
+ n_trials_value = cls._get_value_with_default("n_trials", values)
+ fast_fit_value = cls._get_value_with_default("fast_fit", values)
+
+ if (n_trials_value is None or fast_fit_value is None) and values["propensity_score_model_type"] not in [
+ "logreg",
+ "gbm",
+ "rf",
+ "mlp",
+ ]:
+ raise ValueError("The requested propensity score model requires n_trials and fast_fit to be well defined")
+ if (n_trials_value is None or fast_fit_value is None) and cls._check_argument_required_by_estimators(
+ "expected_reward", values["ope_estimators"]
+ ):
+ raise ValueError(
+ "The requested offline policy evaluation metrics model require estimation of the expected reward. "
+ "Thus, n_trials and fast_fit need to be well defined."
+ )
+ return values
+
+ @classmethod
+ def _check_argument_required_by_estimators(cls, argument: str, ope_estimators: List[BaseOfflinePolicyEstimator]):
+ """
+ Check if argument is required by OPE estimators.
+
+ Parameters
+ ----------
+ argument : str
+ Argument to check if required by OPE estimators.
+ ope_estimators : List[BaseOfflinePolicyEstimator]
+ List of OPE estimators.
+
+ Returns
+ -------
+ bool
+ True if argument is required by OPE estimators, False otherwise.
+ """
+ return any(
+ [
+ argument
+ in extract_argument_names_from_function(ope_estimator.estimate_sample_rewards)
+ + extract_argument_names_from_function(ope_estimator.estimate_policy_value_with_confidence_interval)
+ for ope_estimator in ope_estimators
+ ]
+ )
+
+ if pydantic_version == PYDANTIC_VERSION_1:
+
+ def __init__(self, **data):
+ super().__init__(**data)
+
+ # Extract batches for train and test set
+ self._extract_batches()
+
+ # Estimate propensity score in the train and test set
+ self._estimate_propensity_score()
+
+ # Estimate expected reward estimator and predict in the test set, when required by OPE estimators
+ if self._check_argument_required_by_estimators("expected_reward", self.ope_estimators):
+ self._estimate_expected_reward()
+
+ elif pydantic_version == PYDANTIC_VERSION_2:
+
+ def model_post_init(self, __context: Any) -> None:
+ # Extract batches for train and test set
+ self._extract_batches()
+
+ # Estimate propensity score in the train and test set
+ self._estimate_propensity_score()
+
+ # Estimate expected reward estimator and predict in the test set, when required by OPE estimators
+ if self._check_argument_required_by_estimators("expected_reward", self.ope_estimators):
+ self._estimate_expected_reward()
+
+ else:
+ raise ValueError(f"Unsupported pydantic version: {pydantic_version}")
+
+ def _extract_batches(self):
+ """
+ Create training and test sets in dictionary form.
+
+ """
+ logged_data = self.logged_data.sort_values(by=self.batch_feature)
+ unique_batch = logged_data[self.batch_feature].unique()
+ split_batch = unique_batch[int(floor(len(unique_batch) * self.split_prop))]
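+ # batches with identifier up to and including split_batch form the training set;
+ # the remaining batches form the test set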
+
+ # store the full list of actions so that the test set is not limited to fewer
+ # actions than the total number of actions
+ unique_actions = sorted(self.logged_data[self.action_feature].unique().tolist())
+ action_label_encoder = LabelEncoder()
+ for batch_idx in tqdm(range(2)):
+ # extract samples batch
+ if batch_idx == 0:
+ extracted_batch = self.logged_data[self.logged_data[self.batch_feature] <= split_batch]
+ else:
+ extracted_batch = self.logged_data[self.logged_data[self.batch_feature] > split_batch]
+ extracted_batch = extracted_batch.reset_index(drop=True)
+
+ # dict data set information for OPE
+ action_ids = extracted_batch[self.action_feature].values
+ if batch_idx == 0:
+ self._action_one_hot_encoder.fit(np.array(unique_actions).reshape((-1, 1)))
+ reward = extracted_batch[self.reward_feature].values
+
+ # if cost control bandit
+ if self.cost_feature is not None:
+ cost = extracted_batch[self.cost_feature].values
+ else:
+ cost = None
+
+ # if contextual information required
+ if self.contextual_features is not None:
+ if self.scaler is not None:
+ if type(self.scaler) is dict:
+ if batch_idx == 0:
+ x_scale = np.array(
+ pd.concat(
+ [
+ self.scaler[feature].fit_transform(np.array(extracted_batch[[feature]]))
+ for feature in self.contextual_features
+ ],
+ axis=1,
+ )
+ )
+ else:
+ x_scale = np.array(
+ pd.concat(
+ [
+ self.scaler[feature].transform(np.array(extracted_batch[[feature]]))
+ for feature in self.contextual_features
+ ],
+ axis=1,
+ )
+ )
+ else:
+ if batch_idx == 0:
+ x_scale = self.scaler.fit_transform(np.array(extracted_batch[self.contextual_features]))
+ else:
+ x_scale = self.scaler.transform(np.array(extracted_batch[self.contextual_features]))
+ else:
+ x_scale = np.array(extracted_batch[self.contextual_features])
+ else:
+ x_scale = np.zeros((len(action_ids), 0)) # zero-columns 2d array to allow concatenation later
+
+ # extract data for policy information
+ policy_information_cols = [
+ self.batch_feature,
+ self.action_feature,
+ ] + self.reward_feature
+ if self.group_feature:
+ policy_information_cols.append(self.group_feature)
+
+ policy_information = extracted_batch[policy_information_cols]
+
+ # reward probability distribution as used during simulation process if available
+ ground_truth = extracted_batch[self.true_reward_feature] if self.true_reward_feature else None
+
+ # the propensity score may already be available (e.g. from a simulation); if so, it is added to the dict
+ propensity_score = (
+ extracted_batch[self.propensity_score_feature].values if self.propensity_score_feature else None
+ )
+ if batch_idx == 0:
+ action_label_encoder.fit(unique_actions)
+ actions = action_label_encoder.transform(action_ids)
+
+ # Store information in a dictionary as required by obp package
+ data_batch = {
+ "n_rounds": len(action_ids), # number of samples
+ "n_action": len(unique_actions), # number of actions
+ "unique_actions": unique_actions, # list of actions in the whole data set
+ "action_ids": action_ids, # action identifiers
+ "action": actions, # encoded action identifiers
+ "reward": reward, # samples' reward
+ "propensity_score": propensity_score, # propensity score, pi_b(a|x), vector
+ "context": x_scale, # the matrix of features i.e. context
+ "data": policy_information, # data array with informative features
+ "ground_truth": ground_truth, # true reward probability for each action and samples, list of list
+ "cost": cost, # samples' action cost for bandit with cost control
+ }
+ if batch_idx == 0:
+ self._train_data = data_batch
+ else:
+ self._test_data = data_batch
+
+ def _estimate_propensity_score_empirical(
+ self, batch: Dict[str, Any], groupby_cols: List[str], inner_groupby_cols: Optional[List[str]] = None
+ ) -> np.ndarray:
+ """
+ Empirical propensity score computation based on action frequencies within the given groups
+
+ Parameters
+ ----------
+ batch: Dict[str, Any]
+ Dataset dictionary
+ groupby_cols : List[str]
+ Columns to group by
+ inner_groupby_cols : Optional[List[str]]
+ Columns to group by after the first groupby
+
+ Returns
+ -------
+ propensity_score : np.ndarray
+ computed propensity score for each sample
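+
+ Notes
+ -----
+ Toy illustration: if, within a single group, action "a1" was logged 30 times and
+ action "a2" 70 times, samples of that group get a propensity score of 0.3 when
+ they received "a1" and 0.7 when they received "a2".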
+ """
+ inner_groupby_cols = [] if inner_groupby_cols is None else inner_groupby_cols
+ overall_groupby_cols = groupby_cols + inner_groupby_cols
+ # number of recommended actions per group and batch
+ grouped_data = batch["data"].groupby(overall_groupby_cols)[self.reward_feature[0]].count()
+
+ # proportion of recommended actions per group
+ if inner_groupby_cols:
+ empirical_distribution = pd.DataFrame(
+ grouped_data / grouped_data.groupby(inner_groupby_cols).sum()
+ ).reset_index()
+ else:
+ empirical_distribution = pd.DataFrame(grouped_data / grouped_data.sum()).reset_index()
+
+ empirical_distribution.columns = overall_groupby_cols + ["propensity_score"]
+
+ # deal with missing segment after group by
+ if len(overall_groupby_cols) > 1:
+ all_combinations = pd.DataFrame(
+ list(product(*[empirical_distribution[col].unique() for col in overall_groupby_cols])),
+ columns=overall_groupby_cols,
+ )
+
+ # Merge with the empirical distribution, filling the propensity score of missing combinations with 0
+ empirical_distribution = pd.merge(
+ all_combinations, empirical_distribution, on=overall_groupby_cols, how="left"
+ ).fillna(0)
+
+ # extract propensity_score in the test set for user according to group and action recommended
+ matching_df = pd.DataFrame({k: batch["data"][k] for k in overall_groupby_cols})
+ merged_df = pd.merge(
+ matching_df,
+ empirical_distribution[overall_groupby_cols + ["propensity_score"]],
+ how="left", # left join to ensure we get all rows from the batch
+ on=overall_groupby_cols,
+ )
+ propensity_score = merged_df["propensity_score"].values
+
+ return propensity_score
+
+ def _empirical_averaged_propensity_score(self, batch: Dict[str, Any]) -> np.ndarray:
+ """
+ Empirical propensity score computation based on batches average
+
+ Parameters
+ ----------
+ batch : Dict[str, Any]
+ dataset.
+
+ Returns
+ -------
+ propensity_score : np.ndarray
+ estimated propensity score
+ """
+
+ return self._estimate_propensity_score_empirical(
+ batch=batch, groupby_cols=[self.action_feature], inner_groupby_cols=[self.batch_feature]
+ )
+
+ def _empirical_propensity_score(self, batch: Dict[str, Any]) -> np.ndarray:
+ """
+ Empirical propensity score computation based on the whole data set average
+
+ Parameters
+ ----------
+ batch : Dict[str, Any]
+ dataset.
+
+ Returns
+ -------
+ propensity_score : np.ndarray
+ estimated propensity score
+ """
+
+ return self._estimate_propensity_score_empirical(batch=batch, groupby_cols=[self.action_feature])
+
+ def _estimate_propensity_score(self):
+ """
+ Compute or approximate the propensity score in the train and test sets using the selected method.
+ Different approaches may be evaluated when the logging policy is unknown.
+ """
+ if not self.contextual_features:
+ # without contextual features the propensity score does not depend on the context,
+ # so it is uniformly set to 1
+ train_propensity_score = np.ones(self._train_data["n_rounds"])
+ test_propensity_score = np.ones(self._test_data["n_rounds"])
+ logger.warning(
+ f"No contextual features available: "
+ f"overriding the requested propensity_score_model_type={self.propensity_score_model_type} "
+ f"with a uniform propensity score."
+ )
+ else:
+ if self.propensity_score_model_type == "batch_empirical":
+ if self.verbose:
+ logger.info("Data batch-empirical estimation of propensity score.")
+
+ # Empirical approach: propensity score pi_b computed as per-batch action frequencies
+ train_propensity_score = self._empirical_averaged_propensity_score(self._train_data)
+ test_propensity_score = self._empirical_averaged_propensity_score(self._test_data)
+
+ elif self.propensity_score_model_type == "empirical":
+ if self.verbose:
+ logger.info("Data empirical estimation of propensity score.")
+
+ # Empirical approach: propensity score pi_b computed as action frequencies over the whole data set
+ train_propensity_score = self._empirical_propensity_score(self._train_data)
+ test_propensity_score = self._empirical_propensity_score(self._test_data)
+
+ elif self.propensity_score_model_type == "propensity_score":
+ if self.verbose:
+ logger.info("Data given value of propensity score.")
+
+ train_propensity_score = self._train_data["propensity_score"]
+ test_propensity_score = self._test_data["propensity_score"]
+
+ else: # self.propensity_score_model_type in ["gbm", "rf", "logreg", "mlp"]
+ if self.verbose:
+ logger.info(
+ f"Data prediction of propensity score based on {self.propensity_score_model_type} model."
+ )
+ propensity_score_estimator = _FunctionEstimator(
+ estimator_type=self.propensity_score_model_type,
+ fast_fit=self.fast_fit,
+ action_one_hot_encoder=self._action_one_hot_encoder,
+ n_trials=self.n_trials,
+ verbose=self.verbose,
+ study_name=f"{self.propensity_score_model_type}_propensity_score",
+ multi_action_prediction=False,
+ )
+ propensity_score_estimator.fit(X=self._train_data, y=self._train_data["action"])
+ train_propensity_score = np.clip(
+ propensity_score_estimator.predict(self._train_data), self._propensity_score_epsilon, 1
+ )
+ test_propensity_score = np.clip(
+ propensity_score_estimator.predict(self._test_data), self._propensity_score_epsilon, 1
+ )
+ self._train_data["propensity_score"] = train_propensity_score
+ self._test_data["propensity_score"] = test_propensity_score
+
+ def _estimate_expected_reward(self):
+ """
+ Compute expected reward for each round and action.
+ """
+ if self.verbose:
+ logger.info(f"Data prediction of expected reward based on {self.expected_reward_model_type} model.")
+ estimated_expected_reward = {}
+ for reward_feature, reward in zip(self.reward_feature, self._train_data["reward"].T):
+ expected_reward_model = _FunctionEstimator(
+ estimator_type=self.expected_reward_model_type,
+ fast_fit=self.fast_fit,
+ action_one_hot_encoder=self._action_one_hot_encoder,
+ n_trials=self.n_trials,
+ verbose=self.verbose,
+ study_name=f"{self.expected_reward_model_type}_expected_reward",
+ multi_action_prediction=True,
+ )
+
+ expected_reward_model.fit(X=self._train_data, y=reward.T)
+
+ # predict in test set
+ estimated_expected_reward[reward_feature] = expected_reward_model.predict(self._test_data)
+ self._estimated_expected_reward = estimated_expected_reward
+
+ def _estimate_importance_weight(self, mab: BaseMab) -> np.ndarray:
+ """
+ Compute importance weights induced by the behavior and evaluation policies.
+
+ Reference: Balanced Off-Policy Evaluation in General Action Spaces (Sondhi, Arbour, and Dimmery, 2020)
+ https://arxiv.org/pdf/1906.03694
+
+ Parameters
+ ----------
+ mab : BaseMab
+ Multi-armed bandit to be evaluated
+
+ Returns
+ -------
+ expected_importance_weights : np.ndarray
+ estimated importance weights
+ """
+ if self.verbose:
+ logger.info(f"Data prediction of importance weights based on {self.importance_weights_model_type} model.")
+
+ importance_weights_model = _FunctionEstimator(
+ estimator_type=self.importance_weights_model_type,
+ fast_fit=self.fast_fit,
+ action_one_hot_encoder=self._action_one_hot_encoder,
+ n_trials=self.n_trials,
+ verbose=self.verbose,
+ study_name=f"{self.importance_weights_model_type}_importance_weights",
+ multi_action_prediction=False,
+ )
+ train_data = deepcopy(self._train_data)
+ mab_data = self._train_data["context"] if self.contextual_features else self._train_data["n_rounds"]
+ selected_actions = _mab_predict(mab, mab_data)
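+ # build a binary classification problem: logged behavior actions are labeled 0 and the
+ # actions selected by the evaluated policy are labeled 1; the classifier's predicted
+ # probability p then yields importance weights p / (1 - p)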
+ train_data["action_ids"] = np.concatenate((train_data["action_ids"], selected_actions), axis=0)
+ train_data["context"] = np.concatenate((train_data["context"], train_data["context"]), axis=0)
+ y = np.concatenate((np.zeros(len(selected_actions)), np.ones(len(selected_actions))), axis=0)
+ importance_weights_model.fit(X=train_data, y=y)
+
+ # predict in test set
+ estimated_proba = importance_weights_model.predict(self._test_data)
+ expected_importance_weights = estimated_proba / (1 - estimated_proba)
+ return expected_importance_weights
+
+ def _estimate_policy(
+ self,
+ mab: BaseMab,
+ n_mc_experiments: PositiveInt = 1000,
+ n_cores: Optional[NonNegativeInt] = None,
+ ) -> np.ndarray:
+ """
+ Estimate policy via Monte Carlo (MC) sampling based on sampling distribution of each action a in the test set.
+
+ Reference: Estimation Considerations in Contextual Bandit
+ https://arxiv.org/pdf/1711.07077.pdf
+ Reference: Debiased Off-Policy Evaluation for Recommendation Systems
+ https://arxiv.org/pdf/2002.08536.pdf
+ Reference: CAB: Continuous Adaptive Blending for Policy Evaluation and Learning
+ https://arxiv.org/pdf/1811.02672.pdf
+
+ Parameters
+ ----------
+ mab : BaseMab
+ Multi-armed bandit to be evaluated
+ n_mc_experiments : PositiveInt
+ Number of MC sampling rounds. Defaults to 1000.
+ n_cores : Optional[NonNegativeInt]
+ Number of cores used for multiprocessing. Defaults to all available cores if not specified.
+
+ Returns
+ -------
+ estimated_policy : np.ndarray of shape (n_samples, n_actions)
+ action selection probabilities for each sample and action
+ """
+ if self.verbose:
+ logger.info("Data prediction of expected policy based on Monte Carlo experiments.")
+ n_cores = n_cores or cpu_count()
+
+ # using MC sampling, build a matrix of selected best actions of shape (n_samples, n_mc_experiments)
+ mc_actions = []
+ mab_data = self._test_data["context"] if self.contextual_features else self._test_data["n_rounds"]
+ predict_func = partial(_mab_predict, mab, mab_data)
+ with Pool(processes=n_cores) as pool:
+ # predict best action for a new prior parameters draw
+ # using argmax(p(r|a, x)) with a in the list of actions
+ for mc_action in tqdm(pool.imap_unordered(predict_func, range(n_mc_experiments))):
+ mc_actions.append(mc_action)
+
+ # finalize the dataframe shape to #samples X #mc experiments
+ mc_actions = pd.DataFrame(mc_actions).T
+
+ # for each sample and action, compute the selection frequency across MC iterations
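+ # e.g. if a sample's selected actions over 4 MC draws are ["a1", "a1", "a2", "a1"],
+ # its estimated policy row becomes 0.75 for "a1" and 0.25 for "a2"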
+ estimated_policy = np.zeros((self._test_data["n_rounds"], len(self._test_data["unique_actions"])))
+ mc_action_counts = mc_actions.apply(pd.Series.value_counts, axis=1).fillna(0)
+
+ for u in tqdm(range(self._test_data["n_rounds"])):
+ estimated_policy[u, :] = (
+ mc_action_counts.iloc[u, :].reindex(self._test_data["unique_actions"], fill_value=0).values
+ / mc_actions.shape[1]
+ )
+ return estimated_policy
+
+ def evaluate(
+ self,
+ mab: BaseMab,
+ n_mc_experiments: int = 1000,
+ save_path: Optional[str] = None,
+ visualize: bool = True,
+ ) -> pd.DataFrame:
+ """
+ Execute the OPE process with multiple estimators simultaneously.
+
+ Parameters
+ ----------
+ mab : BaseMab
+ Multi-armed bandit model to be evaluated
+ n_mc_experiments : int
+ Number of Monte Carlo experiments for policy estimation
+ save_path : Optional[str], defaults to None.
+ Path to save the results. Nothing is saved if not specified.
+ visualize : bool, defaults to True.
+ Whether to visualize the results of the OPE process
+
+ Returns
+ -------
+ estimated_policy_value_df : pd.DataFrame
+ Estimated policy values and confidence intervals
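+
+ Examples
+ --------
+ A minimal sketch (the toy logged data, column names and bandit below are
+ illustrative only; see the tests for full configurations)::
+
+ import numpy as np
+ import pandas as pd
+ from pybandits.smab import SmabBernoulli
+
+ rng = np.random.default_rng(0)
+ logged_df = pd.DataFrame(
+ {
+ "batch": np.repeat([0, 1, 2, 3], 25),
+ "action_id": rng.choice(["a1", "a2"], size=100),
+ "reward": rng.integers(0, 2, size=100),
+ }
+ )
+ mab = SmabBernoulli.cold_start(action_ids={"a1", "a2"})
+ evaluator = OfflinePolicyEvaluator(
+ logged_data=logged_df,
+ split_prop=0.5,
+ propensity_score_model_type="empirical",
+ expected_reward_model_type="logreg",
+ importance_weights_model_type="logreg",
+ n_trials=10,
+ fast_fit=True,
+ ope_estimators=None,
+ batch_feature="batch",
+ action_feature="action_id",
+ reward_feature="reward",
+ verbose=False,
+ )
+ results_df = evaluator.evaluate(mab=mab, visualize=False)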
+ """
+ if visualize and not save_path and not in_jupyter_notebook():
+ raise ValueError("save_path is required for visualization when not running in a Jupyter notebook")
+
+ # Define OPE keyword arguments
+ kwargs = {}
+ if self._check_argument_required_by_estimators("action", self.ope_estimators):
+ kwargs["action"] = self._test_data["action"]
+ if self._check_argument_required_by_estimators("estimated_policy", self.ope_estimators):
+ kwargs["estimated_policy"] = self._estimate_policy(mab=mab, n_mc_experiments=n_mc_experiments)
+ if self._check_argument_required_by_estimators("propensity_score", self.ope_estimators):
+ kwargs["propensity_score"] = self._test_data["propensity_score"]
+ if self._check_argument_required_by_estimators("expected_importance_weight", self.ope_estimators):
+ kwargs["expected_importance_weight"] = self._estimate_importance_weight(mab)
+
+ # Instantiate class to conduct OPE by multiple estimators simultaneously
+ multi_objective_estimated_policy_value_df = pd.DataFrame()
+ results = {"value": [], "lower": [], "upper": [], "std": [], "estimator": [], "objective": []}
+ for reward_feature in self.reward_feature:
+ if self.verbose:
+ logger.info(f"Offline Policy Evaluation for {reward_feature}.")
+
+ if self._check_argument_required_by_estimators("reward", self.ope_estimators):
+ kwargs["reward"] = self._test_data["reward"][:, self.reward_feature.index(reward_feature)]
+ if self._check_argument_required_by_estimators("expected_reward", self.ope_estimators):
+ kwargs["expected_reward"] = self._estimated_expected_reward[reward_feature]
+
+ # Summarize policy values and their confidence intervals estimated by OPE estimators
+ for ope_estimator in self.ope_estimators:
+ estimated_policy_value, low, high, std = ope_estimator.estimate_policy_value_with_confidence_interval(
+ **kwargs,
+ )
+ results["value"].append(estimated_policy_value)
+ results["lower"].append(low)
+ results["upper"].append(high)
+ results["std"].append(std)
+ results["estimator"].append(ope_estimator.name)
+ results["objective"].append(reward_feature)
+
+ multi_objective_estimated_policy_value_df = pd.concat(
+ [multi_objective_estimated_policy_value_df, pd.DataFrame.from_dict(results)],
+ axis=0,
+ )
+ if save_path:
+ multi_objective_estimated_policy_value_df.to_csv(os.path.join(save_path, "estimated_policy_value.csv"))
+
+ if visualize:
+ self._visualize_results(save_path, multi_objective_estimated_policy_value_df)
+
+ return multi_objective_estimated_policy_value_df
+
+ def update_and_evaluate(
+ self,
+ mab: BaseMab,
+ n_mc_experiments: int = 1000,
+ save_path: Optional[str] = None,
+ visualize: bool = True,
+ with_test: bool = False,
+ ) -> pd.DataFrame:
+ """
+ Execute update of the multi-armed bandit based on the logged data,
+ followed by the OPE process with multiple estimators simultaneously.
+
+ Parameters
+ ----------
+ mab : BaseMab
+ Multi-armed bandit model to be updated and evaluated
+ n_mc_experiments : int
+ Number of Monte Carlo experiments for policy estimation
+ save_path : Optional[str]
+ Path to save the results. Nothing is saved if not specified.
+ visualize : bool
+ Whether to visualize the results of the OPE process
+ with_test : bool
+ Whether to update the bandit model with the test data
+
+ Returns
+ -------
+ estimated_policy_value_df : pd.DataFrame
+ Estimated policy values
+ """
+ self._update_mab(mab, self._train_data)
+ if with_test:
+ self._update_mab(mab, self._test_data)
+ estimated_policy_value_df = self.evaluate(mab, n_mc_experiments, save_path, visualize)
+ return estimated_policy_value_df
+
+ def _update_mab(self, mab: BaseMab, data: Dict[str, Any]):
+ """
+ Update the multi-armed bandit model based on the logged data.
+
+ Parameters
+ ----------
+ mab : BaseMab
+ Multi-armed bandit model to be updated.
+ data : Dict[str, Any]
+ Data used to update the bandit model.
+ """
+ if self.verbose:
+ logger.info(f"Offline policy update for {type(mab)}.")
+ kwargs = {"context": data["context"]} if self.contextual_features else {}
+ mab.update(actions=data["action_ids"].tolist(), rewards=np.squeeze(data["reward"]).tolist(), **kwargs)
+
+ def _visualize_results(self, save_path: Optional[str], multi_objective_estimated_policy_value_df: pd.DataFrame):
+ """
+ Visualize the results of the OPE process.
+
+ Parameters
+ ----------
+ save_path : Optional[str]
+ Path to save the visualization results. Required if not running in a Jupyter notebook.
+ multi_objective_estimated_policy_value_df : pd.DataFrame
+ Estimated policy values and confidence intervals per objective
+ """
+
+ tabs = []
+ grouped_df = multi_objective_estimated_policy_value_df.groupby("objective")
+ tools = "crosshair, pan, wheel_zoom, box_zoom, reset, hover, save"
+
+ tooltips = [
+ ("Estimator", "@estimator"),
+ ("Estimated policy value", "@value"),
+ ("Lower CI", "@lower"),
+ ("Upper CI", "@upper"),
+ ]
+ for group_name, estimated_interval_df in grouped_df:
+ source = ColumnDataSource(
+ data=dict(
+ estimator=estimated_interval_df["estimator"],
+ value=estimated_interval_df["value"],
+ lower=estimated_interval_df["lower"],
+ upper=estimated_interval_df["upper"],
+ )
+ )
+ fig = figure(
+ title=f"Policy value estimates for {group_name} objective",
+ x_axis_label="Estimator",
+ y_axis_label="Estimated policy value (\u00b1 CI)",
+ sizing_mode="inherit",
+ x_range=source.data["estimator"],
+ tools=tools,
+ tooltips=tooltips,
+ )
+ fig.vbar(x="estimator", top="value", width=0.9, source=source)
+
+ # Add error bars for confidence intervals
+ fig.segment(
+ x0="estimator", y0="lower", x1="estimator", y1="upper", source=source, line_width=2, color="black"
+ ) # error bar line
+ fig.vbar(
+ x="estimator", width=0.1, bottom="lower", top="upper", source=source, color="black"
+ ) # error bar cap
+
+ fig.xgrid.grid_line_color = None
+
+ tabs.append(TabPanel(child=fig, title=f"{group_name}"))
+
+ output_path = os.path.join(save_path, "multi_objective_estimated_policy.html") if save_path else None
+ visualize_via_bokeh(tabs=tabs, output_path=output_path)
+
+
+def _mab_predict(mab: BaseMab, mab_data: Union[np.ndarray, PositiveInt], mc_experiment: int = 0) -> List[ActionId]:
+ """
+ Predict the actions selected by the bandit in the test set.
+
+ Parameters
+ ----------
+ mab : BaseMab
+ Multi-armed bandit model
+ mab_data : Union[np.ndarray, PositiveInt]
+ test data used for prediction; context matrix or number of samples.
+ mc_experiment : int
+ placeholder index of the Monte Carlo experiment, required for multiprocessing
+
+ Returns
+ -------
+ actions: List[ActionId] of shape (n_samples,)
+ The actions selected by the multi-armed bandit model.
+ """
+ mab_output = mab.predict(context=mab_data) if type(mab_data) is np.ndarray else mab.predict(n_samples=mab_data)
+ actions = mab_output[0]
+ return actions
diff --git a/pybandits/pydantic_version_compatibility.py b/pybandits/pydantic_version_compatibility.py
index a119264..76a8a07 100644
--- a/pybandits/pydantic_version_compatibility.py
+++ b/pybandits/pydantic_version_compatibility.py
@@ -27,7 +27,19 @@
from typing import Any, Callable, Dict, Literal, Optional, Union
from warnings import warn
-from pydantic import BaseModel, Field, NonNegativeFloat, PositiveInt, ValidationError, confloat, conint, constr
+from pydantic import (
+ BaseModel,
+ Field,
+ NonNegativeFloat,
+ NonNegativeInt,
+ PositiveFloat,
+ PositiveInt,
+ PrivateAttr,
+ ValidationError,
+ confloat,
+ conint,
+ constr,
+)
from pydantic.version import VERSION as _VERSION
# Define the pydantic versions
@@ -258,6 +270,8 @@ def _convert_config_param(config: Dict[str, Any], v2_name: str, v1_name: str) ->
"model_validator",
"validate_call",
"NonNegativeFloat",
+ "NonNegativeInt",
+ "PositiveFloat",
"PositiveInt",
"BaseModel",
"ValidationError",
@@ -265,4 +279,5 @@ def _convert_config_param(config: Dict[str, Any], v2_name: str, v1_name: str) ->
"conint",
"constr",
"Field",
+ "PrivateAttr",
]
diff --git a/pybandits/strategy.py b/pybandits/strategy.py
index a67be09..c1b33d8 100644
--- a/pybandits/strategy.py
+++ b/pybandits/strategy.py
@@ -19,14 +19,18 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-
from abc import ABC, abstractmethod
from random import random
+from sys import version_info
from typing import Any, Dict, List, Optional, Union
import numpy as np
from scipy.stats import ttest_ind_from_stats
-from typing_extensions import Self
+
+if version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
from pybandits.base import ActionId, Float01, Probability, PyBanditsBaseModel
from pybandits.model import Beta, BetaMOCC, Model
diff --git a/pybandits/utils.py b/pybandits/utils.py
index d0577b5..3d703df 100644
--- a/pybandits/utils.py
+++ b/pybandits/utils.py
@@ -1,5 +1,12 @@
+import inspect
import json
-from typing import Any, Callable, Dict, List, Union
+from abc import ABC
+from types import ModuleType
+from typing import Any, Callable, Dict, List, Optional, Union
+
+from bokeh.io import curdoc, output_file, output_notebook, save, show
+from bokeh.models import InlineStyleSheet, TabPanel, Tabs
+from IPython import get_ipython
from pybandits.pydantic_version_compatibility import validate_call
@@ -42,3 +49,74 @@ def extract_argument_names_from_function(function_handle: Callable, is_class_met
start_index = int(is_class_method)
argument_names = function_handle.__code__.co_varnames[start_index : function_handle.__code__.co_argcount]
return argument_names
+
+
+@validate_call(config=dict(arbitrary_types_allowed=True))
+def get_non_abstract_classes(module: ModuleType) -> List[type]:
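+ """
+ Collect the non-abstract classes defined in a module.
+
+ Parameters
+ ----------
+ module : ModuleType
+ Module to inspect.
+
+ Returns
+ -------
+ non_abc_classes : List[type]
+ Classes defined in the module that are not abstract.
+ """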
+ non_abc_classes = []
+ for name, obj in inspect.getmembers(module, inspect.isclass):
+ # Ensure the class is defined in the module and not imported
+ if obj.__module__ == module.__name__:
+ # Check if the class is not an abstract class (i.e., doesn't inherit from abc.ABC)
+ if not inspect.isabstract(obj) and ABC not in obj.__bases__:
+ non_abc_classes.append(obj)
+ return non_abc_classes
+
+
+def in_jupyter_notebook() -> bool:
+ """
+ Check if the code is running in a Jupyter notebook.
+
+ Reference: https://stackoverflow.com/a/39662359
+
+ Returns
+ -------
+ bool
+ True if the code is running in a Jupyter notebook, False otherwise.
+ """
+
+ try:
+ shell = get_ipython().__class__.__name__
+
+ if shell == "ZMQInteractiveShell":
+ return True # Jupyter notebook or qtconsole
+
+ elif shell == "TerminalInteractiveShell":
+ return False # Terminal running IPython
+
+ else:
+ return False # Other type (likely shouldn't happen)
+
+ except NameError:
+ return False # Probably standard Python interpreter
+
+
+def visualize_via_bokeh(output_path: Optional[str], tabs: List[TabPanel]):
+ """
+ Visualize output to either a Jupyter notebook or an HTML file.
+
+ Parameters
+ ----------
+ output_path : Optional[str]
+ Path to the output file. Required if not running in a Jupyter notebook.
+ tabs : List[TabPanel]
+ Tab panels to be rendered in the report.
+ """
+
+ if in_jupyter_notebook():
+ output_notebook()
+ else:
+ if output_path is None:
+ raise ValueError("output_path is required when not running in a Jupyter notebook.")
+ output_file(output_path)
+
+ # Add an inline stylesheet so the tab headers wrap onto multiple lines when needed
+ css = """
+ :host(.bk-Tabs) .bk-header {
+ flex-wrap: wrap !important;
+ }
+ """
+ stylesheet = InlineStyleSheet(css=css)
+ curdoc().title = "Visual report"
+ if in_jupyter_notebook():
+ show(Tabs(tabs=tabs, stylesheets=[stylesheet]))
+ else:
+ save(Tabs(tabs=tabs, sizing_mode="stretch_both", stylesheets=[stylesheet]))
diff --git a/pyproject.toml b/pyproject.toml
index 91bf9fa..65f6303 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pybandits"
-version = "1.0.2"
+version = "1.2.0"
description = "Python Multi-Armed Bandit Library"
authors = [
"Dario d'Andrea ",
@@ -11,28 +11,41 @@ authors = [
]
license = "MIT License"
readme = "README.md"
+homepage = "https://github.com/PlaytikaOSS/pybandits"
+repository = "https://github.com/PlaytikaOSS/pybandits"
+keywords = ["multi-armed bandit", "reinforcement-learning", "optimization"]
[tool.poetry.dependencies]
-python = ">=3.8.1,<3.12"
+python = ">=3.8.1,<3.13"
loguru = "^0.6"
-numpy = "^1.23"
+numpy = [
+ { version = "<1.25", python = "3.8.*" },
+ { version = ">=1.25", python = ">=3.9,<3.12" },
+ { version = ">=1.26", python = "3.12.*" },
+]
pydantic = ">=1.10"
-scipy = "^1.9"
-pymc = "^5.3"
+scipy = [
+ { version = ">=1.9,<1.13", python = ">=3.8,<3.12" },
+ { version = ">=1.11,<1.13", python = "3.12.*" },
+]
+pymc = [
+ { version = "^5.3", python = "3.8.*" },
+ { version = "^5.10", python = ">=3.9" },
+]
scikit-learn = "^1.1"
+optuna = "^3.6"
+bokeh = "^3.1"
[tool.poetry.group.dev.dependencies]
hypothesis = "^6.68.2"
-pytest = "^7.2.2"
+pytest = "^8.3.3"
tox = "^4.4.7"
-pandas = "^1.5.3"
+pandas = ">=1.5.3"
pre-commit = "^3.1.1"
-nbdev = "^2.3.12"
-rich = "^13.3.2"
-pyzmq = "25.0.0"
+nbstripout = "^0.7.1"
ipykernel = "^6.21.3"
jupyterlab = "^3.6.1"
-pytest-cov = "^4.0.0"
+pytest-cov = "^5.0.0"
pytest_mock = "^3.14.0"
ruff = "^0.5.6"
diff --git a/tests/test_cmab.py b/tests/test_cmab.py
index fdf2173..3b365ed 100644
--- a/tests/test_cmab.py
+++ b/tests/test_cmab.py
@@ -159,7 +159,7 @@ def test_cmab_init_with_wrong_blr_models(n_features, other_n_features, update_me
)
-@settings(deadline=60000)
+@settings(deadline=None)
@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods))
def test_cmab_update(n_samples, n_features, update_method):
actions = np.random.choice(["a1", "a2"], size=n_samples).tolist()
@@ -200,7 +200,7 @@ def run_update(context):
run_update(context=context)
-@settings(deadline=10000)
+@settings(deadline=None)
@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods))
def test_cmab_update_not_all_actions(n_samples, n_feat, update_method):
actions = np.random.choice(["a3", "a4"], size=n_samples).tolist()
@@ -547,7 +547,7 @@ def test_cmab_bai_predict(n_samples, n_features):
assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples
-@settings(deadline=10000)
+@settings(deadline=None)
@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods))
def test_cmab_bai_update(n_samples, n_features, update_method):
actions = np.random.choice(["a1", "a2"], size=n_samples).tolist()
@@ -783,7 +783,7 @@ def test_cmab_cc_predict(n_samples, n_features):
assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples
-@settings(deadline=10000)
+@settings(deadline=None)
@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods))
def test_cmab_cc_update(n_samples, n_features, update_method):
actions = np.random.choice(["a1", "a2"], size=n_samples).tolist()
diff --git a/tests/test_offline_policy_estimator.py b/tests/test_offline_policy_estimator.py
new file mode 100644
index 0000000..d345413
--- /dev/null
+++ b/tests/test_offline_policy_estimator.py
@@ -0,0 +1,162 @@
+from typing import Tuple
+from unittest import mock
+
+import numpy as np
+import pytest
+from hypothesis import assume, given
+from hypothesis import strategies as st
+from hypothesis.extra.numpy import arrays
+
+from pybandits import offline_policy_estimator
+from pybandits.offline_policy_estimator import BaseOfflinePolicyEstimator
+from pybandits.utils import get_non_abstract_classes
+
+
+@st.composite
+def invalid_inputs(draw, n_samples: int = 10, n_actions: int = 2):
+ reward = None
+ propensity_score = None
+ estimated_policy = None
+ expected_reward = None
+ expected_importance_weight = None
+ bad_argument = draw(
+ st.sampled_from(
+ [
+ "action",
+ "reward",
+ "propensity_score",
+ "estimated_policy",
+ "expected_reward",
+ "expected_importance_weight",
+ ]
+ )
+ )
+ if bad_argument == "action":
+ action = draw(arrays(dtype=int, shape=(n_samples, 2), elements=st.integers(0, n_actions - 1)))
+ else:
+ action = draw(arrays(dtype=int, shape=(n_samples,), elements=st.integers(0, n_actions - 1)))
+ assume(np.unique(action).size == n_actions)
+ if bad_argument == "reward":
+ reward = draw(
+ st.one_of(
+ arrays(dtype=int, shape=(n_samples, 2), elements=st.integers(0, 1)),
+ arrays(dtype=float, shape=(n_samples,), elements=st.floats(0, 1)),
+ arrays(
+ dtype=int,
+ shape=(n_samples - 1,),
+ elements=st.integers(0, 1),
+ ),
+ arrays(
+ dtype=int,
+ shape=(n_samples + 1,),
+ elements=st.integers(0, 1),
+ ),
+ )
+ )
+ elif bad_argument in ("propensity_score", "expected_importance_weight"):
+ random_value = draw(
+ st.one_of(
+ arrays(dtype=float, shape=(n_samples, 2), elements=st.floats(0, 1)),
+ arrays(dtype=float, shape=(n_samples,), elements=st.floats(0, 0)),
+ arrays(dtype=int, shape=(n_samples,), elements=st.integers(0, 1)),
+ arrays(
+ dtype=float,
+ shape=(n_samples - 1,),
+ elements=st.floats(0, 1),
+ ),
+ arrays(
+ dtype=float,
+ shape=(n_samples + 1,),
+ elements=st.floats(0, 1),
+ ),
+ )
+ )
+
+ if bad_argument == "propensity_score":
+ propensity_score = random_value
+ elif bad_argument == "expected_importance_weight":
+ expected_importance_weight = random_value
+ elif bad_argument == "estimated_policy":
+ estimated_policy = draw(
+ st.one_of(
+ arrays(dtype=float, shape=(n_samples,), elements=st.floats(0, 1)),
+ arrays(dtype=float, shape=(n_samples, 2), elements=st.floats(0, 0)),
+ arrays(dtype=int, shape=(n_samples, 2), elements=st.integers(0, 1)),
+ arrays(
+ dtype=float,
+ shape=(n_samples - 1, 1),
+ elements=st.floats(0, 1),
+ ),
+ arrays(
+ dtype=float,
+ shape=(n_samples + 1, 1),
+ elements=st.floats(0, 1),
+ ),
+ )
+ )
+ elif bad_argument == "expected_reward":
+ expected_reward = draw(
+ st.one_of(
+ arrays(dtype=float, shape=(n_samples,), elements=st.floats(0, 1)),
+ arrays(dtype=int, shape=(n_samples, 2), elements=st.integers(0, 1)),
+ arrays(
+ dtype=float,
+ shape=(n_samples - 1, 1),
+ elements=st.floats(0, 1),
+ ),
+ arrays(
+ dtype=float,
+ shape=(n_samples + 1, 1),
+ elements=st.floats(0, 1),
+ ),
+ )
+ )
+ else:
+ raise ValueError(f"Invalid bad_argument: {bad_argument}")
+ return action, reward, propensity_score, estimated_policy, expected_reward, expected_importance_weight
+
+
+@mock.patch.multiple(BaseOfflinePolicyEstimator, __abstractmethods__=set())
+@given(invalid_inputs())
+def test_shape_mismatches(
+ inputs: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray],
+):
+ action, reward, propensity_score, estimated_policy, expected_reward, expected_importance_weight = inputs
+ estimator = BaseOfflinePolicyEstimator()
+ kwargs = {}
+ if reward is not None:
+ kwargs["reward"] = reward
+ if propensity_score is not None:
+ kwargs["propensity_score"] = propensity_score
+ if estimated_policy is not None:
+ kwargs["estimated_policy"] = estimated_policy
+ if expected_reward is not None:
+ kwargs["expected_reward"] = expected_reward
+ if expected_importance_weight is not None:
+ kwargs["expected_importance_weight"] = expected_importance_weight
+ with pytest.raises(ValueError):
+ estimator._check_inputs(action=action, **kwargs)
+
+
+@given(
+ arrays(dtype=int, shape=(10,), elements=st.integers(0, 1)),
+ arrays(dtype=int, shape=(10,), elements=st.integers(0, 1)),
+ arrays(dtype=float, shape=(10,), elements=st.floats(0.01, 1)),
+ arrays(dtype=float, shape=(10, 2), elements=st.floats(0.01, 1)),
+ arrays(dtype=float, shape=(10, 2), elements=st.floats(0, 1)),
+ arrays(dtype=float, shape=(10,), elements=st.floats(0.01, 1)),
+)
+def test_default_estimators(
+ action, reward, propensity_score, estimated_policy, expected_reward, expected_importance_weight
+):
+ if np.unique(action).size > 1:
+ estimators = [class_() for class_ in get_non_abstract_classes(offline_policy_estimator)]
+ for estimator in estimators:
+ estimator.estimate_policy_value_with_confidence_interval(
+ action=action,
+ reward=reward,
+ propensity_score=propensity_score,
+ estimated_policy=estimated_policy,
+ expected_reward=expected_reward,
+ expected_importance_weight=expected_importance_weight,
+ )
diff --git a/tests/test_offline_policy_evaluator.py b/tests/test_offline_policy_evaluator.py
new file mode 100644
index 0000000..6067529
--- /dev/null
+++ b/tests/test_offline_policy_evaluator.py
@@ -0,0 +1,300 @@
+from tempfile import TemporaryDirectory
+from typing import Dict, List, Optional, Union, get_args, get_type_hints
+
+import numpy as np
+import pandas as pd
+import pytest
+from hypothesis import given, settings
+from hypothesis import strategies as st
+from matplotlib.pyplot import close
+from pydantic import PositiveInt
+from sklearn.base import TransformerMixin
+from sklearn.preprocessing import MinMaxScaler
+
+from pybandits.cmab import CmabBernoulli, CmabBernoulliCC
+from pybandits.offline_policy_estimator import BaseOfflinePolicyEstimator
+from pybandits.offline_policy_evaluator import OfflinePolicyEvaluator
+from pybandits.smab import (
+ SmabBernoulli,
+ SmabBernoulliCC,
+ SmabBernoulliMO,
+ SmabBernoulliMOCC,
+)
+
+
+@pytest.fixture(scope="module")
+def logged_data(n_samples=10, n_actions=2, n_batches=3, n_rewards=2, n_groups=2, n_features=3):
+ unique_actions = [f"a{i}" for i in range(n_actions)]
+ action_ids = np.random.choice(unique_actions, n_samples * n_batches)
+ batches = [i for i in range(n_batches) for _ in range(n_samples)]
+ rewards = [np.random.randint(2, size=(n_samples * n_batches)) for _ in range(n_rewards)]
+ action_true_rewards = {(a, r): np.random.rand() for a in unique_actions for r in range(n_rewards)}
+ true_rewards = [
+ np.array([action_true_rewards[(a, r)] for a in action_ids]).reshape(n_samples * n_batches)
+ for r in range(n_rewards)
+ ]
+ groups = np.random.randint(n_groups, size=n_samples * n_batches)
+ action_costs = {action: np.random.rand() for action in unique_actions}
+ costs = np.array([action_costs[a] for a in action_ids])
+ context = np.random.rand(n_samples * n_batches, n_features)
+ action_propensity_score = {action: np.random.rand() for action in unique_actions}
+ propensity_score = np.array([action_propensity_score[a] for a in action_ids])
+ return pd.DataFrame(
+ {
+ "batch": batches,
+ "action_id": action_ids,
+ "cost": costs,
+ "group": groups,
+ **{f"reward_{r}": rewards[r] for r in range(n_rewards)},
+ **{f"true_reward_{r}": true_rewards[r] for r in range(n_rewards)},
+ **{f"context_{i}": context[:, i] for i in range(n_features)},
+ "propensity_score": propensity_score,
+ }
+ )
+
+
+# validate failure for empty logged_data
+def test_empty_logged_data(
+ split_prop=0.5,
+ n_trials=10,
+ verbose=False,
+ batch_feature="batch",
+ action_feature="action_id",
+ reward_feature="reward",
+ propensity_score_model_type="empirical",
+ expected_reward_model_type="logreg",
+ importance_weights_model_type="logreg",
+):
+ with pytest.raises(AttributeError):
+ OfflinePolicyEvaluator(
+ logged_data=pd.DataFrame(),
+ split_prop=split_prop,
+ propensity_score_model_type=propensity_score_model_type,
+ expected_reward_model_type=expected_reward_model_type,
+ importance_weights_model_type=importance_weights_model_type,
+ n_trials=n_trials,
+ ope_estimators=None,
+ batch_feature=batch_feature,
+ action_feature=action_feature,
+ reward_feature=reward_feature,
+ verbose=verbose,
+ )
+
+
+@pytest.mark.usefixtures("logged_data")
+@given(
+ split_prop=st.sampled_from([0.0, 1.0]),
+ n_trials=st.just(10),
+ ope_estimators=st.just(None),
+ verbose=st.just(False),
+ batch_feature=st.just("batch"),
+ action_feature=st.just("action_id"),
+ reward_feature=st.just("reward_0"),
+ propensity_score_model_type=st.just("empirical"),
+ expected_reward_model_type=st.just("logreg"),
+ importance_weights_model_type=st.just("logreg"),
+)
+# validate failure for extreme split_prop values
+def test_initialization_extreme_split_prop(
+ logged_data: pd.DataFrame,
+ split_prop: float,
+ n_trials: PositiveInt,
+ ope_estimators: Optional[List[BaseOfflinePolicyEstimator]],
+ verbose: bool,
+ batch_feature: str,
+ action_feature: str,
+ reward_feature: str,
+ propensity_score_model_type: str,
+ expected_reward_model_type: str,
+ importance_weights_model_type: str,
+):
+ with pytest.raises(ValueError):
+ OfflinePolicyEvaluator(
+ logged_data=logged_data,
+ split_prop=split_prop,
+ propensity_score_model_type=propensity_score_model_type,
+ expected_reward_model_type=expected_reward_model_type,
+ importance_weights_model_type=importance_weights_model_type,
+ n_trials=n_trials,
+ ope_estimators=ope_estimators,
+ batch_feature=batch_feature,
+ action_feature=action_feature,
+ reward_feature=reward_feature,
+ true_reward_feature=reward_feature,
+ verbose=verbose,
+ )
+
+
+# validate failure for invalid initialization parameters
+def test_initialization_mismatches(
+ logged_data: pd.DataFrame,
+ split_prop=0.5,
+ n_trials=10,
+ ope_estimators=None,
+ verbose=False,
+ batch_feature="batch",
+ action_feature="action_id",
+ reward_feature="reward_0",
+ propensity_score_model_type="empirical",
+ expected_reward_model_type="logreg",
+ importance_weights_model_type="logreg",
+):
+ # more true_reward_features than rewards
+ with pytest.raises(ValueError):
+ OfflinePolicyEvaluator(
+ logged_data=logged_data,
+ split_prop=split_prop,
+ propensity_score_model_type=propensity_score_model_type,
+ expected_reward_model_type=expected_reward_model_type,
+ importance_weights_model_type=importance_weights_model_type,
+ n_trials=n_trials,
+ ope_estimators=ope_estimators,
+ batch_feature=batch_feature,
+ action_feature=action_feature,
+ reward_feature=reward_feature,
+ true_reward_feature=[reward_feature, reward_feature],
+ verbose=verbose,
+ )
+ # missing propensity_score_feature
+ with pytest.raises(ValueError):
+ OfflinePolicyEvaluator(
+ logged_data=logged_data,
+ split_prop=split_prop,
+ propensity_score_model_type="propensity_score",
+ expected_reward_model_type=expected_reward_model_type,
+ importance_weights_model_type=importance_weights_model_type,
+ n_trials=n_trials,
+ ope_estimators=ope_estimators,
+ batch_feature=batch_feature,
+ action_feature=action_feature,
+ reward_feature=reward_feature,
+ visualize=False,
+ )
+ # missing context
+ with pytest.raises(AttributeError):
+ OfflinePolicyEvaluator(
+ logged_data=logged_data,
+ split_prop=split_prop,
+ propensity_score_model_type=propensity_score_model_type,
+ expected_reward_model_type=expected_reward_model_type,
+ importance_weights_model_type=importance_weights_model_type,
+ n_trials=n_trials,
+ ope_estimators=ope_estimators,
+ batch_feature=batch_feature,
+ action_feature=action_feature,
+ reward_feature=reward_feature,
+ verbose=False,
+ contextual_features=["non_existent"],
+ )
+
+
+@pytest.mark.usefixtures("logged_data")
+@settings(deadline=None)
+@given(
+ split_prop=st.just(0.5),
+ n_trials=st.just(10),
+ fast_fit=st.booleans(),
+ scaler=st.sampled_from([None, MinMaxScaler()]),
+ verbose=st.booleans(),
+ visualize=st.booleans(),
+ propensity_score_model_type=st.sampled_from(
+ get_args(get_type_hints(OfflinePolicyEvaluator)["propensity_score_model_type"])
+ ),
+ expected_reward_model_type=st.sampled_from(
+ get_args(get_type_hints(OfflinePolicyEvaluator)["expected_reward_model_type"])
+ ),
+ importance_weights_model_type=st.sampled_from(
+ get_args(get_type_hints(OfflinePolicyEvaluator)["importance_weights_model_type"])
+ ),
+ batch_feature=st.just("batch"),
+ action_feature=st.just("action_id"),
+ reward_feature=st.sampled_from(["reward_0", ["reward_0", "reward_1"]]),
+ context=st.booleans(),
+ group_feature=st.sampled_from(["group", None]),
+ cost_feature=st.sampled_from(["cost", None]),
+ propensity_score_feature=st.just("propensity_score"),
+ n_mc_experiments=st.just(2),
+ update=st.booleans(),
+)
+# test various OfflinePolicyEvaluator configurations to validate that everything works
+def test_running_configuration(
+ logged_data: pd.DataFrame,
+ split_prop: float,
+ n_trials: PositiveInt,
+ fast_fit: bool,
+ scaler: Optional[Union[TransformerMixin, Dict[str, TransformerMixin]]],
+ verbose: bool,
+ visualize: bool,
+ propensity_score_model_type: str,
+ expected_reward_model_type: str,
+ importance_weights_model_type: str,
+ batch_feature: str,
+ action_feature: str,
+ reward_feature: Union[str, List[int]],
+ context: bool,
+ group_feature: Optional[str],
+ cost_feature: Optional[str],
+ propensity_score_feature: Optional[str],
+ n_mc_experiments: int,
+ update: bool,
+):
+ if context and isinstance(reward_feature, list):
+ return # CmabMO and CmabMOCC are not supported yet
+ true_reward_feature = (
+ f"true_{reward_feature}" if isinstance(reward_feature, str) else [f"true_{r}" for r in reward_feature]
+ )
+ contextual_features = [col for col in logged_data.columns if col.startswith("context")] if context else None
+ unique_actions = logged_data["action_id"].unique()
+ if cost_feature:
+ action_ids_cost = {
+ action_id: logged_data["cost"][logged_data["action_id"] == action_id].iloc[0]
+ for action_id in unique_actions
+ }
+ if context:
+ if cost_feature:
+ if type(reward_feature) is list:
+ return # CmabMOCC is not supported yet
+ else:
+ mab = CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=len(contextual_features))
+ else:
+ if type(reward_feature) is list:
+ return # CmabMO is not supported yet
+ else:
+ mab = CmabBernoulli.cold_start(action_ids=set(unique_actions), n_features=len(contextual_features))
+ else:
+ if cost_feature:
+ if type(reward_feature) is list:
+ mab = SmabBernoulliMOCC.cold_start(action_ids_cost=action_ids_cost, n_objectives=len(reward_feature))
+ else:
+ mab = SmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost)
+ else:
+ if type(reward_feature) is list:
+ mab = SmabBernoulliMO.cold_start(action_ids=set(unique_actions), n_objectives=len(reward_feature))
+ else:
+ mab = SmabBernoulli.cold_start(action_ids=set(unique_actions))
+ evaluator = OfflinePolicyEvaluator(
+ logged_data=logged_data,
+ split_prop=split_prop,
+ n_trials=n_trials,
+ fast_fit=fast_fit,
+ scaler=scaler,
+ ope_estimators=None,
+ verbose=verbose,
+ propensity_score_model_type=propensity_score_model_type,
+ expected_reward_model_type=expected_reward_model_type,
+ importance_weights_model_type=importance_weights_model_type,
+ batch_feature=batch_feature,
+ action_feature=action_feature,
+ reward_feature=reward_feature,
+ true_reward_feature=true_reward_feature,
+ contextual_features=contextual_features,
+ group_feature=group_feature,
+ cost_feature=cost_feature,
+ propensity_score_feature=propensity_score_feature,
+ )
+ execution_func = evaluator.update_and_evaluate if update else evaluator.evaluate
+ with TemporaryDirectory() as tmp_dir:
+ execution_func(mab=mab, visualize=visualize, n_mc_experiments=n_mc_experiments, save_path=tmp_dir)
+ if visualize:
+ close("all") # close all figures to avoid memory leak