Add offline policy evaluation module and update dependencies

### Changes

* Introduced `offline_policy_evaluator.py` with classes for propensity score estimation and offline policy evaluation.
* Introduced `offline_policy_estimator.py` with classes for offline policy estimation.
* Updated `pyproject.toml` to include new dependencies: `bokeh` and `optuna`. Also adjusted existing dependencies to compatible versions and added Python 3.12 support.
* Changed `.pre-commit-config.yaml` to use `nbstripout` instead of `nbdev_clean`.
* Added caching of dependencies in CI and CD.
* Added a class method to `PyBanditsBaseModel` in `base.py` to allow seeing the default values of arguments that were not passed to the model.
* Added `test_offline_policy_evaluator.py` and `test_offline_policy_estimator.py` as a test suite for the `OfflinePolicyEvaluator`.
* Added `get_non_abstract_classes`, `visualize_via_bokeh` and `in_jupyter_notebook` utility functions.
PlaytikaOSS · Oct 27, 2024 · eb2eebe · eb2eebe
1 parent bd54568
commit eb2eebe
Show file tree

Hide file tree

Showing 22 changed files with 2,741 additions and 1,493 deletions.
diff --git a/.github/workflows/continuous_delivery.yml b/.github/workflows/continuous_delivery.yml
@@ -29,10 +29,23 @@ jobs:
           export PATH="$HOME/.poetry/bin:$PATH"
       - name: Backup pyproject.toml
         run: cp pyproject.toml pyproject.toml.bak
+      - name: Change pydantic version
+        run: |
+          poetry add pydantic@${{ matrix.pydantic-version }} --lock
+      - name: Cache Poetry virtualenv and dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.local/share/pypoetry/virtualenvs
+          key: ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-${{ hashFiles('poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-
       - name: Install project dependencies with Poetry
         run: |
-          poetry add pydantic@${{ matrix.pydantic-version }}
           poetry install
+      - name: Restore pyproject.toml
+        run: |
           mv pyproject.toml.bak pyproject.toml
       - name: Style check
         run: |

diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml
@@ -20,7 +20,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ "3.8", "3.9", "3.10" ]
+        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
         pydantic-version: [ "1.10.*", "2.*" ]
 
     steps:
@@ -35,14 +35,29 @@ jobs:
         run: |
           curl -sSL https://install.python-poetry.org | python3 -
           export PATH="$HOME/.poetry/bin:$PATH"
+      - name: Change pydantic version
+        run: |
+          poetry add pydantic@${{ matrix.pydantic-version }} --lock
+      - name: Cache Poetry virtualenv and dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.local/share/pypoetry/virtualenvs
+          key: ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-${{ hashFiles('poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-
       - name: Install project dependencies with Poetry
         run: |
-          poetry add pydantic@${{ matrix.pydantic-version }}
           poetry install
       - name: Style check
         run: |
           # run pre-commit hooks
           poetry run pre-commit run --all-files
       - name: Run tests
         run: |
+          START_TIME=$(date +%s)
           poetry run pytest -vv -k 'not time and not update_parallel'
+          END_TIME=$(date +%s)
+          DURATION=$((END_TIME - START_TIME))
+          echo "Tests completed in $DURATION seconds."
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
         types_or: [ python, pyi, jupyter ]
         require_serial: true
 
-  - repo: https://github.com/fastai/nbdev
-    rev: 2.3.11
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.7.1
     hooks:
-    - id: nbdev_clean
+      - id: nbstripout
diff --git a/docs/src/tutorials/cmab.ipynb b/docs/src/tutorials/cmab.ipynb
@@ -34,7 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {
     "pycharm": {
      "is_executing": false
@@ -56,31 +56,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
     "pycharm": {
      "is_executing": false
     }
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "X: context matrix of shape (n_samples, n_features)\n",
-      "[[-0.53211475 -0.40592956  0.05892565 -0.88067628 -0.84061481]\n",
-      " [-0.95680954 -0.00540581  0.09148556 -0.82021004 -0.63425381]\n",
-      " [-0.87792928 -0.51881823 -0.51767022 -0.05385187 -0.64499044]\n",
-      " [-0.10569516  0.30847784 -0.353929   -0.94831998 -0.52175713]\n",
-      " [-0.05088401  0.17155683 -0.4322128  -0.07509104 -0.78919832]\n",
-      " [-0.88604157  0.55037109  0.42634479 -0.87179776 -0.69767766]\n",
-      " [-0.0022063   0.99304089  0.76398198 -0.87343131 -0.12363411]\n",
-      " [ 0.36371019  0.6660538   0.17177652 -0.08891719 -0.91070485]\n",
-      " [-0.1056742  -0.72879406 -0.69367421 -0.8684397   0.70903817]\n",
-      " [-0.15422305  0.31069811 -0.47487951  0.00853137  0.23793364]]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# context\n",
     "n_samples = 1000\n",
@@ -92,7 +74,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -109,7 +91,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -126,18 +108,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Recommended action: ['action C' 'action C' 'action B' 'action B' 'action C' 'action C'\n",
-      " 'action B' 'action C' 'action B' 'action C']\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# predict action\n",
     "pred_actions, _ = cmab.predict(X)\n",
@@ -153,17 +126,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Simulated rewards: [1 0 0 0 0 0 0 0 1 1]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# simulate reward from environment\n",
     "simulated_rewards = np.random.randint(2, size=n_samples)\n",
@@ -179,31 +144,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Auto-assigning NUTS sampler...\n",
-      "Initializing NUTS using adapt_diag...\n",
-      "Sequential sampling (2 chains in 1 job)\n",
-      "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
-      "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 5 seconds.\n",
-      "Auto-assigning NUTS sampler...\n",
-      "Initializing NUTS using adapt_diag...\n",
-      "Sequential sampling (2 chains in 1 job)\n",
-      "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
-      "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n",
-      "Auto-assigning NUTS sampler...\n",
-      "Initializing NUTS using adapt_diag...\n",
-      "Sequential sampling (2 chains in 1 job)\n",
-      "NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
-      "Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# update model\n",
     "cmab.update(X, actions=pred_actions, rewards=simulated_rewards)"