Skip to content

Commit

Permalink
Add offline policy evaluation module and update dependencies
Browse files Browse the repository at this point in the history
 ### Changes
 * Introduced `offline_policy_evaluator.py` with classes for propensity score estimation and offline policy evaluation.
 * Introduced `offline_policy_estimator.py` with classes for offline policy estimation.
 * Updated `pyproject.toml` to include new dependencies: `bokeh` and `optuna`. Also adjusted existing dependencies to compatible versions and added Python 3.12 support.
 * Changed .pre-commit-config.yaml to utilize nbstripout instead of nbdev_clean.
 * Added caching of dependencies in the CI and CD workflows.
 * Added a class method to `PyBanditsBaseModel` in `base.py` that shows the default values of arguments that were not passed to the model.
 * Added test_offline_policy_evaluator.py and test_offline_policy_estimator.py as a test suite for the OfflinePolicyEvaluator.
 * Added `get_non_abstract_classes` and `visualize_via_bokeh` utility functions.
  • Loading branch information
shaharbar1 authored and Shahar-Bar committed Oct 28, 2024
1 parent 64913ef commit 74da683
Show file tree
Hide file tree
Showing 22 changed files with 2,624 additions and 1,494 deletions.
15 changes: 14 additions & 1 deletion .github/workflows/continuous_delivery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,23 @@ jobs:
export PATH="$HOME/.poetry/bin:$PATH"
- name: Backup pyproject.toml
run: cp pyproject.toml pyproject.toml.bak
- name: Change pydantic version
run: |
poetry add pydantic@${{ matrix.pydantic-version }} --lock
- name: Cache Poetry virtualenv and dependencies
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.local/share/pypoetry/virtualenvs
key: ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-${{ hashFiles('poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-
- name: Install project dependencies with Poetry
run: |
poetry add pydantic@${{ matrix.pydantic-version }}
poetry install
- name: Restore pyproject.toml
run: |
mv pyproject.toml.bak pyproject.toml
- name: Style check
run: |
Expand Down
18 changes: 17 additions & 1 deletion .github/workflows/continuous_integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ "3.8", "3.9", "3.10" ]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
pydantic-version: [ "1.10.*", "2.*" ]

steps:
Expand All @@ -35,6 +35,18 @@ jobs:
run: |
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
- name: Change pydantic version
run: |
poetry add pydantic@${{ matrix.pydantic-version }} --lock
- name: Cache Poetry virtualenv and dependencies
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.local/share/pypoetry/virtualenvs
key: ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-${{ hashFiles('poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ matrix.pydantic-version }}-
- name: Install project dependencies with Poetry
run: |
poetry add pydantic@${{ matrix.pydantic-version }}
Expand All @@ -45,4 +57,8 @@ jobs:
poetry run pre-commit run --all-files
- name: Run tests
run: |
START_TIME=$(date +%s)
poetry run pytest -n auto -vv
END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))
echo "Tests completed in $DURATION seconds."
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ repos:
types_or: [ python, pyi, jupyter ]
require_serial: true

- repo: https://github.com/fastai/nbdev
rev: 2.3.11
- repo: https://github.com/kynan/nbstripout
rev: 0.7.1
hooks:
- id: nbdev_clean
- id: nbstripout
79 changes: 11 additions & 68 deletions docs/src/tutorials/cmab.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"pycharm": {
"is_executing": false
Expand All @@ -56,31 +56,13 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {
"pycharm": {
"is_executing": false
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"X: context matrix of shape (n_samples, n_features)\n",
"[[-0.53211475 -0.40592956 0.05892565 -0.88067628 -0.84061481]\n",
" [-0.95680954 -0.00540581 0.09148556 -0.82021004 -0.63425381]\n",
" [-0.87792928 -0.51881823 -0.51767022 -0.05385187 -0.64499044]\n",
" [-0.10569516 0.30847784 -0.353929 -0.94831998 -0.52175713]\n",
" [-0.05088401 0.17155683 -0.4322128 -0.07509104 -0.78919832]\n",
" [-0.88604157 0.55037109 0.42634479 -0.87179776 -0.69767766]\n",
" [-0.0022063 0.99304089 0.76398198 -0.87343131 -0.12363411]\n",
" [ 0.36371019 0.6660538 0.17177652 -0.08891719 -0.91070485]\n",
" [-0.1056742 -0.72879406 -0.69367421 -0.8684397 0.70903817]\n",
" [-0.15422305 0.31069811 -0.47487951 0.00853137 0.23793364]]\n"
]
}
],
"outputs": [],
"source": [
"# context\n",
"n_samples = 1000\n",
Expand All @@ -92,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -109,7 +91,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -126,18 +108,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Recommended action: ['action C' 'action C' 'action B' 'action B' 'action C' 'action C'\n",
" 'action B' 'action C' 'action B' 'action C']\n"
]
}
],
"outputs": [],
"source": [
"# predict action\n",
"pred_actions, _ = cmab.predict(X)\n",
Expand All @@ -153,17 +126,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Simulated rewards: [1 0 0 0 0 0 0 0 1 1]\n"
]
}
],
"outputs": [],
"source": [
"# simulate reward from environment\n",
"simulated_rewards = np.random.randint(2, size=n_samples)\n",
Expand All @@ -179,31 +144,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Auto-assigning NUTS sampler...\n",
"Initializing NUTS using adapt_diag...\n",
"Sequential sampling (2 chains in 1 job)\n",
"NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
"Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 5 seconds.\n",
"Auto-assigning NUTS sampler...\n",
"Initializing NUTS using adapt_diag...\n",
"Sequential sampling (2 chains in 1 job)\n",
"NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
"Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n",
"Auto-assigning NUTS sampler...\n",
"Initializing NUTS using adapt_diag...\n",
"Sequential sampling (2 chains in 1 job)\n",
"NUTS: [beta4, beta3, beta2, beta1, beta0, alpha]\n",
"Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 3 seconds.\n"
]
}
],
"outputs": [],
"source": [
"# update model\n",
"cmab.update(X, actions=pred_actions, rewards=simulated_rewards)"
Expand Down
Loading

0 comments on commit 74da683

Please sign in to comment.