From 532ba3bfcd5a8de1420575a7aa7703a3a5406ffc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Feb 2024 10:04:57 +0100 Subject: [PATCH 1/3] [pre-commit.ci] pre-commit autoupdate (#1354) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b2a594b2f..0efcc6587 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.14 + rev: v0.2.0 hooks: - id: ruff types_or: [python, pyi, jupyter] From 4b4c92fa8fc61e71413a70b6978ab180ab54db80 Mon Sep 17 00:00:00 2001 From: Severin Dicks <37635888+Intron7@users.noreply.github.com> Date: Wed, 7 Feb 2024 15:07:39 +0100 Subject: [PATCH 2/3] added yaml for gpu_ci (#1359) * Set 30min timeout for GPU tests * added yaml for gpu_ci * fixes typo * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * List pip packages too --------- Co-authored-by: Isaac Virshup Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .ci/gpu_ci.yml | 12 ++++++++++++ .github/workflows/test-gpu.yml | 20 ++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) create mode 100644 .ci/gpu_ci.yml diff --git a/.ci/gpu_ci.yml b/.ci/gpu_ci.yml new file mode 100644 index 000000000..9776ec3d5 --- /dev/null +++ b/.ci/gpu_ci.yml @@ -0,0 +1,12 @@ +name: cupy_env +channels: + - nvidia + - conda-forge +dependencies: + - python=3.12 + - cuda-version=11.8 + - cupy + - numba + - pytest + - pytest-cov + - pytest-xdist diff --git a/.github/workflows/test-gpu.yml b/.github/workflows/test-gpu.yml index 104396116..5d93f10d9 100644 --- a/.github/workflows/test-gpu.yml +++ b/.github/workflows/test-gpu.yml @@ -35,9 +35,14 @@ jobs: 
name: GPU Tests needs: check runs-on: "cirun-aws-gpu--${{ github.run_id }}" + # Setting a timeout of 30 minutes, as the AWS costs money + # At time of writing, a typical run takes about 5 minutes + timeout-minutes: 30 + defaults: run: shell: bash -el {0} + steps: - uses: actions/checkout@v3 with: @@ -49,14 +54,7 @@ jobs: - uses: mamba-org/setup-micromamba@v1 with: micromamba-version: "1.3.1-0" - environment-name: anndata-gpu-ci - create-args: >- - python=3.11 - cupy - numba - pytest - pytest-cov - pytest-xdist + environment-file: .ci/gpu_ci.yml init-shell: >- bash generate-run-shell: false @@ -64,8 +62,10 @@ jobs: - name: Install AnnData run: pip install .[dev,test,gpu] - - name: Mamba list - run: micromamba list + - name: Env list + run: | + micromamba list + pip list - name: Run test run: pytest -m gpu --cov --cov-report=xml --cov-context=test -n 4 From f10bbdec3ae4a74571dfc955043c377f17536667 Mon Sep 17 00:00:00 2001 From: Ilan Gold Date: Thu, 8 Feb 2024 11:14:47 +0100 Subject: [PATCH 3/3] (feat): add setting to retain categories (#1340) * (feat): add options features. 
* (feat): tests, doc strings * (feat): add settings to docs * (fix): add `describe_option` to exports, try to fix docs errors * (chore): add reset test * (fix): no multi-inheritance in py3.9 for NamedTuple * (refactor): use decorator * (chore): move options section * (chore): add release note * (refactor): class based implementation * (feat): add deprecation * (chore): clean up docstrings and variables * (chore): redo release note * (bug): fix `api.md` * (style): fix grammar * finish up typing * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * style * (feat): use attributes instead of items * (chore): no boolean without * * (feat): support multi-option functionality * (feat): add `__dir__` method * (fix): `default_value` typing * (feat): dynamic docstring as class method * (feat): tab completion in jupyter notebook for override * (feat): tab completion in jupyter notebook for override * (feat): docstring for `override` * (refactor): do docstring update in wrapped function * (chore): remove docstring types. * (fix): `KeyError` -> `AttributeError` * (refactor): `setattr` -> direct setting * (refactor): no more decorator for updating `override` * (refactor): relabel options docstring variable * (fix): docstring tab * (chore): add `override` to docs * (fix): clean up docstring methods * (chore): clean up unused methods/objects * (chore): add extra test * (fix): remove environment variables * (feat): add setting to retain categories * (chore): add extra test for `allowed_values=None` * (chore): docs * (chore): clarify `override` usage * Apply suggestions from code review Co-authored-by: Philipp A. 
* (chore): add `dir` test * (chore): use mocking * (fix): small docstring fix * (fix): validator api + tests with nice warnings * (chore): remove leading space from note * (chore): update from validation change * (chore): make docstring clearer * (fix): use `add_note` * (refactor): unnecessary `else` in guard clause * (fix): do not raise DeprecationWarning * (chore): add 0/1 to docs for boolean env variables * (chore): move env variable fetching into `register` * (fix): typing from `cast` * (chore): docs link * (fix): use `Enum` properly --------- Co-authored-by: Philipp A Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- anndata/_config.py | 85 ++++++++++++++++++++++++++- anndata/_core/anndata.py | 6 +- anndata/tests/test_base.py | 10 ++++ anndata/tests/test_config.py | 110 ++++++++++++++++++++++++++++++++--- docs/release-notes/0.11.0.md | 1 + 5 files changed, 200 insertions(+), 12 deletions(-) diff --git a/anndata/_config.py b/anndata/_config.py index 4a8fc0d42..5128d79b6 100644 --- a/anndata/_config.py +++ b/anndata/_config.py @@ -1,16 +1,18 @@ from __future__ import annotations +import os import textwrap import warnings from collections.abc import Iterable from contextlib import contextmanager +from enum import Enum from inspect import Parameter, signature -from typing import TYPE_CHECKING, NamedTuple, TypeVar +from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar from anndata.compat.exceptiongroups import add_note if TYPE_CHECKING: - from collections.abc import Callable + from collections.abc import Callable, Sequence T = TypeVar("T") @@ -30,6 +32,55 @@ class RegisteredOption(NamedTuple): type: object +def check_and_get_environ_var( + key: str, + default_value: str, + allowed_values: Sequence[str] | None = None, + cast: Callable[[Any], T] | type[Enum] = lambda x: x, +) -> T: + """Get the environment variable and return it is a (potentially) non-string, usable value. 
+ + Parameters + ---------- + key + The environment variable name. + default_value + The default value for `os.environ.get`. + allowed_values + Allowable string values., by default None + cast + Casting from the string to a (potentially different) python object, by default lambdax:x + + Returns + ------- + The casted value. + """ + environ_value_or_default_value = os.environ.get(key, default_value) + if ( + allowed_values is not None + and environ_value_or_default_value not in allowed_values + ): + warnings.warn( + f'Value "{environ_value_or_default_value}" is not in allowed {allowed_values} for environment variable {key}.\ + Default {default_value} will be used.' + ) + environ_value_or_default_value = default_value + return ( + cast(environ_value_or_default_value) + if not isinstance(cast, type(Enum)) + else cast[environ_value_or_default_value] + ) + + +def check_and_get_bool(option, default_value): + return check_and_get_environ_var( + "ANNDATA_" + option.upper(), + str(int(default_value)), + ["0", "1"], + lambda x: bool(int(x)), + ) + + _docstring = """ This manager allows users to customize settings for the anndata package. Settings here will generally be for advanced use-cases and should be used with caution. @@ -39,6 +90,8 @@ class RegisteredOption(NamedTuple): {options_description} For setting an option please use :func:`~anndata.settings.override` (local) or set the above attributes directly (global) i.e., `anndata.settings.my_setting = foo`. +For assignment by environment variable, use the variable name in all caps with `ANNDATA_` as the prefix before import of :mod:`anndata`. +For boolean environment variable setting, use 1 for `True` and 0 for `False`. """ @@ -111,6 +164,7 @@ def register( description: str, validate: Callable[[T], bool], option_type: object | None = None, + get_from_env: Callable[[str, T], T] = lambda x, y: y, ) -> None: """Register an option so it can be set/described etc. 
by end-users @@ -126,6 +180,9 @@ def register( A function which returns True if the option's value is valid and otherwise should raise a `ValueError` or `TypeError`. option Optional override for the option type to be displayed. Otherwise `type(default_value)`. + get_from_env + An optional function which takes as arguments the name of the option and a default value and returns the value from the environment variable `ANNDATA_CAPS_OPTION` (or default if not present). + Default behavior is to return `default_value` without checking the environment. """ try: validate(default_value) @@ -144,7 +201,7 @@ def register( self._registered_options[option] = RegisteredOption( option, default_value, doc, validate, option_type ) - self._config[option] = default_value + self._config[option] = get_from_env(option, default_value) self._update_override_function_for_new_option(option) def _update_override_function_for_new_option( @@ -294,5 +351,27 @@ def __doc__(self): # PLACE REGISTERED SETTINGS HERE SO THEY CAN BE PICKED UP FOR DOCSTRING CREATION # ################################################################################## + +categories_option = "remove_unused_categories" +categories_default_value = True +categories_description = ( + "Whether or not to remove unused categories with :class:`~pandas.Categorical`." 
+) + + +def validate_bool(val) -> bool: + if not isinstance(val, bool): + raise TypeError(f"{val} not valid boolean") + return True + + +settings.register( + categories_option, + categories_default_value, + categories_description, + validate_bool, + get_from_env=check_and_get_bool, +) + ################################################################################## ################################################################################## diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index a2b010bdf..e745ee312 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -30,6 +30,7 @@ from anndata._warnings import ImplicitModificationWarning from .. import utils +from .._config import settings from ..compat import ( CupyArray, CupySparseMatrix, @@ -413,8 +414,9 @@ def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index): self._varp = adata_ref.varp._view(self, vidx) # fix categories uns = copy(adata_ref._uns) - self._remove_unused_categories(adata_ref.obs, obs_sub, uns) - self._remove_unused_categories(adata_ref.var, var_sub, uns) + if settings.remove_unused_categories: + self._remove_unused_categories(adata_ref.obs, obs_sub, uns) + self._remove_unused_categories(adata_ref.var, var_sub, uns) # set attributes self._obs = DataFrameView(obs_sub, view_args=(self, "obs")) self._var = DataFrameView(var_sub, view_args=(self, "var")) diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py index 14127271f..1c6186fb5 100644 --- a/anndata/tests/test_base.py +++ b/anndata/tests/test_base.py @@ -12,6 +12,7 @@ from scipy.sparse import csr_matrix, issparse from anndata import AnnData +from anndata._config import settings from anndata.tests.helpers import assert_equal, gen_adata # some test objects that we use below @@ -399,6 +400,15 @@ def test_slicing_remove_unused_categories(): assert adata[2:4].obs["k"].cat.categories.tolist() == ["b"] +def test_slicing_dont_remove_unused_categories(): + with 
settings.override(remove_unused_categories=False): + adata = AnnData( + np.array([[1, 2], [3, 4], [5, 6], [7, 8]]), dict(k=["a", "a", "b", "b"]) + ) + adata._sanitize() + assert adata[2:4].obs["k"].cat.categories.tolist() == ["a", "b"] + + def test_get_subset_annotation(): adata = AnnData( np.array([[1, 2, 3], [4, 5, 6]]), diff --git a/anndata/tests/test_config.py b/anndata/tests/test_config.py index 76961f1a1..dfd613cd1 100644 --- a/anndata/tests/test_config.py +++ b/anndata/tests/test_config.py @@ -1,8 +1,16 @@ from __future__ import annotations +import os +from enum import Enum + import pytest -from anndata._config import SettingsManager +from anndata._config import ( + SettingsManager, + check_and_get_bool, + check_and_get_environ_var, + validate_bool, +) option = "test_var" default_val = False @@ -18,12 +26,6 @@ type_3 = list[int] -def validate_bool(val) -> bool: - if not isinstance(val, bool): - raise TypeError(f"{val} not valid boolean") - return True - - def validate_int_list(val) -> bool: if not isinstance(val, list) or not [isinstance(type(e), int) for e in val]: raise TypeError(f"{repr(val)} is not a valid int list") @@ -49,6 +51,53 @@ def test_register_option_default(): assert description in settings.describe(option) +def test_register_with_env(monkeypatch): + with monkeypatch.context() as mp: + option_env = "test_var_env" + default_val_env = False + description_env = "My doc string env!" + option_env_var = "ANNDATA_" + option_env.upper() + mp.setenv(option_env_var, "1") + + settings.register( + option_env, + default_val_env, + description_env, + validate_bool, + get_from_env=check_and_get_bool, + ) + + assert settings.test_var_env + + +def test_register_with_env_enum(monkeypatch): + with monkeypatch.context() as mp: + option_env = "test_var_env" + default_val_env = False + description_env = "My doc string env!" 
+ option_env_var = "ANNDATA_" + option_env.upper() + mp.setenv(option_env_var, "b") + + class TestEnum(Enum): + a = False + b = True + + def check_and_get_bool_enum(option, default_value): + return check_and_get_environ_var( + "ANNDATA_" + option.upper(), "a", cast=TestEnum + ).value + + settings.register( + option_env, + default_val_env, + description_env, + validate_bool, + get_from_env=check_and_get_bool_enum, + ) + + assert settings.test_var_env + + def test_register_bad_option(): with pytest.raises(TypeError, match="'foo' is not a valid int list"): settings.register( @@ -129,3 +178,50 @@ def test_deprecation_no_message(): def test_option_typing(): assert settings._registered_options[option_3].type == type_3 assert str(type_3) in settings.describe(option_3, print_description=False) + + +def test_check_and_get_environ_var(monkeypatch): + with monkeypatch.context() as mp: + option_env_var = "ANNDATA_OPTION" + assert hash("foo") == check_and_get_environ_var( + option_env_var, "foo", ["foo", "bar"], lambda x: hash(x) + ) + mp.setenv(option_env_var, "bar") + assert hash("bar") == check_and_get_environ_var( + option_env_var, "foo", ["foo", "bar"], lambda x: hash(x) + ) + mp.setenv(option_env_var, "Not foo or bar") + with pytest.warns( + match=f'Value "{os.environ[option_env_var]}" is not in allowed' + ): + check_and_get_environ_var( + option_env_var, "foo", ["foo", "bar"], lambda x: hash(x) + ) + assert hash("Not foo or bar") == check_and_get_environ_var( + option_env_var, "foo", cast=lambda x: hash(x) + ) + + +def test_check_and_get_bool(monkeypatch): + with monkeypatch.context() as mp: + option_env_var = "ANNDATA_" + option.upper() + assert not check_and_get_bool(option, default_val) + mp.setenv(option_env_var, "1") + assert check_and_get_bool(option, default_val) + mp.setenv(option_env_var, "Not 0 or 1") + with pytest.warns( + match=f'Value "{os.environ[option_env_var]}" is not in allowed' + ): + check_and_get_bool(option, default_val) + + +def 
test_check_and_get_bool_enum(monkeypatch): + with monkeypatch.context() as mp: + option_env_var = "ANNDATA_" + option.upper() + mp.setenv(option_env_var, "b") + + class TestEnum(Enum): + a = False + b = True + + assert check_and_get_environ_var(option_env_var, "a", cast=TestEnum).value diff --git a/docs/release-notes/0.11.0.md b/docs/release-notes/0.11.0.md index e35e67b0d..220e176c4 100644 --- a/docs/release-notes/0.11.0.md +++ b/docs/release-notes/0.11.0.md @@ -3,6 +3,7 @@ ```{rubric} Features ``` * Add `settings` object with methods for altering internally-used options, like checking for uniqueness on `obs`' index {pr}`1270` {user}`ilan-gold` +* Add `remove_unused_categories` option to `anndata.settings` to override current behavior. Default is `True` (i.e., previous behavior). Please refer to the [documentation](https://anndata.readthedocs.io/en/latest/generated/anndata.settings.html) for usage. {pr}`1340` {user}`ilan-gold` ```{rubric} Bugfix ```