From 532ba3bfcd5a8de1420575a7aa7703a3a5406ffc Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 6 Feb 2024 10:04:57 +0100
Subject: [PATCH 1/3] [pre-commit.ci] pre-commit autoupdate (#1354)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b2a594b2f..0efcc6587 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.14
+    rev: v0.2.0
     hooks:
       - id: ruff
         types_or: [python, pyi, jupyter]

From 4b4c92fa8fc61e71413a70b6978ab180ab54db80 Mon Sep 17 00:00:00 2001
From: Severin Dicks <37635888+Intron7@users.noreply.github.com>
Date: Wed, 7 Feb 2024 15:07:39 +0100
Subject: [PATCH 2/3] added yaml for gpu_ci (#1359)

* Set 30min timeout for GPU tests

* added yaml for gpu_ci

* fixes typo

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* List pip packages too

---------

Co-authored-by: Isaac Virshup <ivirshup@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .ci/gpu_ci.yml                 | 12 ++++++++++++
 .github/workflows/test-gpu.yml | 20 ++++++++++----------
 2 files changed, 22 insertions(+), 10 deletions(-)
 create mode 100644 .ci/gpu_ci.yml

diff --git a/.ci/gpu_ci.yml b/.ci/gpu_ci.yml
new file mode 100644
index 000000000..9776ec3d5
--- /dev/null
+++ b/.ci/gpu_ci.yml
@@ -0,0 +1,12 @@
+name: cupy_env
+channels:
+  - nvidia
+  - conda-forge
+dependencies:
+  - python=3.12
+  - cuda-version=11.8
+  - cupy
+  - numba
+  - pytest
+  - pytest-cov
+  - pytest-xdist
diff --git a/.github/workflows/test-gpu.yml b/.github/workflows/test-gpu.yml
index 104396116..5d93f10d9 100644
--- a/.github/workflows/test-gpu.yml
+++ b/.github/workflows/test-gpu.yml
@@ -35,9 +35,14 @@ jobs:
     name: GPU Tests
     needs: check
     runs-on: "cirun-aws-gpu--${{ github.run_id }}"
+    # Setting a timeout of 30 minutes, as the AWS runner costs money
+    # At time of writing, a typical run takes about 5 minutes
+    timeout-minutes: 30
+
     defaults:
       run:
         shell: bash -el {0}
+
     steps:
       - uses: actions/checkout@v3
         with:
@@ -49,14 +54,7 @@ jobs:
       - uses: mamba-org/setup-micromamba@v1
         with:
           micromamba-version: "1.3.1-0"
-          environment-name: anndata-gpu-ci
-          create-args: >-
-            python=3.11
-            cupy
-            numba
-            pytest
-            pytest-cov
-            pytest-xdist
+          environment-file: .ci/gpu_ci.yml
           init-shell: >-
             bash
           generate-run-shell: false
@@ -64,8 +62,10 @@ jobs:
       - name: Install AnnData
         run: pip install .[dev,test,gpu]
 
-      - name: Mamba list
-        run: micromamba list
+      - name: Env list
+        run: |
+          micromamba list
+          pip list
 
       - name: Run test
         run: pytest -m gpu --cov --cov-report=xml --cov-context=test -n 4

From f10bbdec3ae4a74571dfc955043c377f17536667 Mon Sep 17 00:00:00 2001
From: Ilan Gold <ilanbassgold@gmail.com>
Date: Thu, 8 Feb 2024 11:14:47 +0100
Subject: [PATCH 3/3] (feat): add setting to retain categories (#1340)

* (feat): add options features.

* (feat): tests, doc strings

* (feat): add settings to docs

* (fix): add `describe_option` to exports, try to fix docs errors

* (chore): add reset test

* (fix): no multi-inheritance in py3.9 for NamedTuple

* (refactor): use decorator

* (chore): move options section

* (chore): add release note

* (refactor): class based implementation

* (feat): add deprecation

* (chore): clean up docstrings and variables

* (chore): redo release note

* (bug): fix `api.md`

* (style): fix grammar

* finish up typing

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* style

* (feat): use attributes instead of items

* (chore): no boolean without *

* (feat): support multi-option functionality

* (feat): add `__dir__` method

* (fix): `default_value` typing

* (feat): dynamic docstring as class method

* (feat): tab completion in jupyter notebook for override

* (feat): tab completion in jupyter notebook for override

* (feat): docstring for `override`

* (refactor): do docstring update in wrapped function

* (chore): remove docstring types.

* (fix): `KeyError` -> `AttributeError`

* (refactor): `setattr` -> direct setting

* (refactor): no more decorator for updating `override`

* (refactor): relabel options docstring variable

* (fix): docstring tab

* (chore): add `override` to docs

* (fix): clean up docstring methods

* (chore): clean up unused methods/objects

* (chore): add extra test

* (fix): remove environment variables

* (feat): add setting to retain categories

* (chore): add extra test for `allowed_values=None`

* (chore): docs

* (chore): clarify `override` usage

* Apply suggestions from code review

Co-authored-by: Philipp A. <flying-sheep@web.de>

* (chore): add `dir` test

* (chore): use mocking

* (fix): small docstring fix

* (fix): validator api + tests with nice warnings

* (chore): remove leading space from note

* (chore): update from validation change

* (chore): make docstring clearer

* (fix): use `add_note`

* (refactor): unnecessary `else` in guard clause

* (fix): do not raise DeprecationWarning

* (chore): add 0/1 to docs for boolean env variables

* (chore): move env variable fetching into `register`

* (fix): typing from `cast`

* (chore): docs link

* (fix): use `Enum` properly

---------

Co-authored-by: Philipp A <flying-sheep@web.de>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 anndata/_config.py           |  85 ++++++++++++++++++++++++++-
 anndata/_core/anndata.py     |   6 +-
 anndata/tests/test_base.py   |  10 ++++
 anndata/tests/test_config.py | 110 ++++++++++++++++++++++++++++++++---
 docs/release-notes/0.11.0.md |   1 +
 5 files changed, 200 insertions(+), 12 deletions(-)

diff --git a/anndata/_config.py b/anndata/_config.py
index 4a8fc0d42..5128d79b6 100644
--- a/anndata/_config.py
+++ b/anndata/_config.py
@@ -1,16 +1,18 @@
 from __future__ import annotations
 
+import os
 import textwrap
 import warnings
 from collections.abc import Iterable
 from contextlib import contextmanager
+from enum import Enum
 from inspect import Parameter, signature
-from typing import TYPE_CHECKING, NamedTuple, TypeVar
+from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar
 
 from anndata.compat.exceptiongroups import add_note
 
 if TYPE_CHECKING:
-    from collections.abc import Callable
+    from collections.abc import Callable, Sequence
 
 T = TypeVar("T")
 
@@ -30,6 +32,55 @@ class RegisteredOption(NamedTuple):
     type: object
 
 
+def check_and_get_environ_var(
+    key: str,
+    default_value: str,
+    allowed_values: Sequence[str] | None = None,
+    cast: Callable[[Any], T] | type[Enum] = lambda x: x,
+) -> T:
+    """Get the environment variable and return it as a (potentially) non-string, usable value.
+
+    Parameters
+    ----------
+    key
+        The environment variable name.
+    default_value
+        The default value for `os.environ.get`.
+    allowed_values
+        Allowable string values, by default None (no check). Other values warn and fall back to `default_value`.
+    cast
+        Casting from the string to a (potentially different) python object, by default `lambda x: x`.
+        An :class:`~enum.Enum` subclass is indexed by member name rather than called.
+
+    Returns
+    -------
+    The casted value.
+    """
+    environ_value_or_default_value = os.environ.get(key, default_value)
+    if (
+        allowed_values is not None
+        and environ_value_or_default_value not in allowed_values
+    ):
+        warnings.warn(
+            f'Value "{environ_value_or_default_value}" is not in allowed {allowed_values} '
+            f"for environment variable {key}. Default {default_value} will be used."
+        )
+        environ_value_or_default_value = default_value
+    # Enum classes are looked up by member name; any other `cast` is simply called.
+    if isinstance(cast, type(Enum)):
+        return cast[environ_value_or_default_value]
+    return cast(environ_value_or_default_value)
+
+
+def check_and_get_bool(option, default_value):
+    """Read boolean `option` from env var `ANNDATA_<OPTION>` ("0"/"1"), defaulting to `default_value`."""
+    env_key = "ANNDATA_" + option.upper()
+    fallback = str(int(default_value))
+    return check_and_get_environ_var(
+        env_key, fallback, ["0", "1"], lambda raw: bool(int(raw))
+    )
+
+
 _docstring = """
 This manager allows users to customize settings for the anndata package.
 Settings here will generally be for advanced use-cases and should be used with caution.
@@ -39,6 +90,8 @@ class RegisteredOption(NamedTuple):
 {options_description}
 
 For setting an option please use :func:`~anndata.settings.override` (local) or set the above attributes directly (global) i.e., `anndata.settings.my_setting = foo`.
+For assignment by environment variable, use the variable name in all caps with `ANNDATA_` as the prefix before import of :mod:`anndata`.
+For boolean environment variable setting, use 1 for `True` and 0 for `False`.
 """
 
 
@@ -111,6 +164,7 @@ def register(
         description: str,
         validate: Callable[[T], bool],
         option_type: object | None = None,
+        get_from_env: Callable[[str, T], T] = lambda x, y: y,
     ) -> None:
         """Register an option so it can be set/described etc. by end-users
 
@@ -126,6 +180,9 @@ def register(
             A function which returns True if the option's value is valid and otherwise should raise a `ValueError` or `TypeError`.
         option
             Optional override for the option type to be displayed.  Otherwise `type(default_value)`.
+        get_from_env
+            An optional function which takes as arguments the name of the option and a default value and returns the value from the environment variable `ANNDATA_CAPS_OPTION` (or default if not present).
+            Default behavior is to return `default_value` without checking the environment.
         """
         try:
             validate(default_value)
@@ -144,7 +201,7 @@ def register(
         self._registered_options[option] = RegisteredOption(
             option, default_value, doc, validate, option_type
         )
-        self._config[option] = default_value
+        self._config[option] = get_from_env(option, default_value)
         self._update_override_function_for_new_option(option)
 
     def _update_override_function_for_new_option(
@@ -294,5 +351,27 @@ def __doc__(self):
 # PLACE REGISTERED SETTINGS HERE SO THEY CAN BE PICKED UP FOR DOCSTRING CREATION #
 ##################################################################################
 
+
+categories_option = "remove_unused_categories"
+categories_default_value = True
+categories_description = (
+    "Whether or not to remove unused categories with :class:`~pandas.Categorical`."
+)
+
+
+def validate_bool(val) -> bool:
+    if isinstance(val, bool):  # only genuine bools pass; ints such as 0/1 are rejected
+        return True
+    raise TypeError(f"{val} not valid boolean")
+
+
+settings.register(
+    categories_option,
+    categories_default_value,
+    categories_description,
+    validate_bool,
+    get_from_env=check_and_get_bool,
+)
+
 ##################################################################################
 ##################################################################################
diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py
index a2b010bdf..e745ee312 100644
--- a/anndata/_core/anndata.py
+++ b/anndata/_core/anndata.py
@@ -30,6 +30,7 @@
 from anndata._warnings import ImplicitModificationWarning
 
 from .. import utils
+from .._config import settings
 from ..compat import (
     CupyArray,
     CupySparseMatrix,
@@ -413,8 +414,9 @@ def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index):
         self._varp = adata_ref.varp._view(self, vidx)
         # fix categories
         uns = copy(adata_ref._uns)
-        self._remove_unused_categories(adata_ref.obs, obs_sub, uns)
-        self._remove_unused_categories(adata_ref.var, var_sub, uns)
+        if settings.remove_unused_categories:
+            self._remove_unused_categories(adata_ref.obs, obs_sub, uns)
+            self._remove_unused_categories(adata_ref.var, var_sub, uns)
         # set attributes
         self._obs = DataFrameView(obs_sub, view_args=(self, "obs"))
         self._var = DataFrameView(var_sub, view_args=(self, "var"))
diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py
index 14127271f..1c6186fb5 100644
--- a/anndata/tests/test_base.py
+++ b/anndata/tests/test_base.py
@@ -12,6 +12,7 @@
 from scipy.sparse import csr_matrix, issparse
 
 from anndata import AnnData
+from anndata._config import settings
 from anndata.tests.helpers import assert_equal, gen_adata
 
 # some test objects that we use below
@@ -399,6 +400,15 @@ def test_slicing_remove_unused_categories():
     assert adata[2:4].obs["k"].cat.categories.tolist() == ["b"]
 
 
+def test_slicing_dont_remove_unused_categories():
+    with settings.override(remove_unused_categories=False):
+        adata = AnnData(
+            np.array([[1, 2], [3, 4], [5, 6], [7, 8]]), dict(k=["a", "a", "b", "b"])
+        )
+        adata._sanitize()
+        assert adata[2:4].obs["k"].cat.categories.tolist() == ["a", "b"]
+
+
 def test_get_subset_annotation():
     adata = AnnData(
         np.array([[1, 2, 3], [4, 5, 6]]),
diff --git a/anndata/tests/test_config.py b/anndata/tests/test_config.py
index 76961f1a1..dfd613cd1 100644
--- a/anndata/tests/test_config.py
+++ b/anndata/tests/test_config.py
@@ -1,8 +1,16 @@
 from __future__ import annotations
 
+import os
+from enum import Enum
+
 import pytest
 
-from anndata._config import SettingsManager
+from anndata._config import (
+    SettingsManager,
+    check_and_get_bool,
+    check_and_get_environ_var,
+    validate_bool,
+)
 
 option = "test_var"
 default_val = False
@@ -18,12 +26,6 @@
 type_3 = list[int]
 
 
-def validate_bool(val) -> bool:
-    if not isinstance(val, bool):
-        raise TypeError(f"{val} not valid boolean")
-    return True
-
-
 def validate_int_list(val) -> bool:
     if not isinstance(val, list) or not [isinstance(type(e), int) for e in val]:
         raise TypeError(f"{repr(val)} is not a valid int list")
@@ -49,6 +51,53 @@ def test_register_option_default():
     assert description in settings.describe(option)
 
 
+def test_register_with_env(monkeypatch):
+    with monkeypatch.context() as mp:
+        option_env = "test_var_env"
+        default_val_env = False
+        description_env = "My doc string env!"
+        option_env_var = "ANNDATA_" + option_env.upper()
+        mp.setenv(option_env_var, "1")
+
+        settings.register(
+            option_env,
+            default_val_env,
+            description_env,
+            validate_bool,
+            get_from_env=check_and_get_bool,
+        )
+
+        assert settings.test_var_env
+
+
+def test_register_with_env_enum(monkeypatch):
+    with monkeypatch.context() as mp:
+        option_env = "test_var_env"
+        default_val_env = False
+        description_env = "My doc string env!"
+        option_env_var = "ANNDATA_" + option_env.upper()
+        mp.setenv(option_env_var, "b")
+
+        class TestEnum(Enum):
+            a = False
+            b = True
+
+        def check_and_get_bool_enum(option, default_value):
+            return check_and_get_environ_var(
+                "ANNDATA_" + option.upper(), "a", cast=TestEnum
+            ).value
+
+        settings.register(
+            option_env,
+            default_val_env,
+            description_env,
+            validate_bool,
+            get_from_env=check_and_get_bool_enum,
+        )
+
+        assert settings.test_var_env
+
+
 def test_register_bad_option():
     with pytest.raises(TypeError, match="'foo' is not a valid int list"):
         settings.register(
@@ -129,3 +178,50 @@ def test_deprecation_no_message():
 def test_option_typing():
     assert settings._registered_options[option_3].type == type_3
     assert str(type_3) in settings.describe(option_3, print_description=False)
+
+
+def test_check_and_get_environ_var(monkeypatch):
+    with monkeypatch.context() as mp:
+        option_env_var = "ANNDATA_OPTION"
+        assert hash("foo") == check_and_get_environ_var(
+            option_env_var, "foo", ["foo", "bar"], lambda x: hash(x)
+        )
+        mp.setenv(option_env_var, "bar")
+        assert hash("bar") == check_and_get_environ_var(
+            option_env_var, "foo", ["foo", "bar"], lambda x: hash(x)
+        )
+        mp.setenv(option_env_var, "Not foo or bar")
+        with pytest.warns(
+            match=f'Value "{os.environ[option_env_var]}" is not in allowed'
+        ):
+            check_and_get_environ_var(
+                option_env_var, "foo", ["foo", "bar"], lambda x: hash(x)
+            )
+        assert hash("Not foo or bar") == check_and_get_environ_var(
+            option_env_var, "foo", cast=lambda x: hash(x)
+        )
+
+
+def test_check_and_get_bool(monkeypatch):
+    with monkeypatch.context() as mp:
+        option_env_var = "ANNDATA_" + option.upper()
+        assert not check_and_get_bool(option, default_val)
+        mp.setenv(option_env_var, "1")
+        assert check_and_get_bool(option, default_val)
+        mp.setenv(option_env_var, "Not 0 or 1")
+        with pytest.warns(
+            match=f'Value "{os.environ[option_env_var]}" is not in allowed'
+        ):
+            check_and_get_bool(option, default_val)
+
+
+def test_check_and_get_bool_enum(monkeypatch):
+    with monkeypatch.context() as mp:
+        option_env_var = "ANNDATA_" + option.upper()
+        mp.setenv(option_env_var, "b")
+
+        class TestEnum(Enum):
+            a = False
+            b = True
+
+        assert check_and_get_environ_var(option_env_var, "a", cast=TestEnum).value
diff --git a/docs/release-notes/0.11.0.md b/docs/release-notes/0.11.0.md
index e35e67b0d..220e176c4 100644
--- a/docs/release-notes/0.11.0.md
+++ b/docs/release-notes/0.11.0.md
@@ -3,6 +3,7 @@
 ```{rubric} Features
 ```
 * Add `settings` object with methods for altering internally-used options, like checking for uniqueness on `obs`' index {pr}`1270` {user}`ilan-gold`
+* Add `remove_unused_categories` option to `anndata.settings` to override current behavior.  Default is `True` (i.e., previous behavior).  Please refer to the [documentation](https://anndata.readthedocs.io/en/latest/generated/anndata.settings.html) for usage.  {pr}`1340` {user}`ilan-gold`
 
 ```{rubric} Bugfix
 ```