[misc] replace black with ruff format and lint, run autofixes (#1011)

* Add ruff-format to pre-commit * [misc] replace black with ruff format and lint, run autofixes * autofixes on tools and notebooks * Update tools/cellxgene_census_builder/src/cellxgene_census_builder/util.py Co-authored-by: Emanuele Bezzi <[email protected]> * Update tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/build_soma.py Co-authored-by: Emanuele Bezzi <[email protected]> * Address comment * Update api/python/cellxgene_census/src/cellxgene_census/experimental/__init__.py Co-authored-by: Emanuele Bezzi <[email protected]> * Update api/python/cellxgene_census/src/cellxgene_census/_util.py Co-authored-by: Emanuele Bezzi <[email protected]> * format arguments in _release_directory --------- Co-authored-by: Emanuele Bezzi <[email protected]>
chanzuckerberg · Feb 21, 2024 · aa0459d · aa0459d
1 parent 317fb41
commit aa0459d
Show file tree

Hide file tree

Showing 96 changed files with 1,562 additions and 1,177 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,33 +1,39 @@
 exclude: (^doc/)|(.*/venv/)
 default_stages: [commit]
 repos:
-  - repo: https://github.com/psf/black
-    rev: 23.12.1
-    hooks:
-      - id: black-jupyter
-        name: black-notebooks
-        files: ^api/python/notebooks
-        args: ["--config", "./api/python/notebooks/pyproject.toml"]
-      - id: black
-        name: black-cellxgene-census
-        files: ^api/python/cellxgene_census
-        args: ["--config", "./api/python/cellxgene_census/pyproject.toml"]
-      - id: black
-        name: black-tools
-        files: ^tools
-        args: ["--config", "./tools/pyproject.toml"]
-
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.1.15
+    rev: v0.2.2
     hooks:
       - id: ruff
         name: ruff-cellxgene-census
         files: ^api/python/cellxgene_census
+        types_or: [python, pyi, jupyter]
         args: ["--config=./api/python/cellxgene_census/pyproject.toml", "--fix"]
       - id: ruff
         name: ruff-tools
         files: ^tools
-        args: [ "--config=./tools/pyproject.toml", "--fix" ]
+        types_or: [python, pyi, jupyter]
+        args: ["--config=./tools/pyproject.toml", "--fix" ]
+      - id: ruff
+        name: ruff-notebooks
+        files: ^api/python/notebooks
+        types_or: [python, pyi, jupyter]
+        args: ["--config=./api/python/notebooks/pyproject.toml", "--fix"]
+      - id: ruff-format
+        name: ruff-format-cellxgene-census
+        files: ^api/python/cellxgene_census
+        types_or: [python, pyi, jupyter]
+        args: ["--config=./api/python/cellxgene_census/pyproject.toml"]
+      - id: ruff-format
+        name: ruff-format-tools
+        files: ^tools
+        types_or: [python, pyi, jupyter]
+        args: ["--config=./tools/pyproject.toml"]
+      - id: ruff-format
+        name: ruff-format-notebooks
+        files: ^api/python/notebooks
+        types_or: [python, pyi, jupyter]
+        args: ["--config=./api/python/notebooks/pyproject.toml"]
 
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v1.8.0
@@ -81,12 +87,6 @@ repos:
           - typing_extensions
           - types-PyYAML
 
-  - repo: https://github.com/nbQA-dev/nbQA
-    rev: 1.7.1
-    hooks:
-      - id: nbqa-black
-        files: ^api/python/notebooks
-
   - repo: https://github.com/igorshubovych/markdownlint-cli
     rev: v0.39.0
     hooks:

diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml
@@ -68,9 +68,75 @@ exclude = ["tests*"]  # exclude packages matching these glob patterns (empty by
 [tool.setuptools_scm]
 root = "../../.."
 
-[tool.black]
+[tool.ruff]
 line-length = 120
-target_version = ['py39']
+src = ["api/python/cellxgene_census/src"]
+target-version = "py38"
+
+[tool.ruff.lint]
+select = [
+    "F",  # Errors detected by Pyflakes
+    "E",  # Error detected by Pycodestyle
+    "W",  # Warning detected by Pycodestyle
+    "I",  # isort
+    "D",  # pydocstyle
+    "B",  # flake8-bugbear
+    "TID",  # flake8-tidy-imports
+    "C4",  # flake8-comprehensions
+    "BLE",  # flake8-blind-except
+    "UP",  # pyupgrade
+    "RUF100",  # Report unused noqa directives
+]
+ignore = [
+    # line too long -> we accept long comment lines; formatter gets rid of long code lines
+    "E501",
+    # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
+    "E731",
+    # allow I, O, l as variable names -> I is the identity matrix
+    "E741",
+    # Missing docstring in public package
+    "D104",
+    # Missing docstring in public module
+    "D100",
+    # Missing docstring in __init__
+    "D107",
+    # Errors from function calls in argument defaults. These are fine when the result is immutable.
+    "B008",
+    # __magic__ methods are often self-explanatory, allow missing docstrings
+    "D105",
+    # first line should end with a period [Bug: doesn't work with single-line docstrings]
+    "D400",
+    # First line should be in imperative mood; try rephrasing
+    "D401",
+    ## Disable one in each pair of mutually incompatible rules
+    # We don’t want a blank line before a class docstring
+    "D203",
+    # We want docstrings to start immediately after the opening triple quote
+    "D213",
+    # Missing argument description in the docstring TODO: enable
+    "D417",
+    # Blank line required between summary line and description TODO: enable
+    "D205",
+    # Prefer absolute imports over relative imports from parent modules TODO: enable
+    "TID252",
+]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+
+[tool.ruff.lint.per-file-ignores]
+"*/tests/*" = ["D"]
+"*/__init__.py" = ["F401"]
+
+[tool.ruff.format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"
 
 [tool.mypy]
 show_error_codes = true
@@ -86,12 +152,3 @@ markers = [
     "experimental: tests for the `experimental` package",
     "lts_compat_check: check for compatibility with an LTS build",
 ]
-
-[tool.ruff]
-select = ["E", "F", "B", "I"]
-ignore = ["E501", "E402", "C408", ]
-line-length = 120
-target-version = "py39"
-
-[tool.ruff.isort]
-known-first-party =["cellxgene_census"]
diff --git a/api/python/cellxgene_census/src/cellxgene_census/__init__.py b/api/python/cellxgene_census/src/cellxgene_census/__init__.py
@@ -1,5 +1,4 @@
-"""
-An API to facilitate use of the CZI Science CELLxGENE Census. The Census is a versioned container of single-cell data hosted at `CELLxGENE Discover`_.
+"""An API to facilitate use of the CZI Science CELLxGENE Census. The Census is a versioned container of single-cell data hosted at `CELLxGENE Discover`_.
 
 The API is built on the `tiledbsoma` SOMA API, and provides a number of helper functions including:
 
@@ -23,7 +22,12 @@
 from importlib import metadata
 
 from ._get_anndata import get_anndata
-from ._open import download_source_h5ad, get_default_soma_context, get_source_h5ad_uri, open_soma
+from ._open import (
+    download_source_h5ad,
+    get_default_soma_context,
+    get_source_h5ad_uri,
+    open_soma,
+)
 from ._presence_matrix import get_presence_matrix
 from ._release_directory import (
     get_census_mirror_directory,

diff --git a/api/python/cellxgene_census/src/cellxgene_census/_experiment.py b/api/python/cellxgene_census/src/cellxgene_census/_experiment.py
@@ -2,8 +2,7 @@
 #
 # Licensed under the MIT License.
 
-"""
-Experiments handler.
+"""Experiments handler.
 
 Contains methods to retrieve SOMA Experiments.
 """
@@ -34,7 +33,6 @@ def _get_experiment(census: soma.Collection, organism: str) -> soma.Experiment:
         maturing
 
     Examples:
-
         >>> human = get_experiment(census, "homo sapiens")
 
         >>> human = get_experiment(census, "Homo sapiens")

diff --git a/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py b/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py
@@ -2,7 +2,7 @@
 #
 # Licensed under the MIT License.
 
-"""Get slice as AnnData
+"""Get slice as AnnData.
 
 Methods to retrieve slices of the census as AnnData objects.
 """
@@ -28,8 +28,7 @@ def get_anndata(
     var_coords: Optional[SparseDFCoord] = None,
     column_names: Optional[soma.AxisColumnNames] = None,
 ) -> anndata.AnnData:
-    """
-    Convenience wrapper around :class:`tiledbsoma.Experiment` query, to build and execute a query,
+    """Convenience wrapper around :class:`tiledbsoma.Experiment` query, to build and execute a query,
     and return it as an :class:`anndata.AnnData` object.
 
     Args:
@@ -81,4 +80,9 @@ def get_anndata(
         obs_query=soma.AxisQuery(value_filter=obs_value_filter, coords=obs_coords),
         var_query=soma.AxisQuery(value_filter=var_value_filter, coords=var_coords),
     ) as query:
-        return query.to_anndata(X_name=X_name, column_names=column_names, X_layers=X_layers, obsm_layers=obsm_layers)
+        return query.to_anndata(
+            X_name=X_name,
+            column_names=column_names,
+            X_layers=X_layers,
+            obsm_layers=obsm_layers,
+        )
diff --git a/api/python/cellxgene_census/src/cellxgene_census/_open.py b/api/python/cellxgene_census/src/cellxgene_census/_open.py
@@ -2,7 +2,7 @@
 #
 # Licensed under the MIT License.
 
-"""Open census and related datasets
+"""Open census and related datasets.
 
 Contains methods to open publicly hosted versions of Census object and access its source datasets.
 """
@@ -41,8 +41,7 @@
 
 
 def _assert_mirror_supported(mirror: CensusMirror) -> None:
-    """
-    Verifies if the mirror is supported by this version of the census API.
+    """Verifies if the mirror is supported by this version of the census API.
     This method provides a proper error message in case an old version of the census
     tries to connect to an unsupported mirror.
     """
@@ -65,12 +64,10 @@ def _resolve_census_locator(locator: CensusLocator, mirror: CensusMirror) -> Res
 
 
 def _open_soma(
-    locator: ResolvedCensusLocator, context: Optional[soma.options.SOMATileDBContext] = None
+    locator: ResolvedCensusLocator,
+    context: Optional[soma.options.SOMATileDBContext] = None,
 ) -> soma.Collection:
-    """
-    Private. Merge config defaults and return open census as a soma Collection/context.
-    """
-
+    """Private. Merge config defaults and return open census as a soma Collection/context."""
     # if no user-defined context, cellxgene_census defaults take precedence over SOMA defaults
     context = context or get_default_soma_context()
 
@@ -98,7 +95,6 @@ def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) ->
         A :class:`tiledbsoma.SOMATileDBContext` object with sensible defaults.
 
     Examples:
-
         To reduce the amount of memory used by TileDB-SOMA I/O operations:
 
         .. highlight:: python
@@ -122,7 +118,6 @@ def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) ->
     Lifecycle:
         experimental
     """
-
     tiledb_config = dict(DEFAULT_TILEDB_CONFIGURATION, **(tiledb_config or {}))
     return soma.options.SOMATileDBContext().replace(tiledb_config=tiledb_config)
 
@@ -209,7 +204,6 @@ def open_soma(
         >>> with cellxgene_census.open_soma(tiledb_config={"py.init_buffer_bytes": 128 * 1024**2}) as census:
                 ...
     """
-
     if tiledb_config is not None and context is not None:
         raise ValueError("Only one of tiledb_config and context can be specified.")
 

diff --git a/api/python/cellxgene_census/src/cellxgene_census/_presence_matrix.py b/api/python/cellxgene_census/src/cellxgene_census/_presence_matrix.py
@@ -2,7 +2,7 @@
 #
 # Licensed under the MIT License.
 
-"""Presence matrix methods
+"""Presence matrix methods.
 
 Methods to retrieve the feature dataset presence matrix.
 """
@@ -44,7 +44,6 @@ def get_presence_matrix(
         <321x60554 sparse array of type '<class 'numpy.uint8'>'
         with 6441269 stored elements in Compressed Sparse Row format>
     """
-
     exp = _get_experiment(census, organism)
     presence = exp.ms[measurement_name]["feature_dataset_presence_matrix"]
     return presence.read((slice(None),)).coos().concat().to_scipy().tocsr()