Merge pull request #85 from PixelgenTechnologies/feature/exe-1364-remove-pydoc-styles

Validate pydocstyle rules with Ruff
PixelgenTechnologies · Feb 7, 2024 · 907904d · 907904d
2 parents 21eca65 + 12ada02
commit 907904d
Show file tree

Hide file tree

Showing 37 changed files with 325 additions and 442 deletions.
diff --git a/.flake8 b/.flake8
@@ -1,6 +1,6 @@
 [flake8]
 
-extend-ignore = E501,E402,W503,E203,D213,D203,DOC301,DOC502
+extend-ignore = E501,E402,W503,E203,DOC301,DOC502,D213, D203
 exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,cue.mod
 docstring-convention = all
 style = sphinx
@@ -9,7 +9,4 @@ arg-type-hints-in-signature = True
 arg-type-hints-in-docstring = False
 # Ignore documentation linting in tests
 per-file-ignores =
-    tests/**: D101, D102, D103, D200, D202, D205, D212, D121, D400, D401, D403, D404, D415, DOC
-    # Since click uses a different layout for the docs strings to generate the
-    # cli docs, we ignore these rules here.
-    src/pixelator/cli/**: D200, D212, D400, D415
+    tests/**: DOC
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -5,36 +5,36 @@ repos:
     rev: v4.4.0
     hooks:
       - id: check-yaml
-        exclude: '^(conda-recipe)/(.*)$'
+        exclude: "^(conda-recipe)/(.*)$"
       - id: end-of-file-fixer
-        exclude: '^(.*)/webreport/template.html$'
+        exclude: "^(.*)/webreport/template.html$"
       - id: trailing-whitespace
 
   - repo: https://github.com/psf/black
     rev: 23.3.0
     hooks:
       - id: black
 
+
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    # Ruff version.
+    rev: "v0.2.1"
+    hooks:
+      - id: ruff
+
   - repo: https://github.com/pycqa/flake8
-    rev: '6.0.0'  # pick a git hash / tag to point to
+    rev: "6.0.0" # pick a git hash / tag to point to
     hooks:
       - id: flake8
         additional_dependencies:
-          - pydocstyle
           - flake8-docstrings
           - pydoclint
 
-  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    # Ruff version.
-    rev: 'v0.0.261'
-    hooks:
-      - id: ruff
-
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: 'v1.3.0'  # Use the sha / tag you want to point at
+    rev: "v1.3.0" # Use the sha / tag you want to point at
     hooks:
       - id: mypy
-        exclude: '(^cue.mod/)|(^docs/)'
+        exclude: "(^cue.mod/)|(^docs/)"
         additional_dependencies:
           - numpy
           - types-requests
@@ -48,11 +48,11 @@ repos:
 
   - repo: local
     hooks:
-      -  id: copyright-checker
-         name: Copyright notice checker
-         description: Checks that there is a copyright header in all files
-         entry: ./utils/check_copyright.py
-         language: python
-         pass_filenames: true
-         types: [file, python]
-         exclude: '^(docs)/(.*)$'
+      - id: copyright-checker
+        name: Copyright notice checker
+        description: Checks that there is a copyright header in all files
+        entry: ./utils/check_copyright.py
+        language: python
+        pass_filenames: true
+        types: [file, python]
+        exclude: "^(docs)/(.*)$"
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -57,12 +57,11 @@ fastparquet = "^2023.8.0"
 graspologic = "^3.3.0"
 
 [tool.poetry.group.dev.dependencies]
-ruff = "^0.0.244"
+ruff = "*"
 pre-commit = "^3.2.2"
 bumpversion = "*"
 coverage = "*"
 flake8 = "*"
-flake8-docstrings = "^1.7.0"
 invoke = "*"
 isort = "*"
 pylint = "*"
@@ -103,8 +102,26 @@ filterwarnings = ["ignore::DeprecationWarning",]
 
 [tool.ruff]
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint]
+select = [
+    # pycodestyle
+    "E",
+    # pydocstyle
+    "D",
+]
+# Allow fix for all enabled rules (when `--fix` is provided).
+fixable = ["ALL"]
+unfixable = []
+ignore = ["E501","E402","E203","D213","D203"]
+exclude = ["docs/conf.py"]
+
+[tool.ruff.lint.per-file-ignores]
 "pixelator/report/__init__.py" = ["E501"]
+"**/tests/**" = ["D101", "D102", "D103", "D200", "D202", "D205", "D212" , "D400", "D401", "D403", "D404", "D415"]
+# Since click uses a different layout for the docstrings to generate the
+# cli docs, we ignore these rules here.
+"src/pixelator/cli/**" = ["D200", "D212", "D400", "D415"]
+"src/pixelator/test_utils/**" = ["D"]
 
 [tool.poetry-dynamic-versioning]
 enable = true

diff --git a/src/pixelator/__main__.py b/src/pixelator/__main__.py
@@ -1,8 +1,8 @@
-"""
-Top-level entrypoint for Pixelator
+"""Top-level entrypoint for Pixelator.
 
 Copyright (c) 2022 Pixelgen Technologies AB.
 """
+
 import sys
 from pixelator.cli import main_cli
 

diff --git a/src/pixelator/amplicon/__init__.py b/src/pixelator/amplicon/__init__.py
@@ -1,4 +1,5 @@
-"""
+"""Top-level module for amplicon.
+
 Copyright (c) 2023 Pixelgen Technologies AB.
 """
 

diff --git a/src/pixelator/analysis/colocalization/estimate.py b/src/pixelator/analysis/colocalization/estimate.py
@@ -1,4 +1,5 @@
-"""
+"""Module for estimating colocalization statistics.
+
 Copyright (c) 2023 Pixelgen Technologies AB.
 """
 
@@ -22,6 +23,15 @@ def estimate_observation_statistics(
     permutation_results: pd.DataFrame,
     funcs: Tuple[CoLocalizationFunction, ...],
 ):
+    """Estimate the observation statistics for colocalization analysis.
+
+    :param observations: The observed data.
+    :param permutation_results: The permutation results.
+    :param funcs: Tuple of colocalization functions.
+    :return: The estimated observation statistics.
+    :rtype: pd.DataFrame
+    """
+
     def estimates():
         for func in funcs:
             func_name = func.name
@@ -61,6 +71,18 @@ def permutation_analysis_results(
     n=50,
     random_seed: Optional[int] = None,
 ) -> pd.DataFrame:
+    """Perform permutation analysis on colocalization data.
+
+    :param data: The input data for colocalization analysis.
+    :param funcs: A tuple of colocalization functions to apply.
+    :param permuter: A function that generates permuted data for analysis.
+    :param transformer: An optional function to transform the permuted data before analysis.
+    :param n: The number of permutations to perform. Default is 50.
+    :param random_seed: The random seed for reproducibility. Default is None.
+    :return: The results of the permutation analysis.
+    :rtype: pd.DataFrame
+    """
+
     def constuct_permutation_data(data, n):
         for idx, permuted_df in enumerate(permuter(data, n=n, random_seed=random_seed)):
             df_for_comp = transformer(permuted_df) if transformer else permuted_df

diff --git a/src/pixelator/analysis/colocalization/permute.py b/src/pixelator/analysis/colocalization/permute.py
@@ -1,4 +1,5 @@
-"""
+"""Module with functions for creating permuted data.
+
 Copyright (c) 2023 Pixelgen Technologies AB.
 """
 
@@ -25,8 +26,7 @@ def _get_random_number_generator(
 def permutations(
     df: RegionByCountsDataFrame, n=50, random_seed: Optional[int] = None
 ) -> Generator[RegionByCountsDataFrame, None, None]:
-    """
-    Generate `n` permutatinos of the data provided in `df`
+    """Generate `n` permutations of the data provided in `df`.
 
     :param df: dataframe to use as basis of permutations
     :param n: number of permutations to generate, defaults to 50
@@ -43,7 +43,8 @@ def permute(
     df: RegionByCountsDataFrame,
     random_number_generator: Optional[RandomNumberGenerator] = None,
 ) -> RegionByCountsDataFrame:
-    """
+    """Generate permutations of data frame.
+
     Permute the given dataframe in a way that preserves the number of
     counts in each region. The proportions of each marker are kept
     approximately by sampling from a multinomial distribution of the

diff --git a/src/pixelator/annotate/aggregates.py b/src/pixelator/annotate/aggregates.py
@@ -1,5 +1,4 @@
-"""
-This module contains functions for finding aggregates
+"""Functions for finding aggregates.
 
 Copyright (c) 2022 Pixelgen Technologies AB.
 """
@@ -21,8 +20,7 @@
 
 
 def specificity_tau(matrix: np.ndarray) -> np.ndarray:
-    """
-    Tau specificity score computed as described in [1]_.
+    """Tau specificity score computed as described in [1]_.
 
     Essentially it gives us a score between 0 and 1, where a component that
     expresses a single marker would have a tau score of 1, and one where all
@@ -48,7 +46,8 @@ def specificity_tau(matrix: np.ndarray) -> np.ndarray:
 
 
 def call_aggregates(adata: AnnData, inplace: bool = True) -> Optional[AnnData]:
-    """
+    """Call aggregates on the adata instance.
+
     We defined aggregates as components where either:
      - A single or a handful of markers account for almost all of the count data.
        These can likely be attributed to single antibodies forming aggregates

diff --git a/src/pixelator/annotate/cell_calling.py b/src/pixelator/annotate/cell_calling.py
@@ -1,5 +1,4 @@
-"""
-This module contains functions for doing size-based cell calling
+"""Functions for doing size-based cell calling.
 
 Copyright (c) 2022 Pixelgen Technologies AB.
 """
@@ -26,7 +25,8 @@ def find_component_size_limits(
     component_sizes: np.ndarray,
     direction: Literal["lower", "upper"],
 ) -> Optional[int]:
-    """
+    """Find component size limits.
+
     This function will attempt to find a cutoff for a distribution of component sizes.
     The direction of the cut-off is determined by the `direction` parameter (lower for
     min size and upper for max size).
@@ -66,7 +66,8 @@ def find_component_size_limits(
     """
 
     def log_size_and_rank(df: pd.DataFrame) -> pd.DataFrame:
-        """
+        """Rank components by size.
+
         Rank component sizes by size, sort by their rank, and compute the
         log10 of both the sizes and ranks. The input dataframe must contain
         a `size` column with the component sizes. A new dataframe is
@@ -79,7 +80,8 @@ def log_size_and_rank(df: pd.DataFrame) -> pd.DataFrame:
         return df
 
     def smooth(df: pd.DataFrame, x_var: str, y_var: str) -> pd.DataFrame:
-        """
+        """Create a smoothing spline.
+
         Calculate a smoothing spline of df[x_var] ~ df[y_var]
         to make it possible to calculate a less unstable derivative.
         The input dataframe must contain the `x_var` and `y_var`
@@ -96,7 +98,8 @@ def smooth(df: pd.DataFrame, x_var: str, y_var: str) -> pd.DataFrame:
         return df
 
     def derivatives(df: pd.DataFrame, x_var: str) -> pd.DataFrame:
-        """
+        """Calculate first and second derivatives.
+
         Calculate the first and second derivatives of the smoothed
         `x_var` variable. The input dataframe must contain the
         `x_var` and `smooth` columns. The returned dataframe
@@ -108,7 +111,8 @@ def derivatives(df: pd.DataFrame, x_var: str) -> pd.DataFrame:
         return df
 
     def find_der1_vs_der2_outliers(df: pd.DataFrame) -> pd.Series:
-        """
+        """Find the distance from the origin to each component.
+
         Find the distance from the origin to each component in
         the space df[der1] ~ df[der2], then try to find
         outliers in the upper part of component ranks, by
@@ -128,11 +132,10 @@ def find_der1_vs_der2_outliers(df: pd.DataFrame) -> pd.Series:
         return df["rank"] == rank - 1
 
     def minimum_der2(df: pd.DataFrame) -> pd.Series:
-        """
-        Find argmin element. The function returns
-        a boolean Series where the global minimum
-        of `der2` evaluates to True. The input dataframe
-        must contain the `der2` column.
+        """Find argmin element.
+
+        The function returns a boolean Series where the global minimum of `der2` evaluates
+        to True. The input dataframe must contain the `der2` column.
         """
         return df["der2"] == np.nanmin(df["der2"])
 

diff --git a/src/pixelator/annotate/constants.py b/src/pixelator/annotate/constants.py
@@ -1,6 +1,4 @@
-"""
-This module contains constants used as part of the annotation
-modules.
+"""Constants used as part of the annotation modules.
 
 Copyright (c) 2022 Pixelgen Technologies AB.
 """

diff --git a/src/pixelator/cli/plugin.py b/src/pixelator/cli/plugin.py
@@ -3,6 +3,7 @@
 
 Copyright (c) 2022 Pixelgen Technologies AB.
 """
+
 import importlib.metadata
 
 try:
@@ -45,10 +46,9 @@ def fetch_cli_plugins() -> Generator[EntryPoint, None, None]:
 
 
 def add_cli_plugins(group: Group) -> None:
-    """
-    Add all cli plugins we can find to the provided group
-    :param group: An instance of `click.Group` to add sub commands to
+    """Add all cli plugins we can find to the provided group.
 
+    :param group: An instance of `click.Group` to add sub commands to
     :returns: None
     """
     for entrypoint in fetch_cli_plugins():

diff --git a/src/pixelator/collapse/process.py b/src/pixelator/collapse/process.py
@@ -5,6 +5,7 @@
 
 Copyright (c) 2023 Pixelgen Technologies AB.
 """
+
 import logging
 import tempfile
 import typing
@@ -29,8 +30,10 @@
     from umi_tools._dedup_umi import edit_distance
     from umi_tools.network import breadth_first_search
 
+from pathlib import Path
+from typing import Union
+
 from pixelator.collapse.constants import SEED
-from pixelator.exception import FileFqGzEmpty
 from pixelator.types import PathType
 from pixelator.utils import gz_size
 
@@ -43,6 +46,24 @@
 UniqueFragmentToUpiB = dict[UniqueFragment, list[UpiB]]
 
 
+class FileFqGzEmpty(Exception):
+    """Exception carrying details about an empty fastq.gz file.
+
+    Attributes
+    ----------
+        msg: the error message to output
+        fname: the name of the file, as a string or a `Path`
+        size: the size of the file uncompressed (should be 0)
+
+    """
+
+    def __init__(self, msg: str, fname: Union[str, Path], size: int):
+        """Store the message, file name and size on the exception instance."""
+        self.msg = msg
+        self.fname = fname
+        self.size = size
+
+
 class CollapsedFragment(typing.NamedTuple):
     """A collapsed fragment.
 

diff --git a/src/pixelator/config/__init__.py b/src/pixelator/config/__init__.py
@@ -1,6 +1,4 @@
-"""
-Copyright (c) 2023 Pixelgen Technologies AB.
-"""
+"""Copyright (c) 2023 Pixelgen Technologies AB."""
 
 from pixelator.config.assay import (
     Assay,