diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 7ba94a4..b0ecf9e 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -4,7 +4,7 @@ on:
   push:
     branches: [main]
   pull_request:
-    branches: ['*']
+    branches: ["*"]
 
 defaults:
   run:
@@ -20,7 +20,6 @@ jobs:
       - name: Setup Conda Environment
         uses: conda-incubator/setup-miniconda@v3
         with:
-          miniforge-variant: Mambaforge
           miniforge-version: latest
           use-mamba: true
           environment-file: ci/requirements-docs.yml
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index a869f2c..ed6a4a4 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -4,7 +4,7 @@ on:
   push:
     branches: [main]
   pull_request:
-    branches: ['*']
+    branches: ["*"]
 
 jobs:
   checks:
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index f78ffd2..65320bb 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -4,7 +4,7 @@ on:
   push:
     branches: [main]
   pull_request:
-    branches: ['*']
+    branches: ["*"]
   workflow_dispatch: # allows you to trigger manually
 
 # When this workflow is queued, automatically cancel any previous running
@@ -19,61 +19,62 @@ defaults:
 
 jobs:
   build:
-    name: ${{ matrix.os }} ${{ matrix.python-version }} ${{ matrix.requirements }}
+    name:
+      ${{ matrix.os }} ${{ matrix.python-version }} ${{ matrix.requirements }}
     runs-on: ${{ matrix.os }}-latest
     strategy:
       fail-fast: false
       matrix:
         os: [ubuntu]
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
         requirements: [latest]
         include:
           # Test on macos and windows (first and last version of python only)
           - os: macos
-            python-version: '3.8'
+            python-version: "3.8"
             requirements: latest
           - os: macos
-            python-version: '3.12'
+            python-version: "3.13"
             requirements: latest
           - os: windows
-            python-version: '3.8'
+            python-version: "3.8"
             requirements: latest
           - os: windows
-            python-version: '3.12'
+            python-version: "3.13"
             requirements: latest
           # Test on minimal requirements
          - os: ubuntu
-            python-version: '3.8'
+            python-version: "3.8"
             requirements: minimal
           # Old builds are not available on osx-arm64
           # - os: macos
           #   python-version: '3.8'
           #   requirements: minimal
           - os: windows
-            python-version: '3.8'
+            python-version: "3.8"
             requirements: minimal
           # Test without any optional dependencies
           - os: ubuntu
-            python-version: '3.8'
+            python-version: "3.8"
             requirements: no_optionals
           - os: ubuntu
-            python-version: '3.12'
+            python-version: "3.13"
             requirements: no_optionals
           - os: macos
-            python-version: '3.8'
+            python-version: "3.8"
             requirements: no_optionals
           - os: macos
-            python-version: '3.12'
+            python-version: "3.13"
             requirements: no_optionals
           - os: windows
-            python-version: '3.8'
+            python-version: "3.8"
             requirements: no_optionals
           - os: windows
-            python-version: '3.12'
+            python-version: "3.13"
             requirements: no_optionals
           # Test on nightly builds of requirements
           - os: ubuntu
-            python-version: '3.12'
+            python-version: "3.13"
             requirements: upstream
 
     steps:
@@ -85,7 +86,6 @@ jobs:
      - name: Setup Conda Environment
         uses: conda-incubator/setup-miniconda@v3
         with:
-          miniforge-variant: Mambaforge
           miniforge-version: latest
           use-mamba: true
           python-version: ${{ matrix.python-version }}
@@ -95,9 +95,6 @@ jobs:
       - name: Install nightly builds
         if: ${{ matrix.requirements == 'upstream' }}
         run: |
-          # Pick up https://github.com/mamba-org/mamba/pull/2903
-          mamba install -n base 'mamba>=1.5.2'
-
           mamba uninstall --force numpy pandas scipy pyarrow
           python -m pip install --no-deps --pre --prefer-binary \
             --extra-index-url https://pypi.fury.io/arrow-nightlies/ \
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 234935b..6844d10 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,23 +1,66 @@
 repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: check-added-large-files
+      - id: check-case-conflict
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: check-yaml
+      - id: debug-statements
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+      - id: name-tests-test
+        args: ["--pytest-test-first"]
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/rbubley/mirrors-prettier
+    rev: v3.4.2
+    hooks:
+      - id: prettier
+        types_or: [yaml, markdown, html, css, scss, javascript, json]
+        args: [--prose-wrap=always]
+
   - repo: https://github.com/MarcoGorelli/absolufy-imports
     rev: v0.3.1
     hooks:
       - id: absolufy-imports
         name: absolufy-imports
-  - repo: https://github.com/psf/black
-    rev: 23.12.1
-    hooks:
-      - id: black
-        language_version: python3
-        args:
-          - --target-version=py38
+
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.14
+    rev: v0.8.4
     hooks:
       - id: ruff
         args: ["--fix", "--show-fixes"]
+      - id: ruff-format
+
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.3.0
+    hooks:
+      - id: codespell
+        additional_dependencies:
+          - tomli
+
+  - repo: https://github.com/shellcheck-py/shellcheck-py
+    rev: "v0.10.0.1"
+    hooks:
+      - id: shellcheck
+
+  - repo: https://github.com/abravalheri/validate-pyproject
+    rev: v0.23
+    hooks:
+      - id: validate-pyproject
+        additional_dependencies: ["validate-pyproject-schema-store[all]"]
+
+  - repo: https://github.com/python-jsonschema/check-jsonschema
+    rev: "0.30.0"
+    hooks:
+      - id: check-dependabot
+      - id: check-github-workflows
+
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.8.0
+    rev: v1.14.0
     hooks:
       - id: mypy
         additional_dependencies:
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 4c4a7f1..cef19ce 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -9,9 +9,9 @@ conda:
   environment: ci/requirements-docs.yml
 
 python:
-    install:
-      - method: pip
-        path: .
+  install:
+    - method: pip
+      path: .
 
 sphinx:
   builder: html
diff --git a/README.md b/README.md
index 0b0f662..7fba87e 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-recursive_diff
-=============
+# recursive_diff
+
 [![doc-badge](https://github.com/crusaderky/recursive_diff/actions/workflows/docs.yml/badge.svg)](https://github.com/crusaderky/recursive_diff/actions)
 [![pre-commit-badge](https://github.com/crusaderky/recursive_diff/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/crusaderky/recursive_diff/actions)
 [![pytest-badge](https://github.com/crusaderky/recursive_diff/actions/workflows/pytest.yml/badge.svg)](https://github.com/crusaderky/recursive_diff/actions)
diff --git a/ci/requirements-docs.yml b/ci/requirements-docs.yml
index e63fdc7..edf0cbb 100644
--- a/ci/requirements-docs.yml
+++ b/ci/requirements-docs.yml
@@ -2,7 +2,7 @@ name: recursive_diff-docs
 channels:
   - conda-forge
 
 dependencies:
-  - python=3.12
+  - python=3.13
   - sphinx
   - sphinx_rtd_theme
diff --git a/ci/requirements-latest.yml b/ci/requirements-latest.yml
index d815f4f..68167ab 100644
--- a/ci/requirements-latest.yml
+++ b/ci/requirements-latest.yml
@@ -8,5 +8,5 @@ dependencies:
   - xarray
   - dask
 
-  - h5netcdf  # NetCDF engine
-  - scipy  # NetCDF engine
+  - h5netcdf # NetCDF engine
+  - scipy # NetCDF engine
diff --git a/ci/requirements-no_optionals.yml b/ci/requirements-no_optionals.yml
index c003c0c..fb9f718 100644
--- a/ci/requirements-no_optionals.yml
+++ b/ci/requirements-no_optionals.yml
@@ -7,4 +7,4 @@ dependencies:
   - packaging
   - xarray
 
-  - pyarrow  # Only needed to suppress warnings; remove when pandas 3.0 is released
+  - pyarrow # Only needed to suppress warnings; remove when pandas 3.0 is released
diff --git a/ci/requirements-upstream.yml b/ci/requirements-upstream.yml
index ac63671..fa08f26 100644
--- a/ci/requirements-upstream.yml
+++ b/ci/requirements-upstream.yml
@@ -8,8 +8,8 @@ dependencies:
   - xarray
   - dask
 
-  - h5netcdf  # NetCDF engine
-  - scipy  # NetCDF engine
+  - h5netcdf # NetCDF engine
+  - scipy # NetCDF engine
 
   - pip
   - pip:
diff --git a/doc/_static/style.css b/doc/_static/style.css
index 7257d57..f4d80fe 100644
--- a/doc/_static/style.css
+++ b/doc/_static/style.css
@@ -1,15 +1,15 @@
 @import url("theme.css");
 
-.wy-side-nav-search>a img.logo,
-.wy-side-nav-search .wy-dropdown>a img.logo {
-  width: 12rem
+.wy-side-nav-search > a img.logo,
+.wy-side-nav-search .wy-dropdown > a img.logo {
+  width: 12rem;
 }
 
 .wy-side-nav-search {
   background-color: #eee;
 }
 
-.wy-side-nav-search>div.version {
+.wy-side-nav-search > div.version {
   display: none;
 }
diff --git a/doc/_templates/layout.html b/doc/_templates/layout.html
index 4c57ba8..31410c9 100644
--- a/doc/_templates/layout.html
+++ b/doc/_templates/layout.html
@@ -1,2 +1,2 @@
-{% extends "!layout.html" %}
-{% set css_files = css_files + ["_static/style.css"] %}
+{% extends "!layout.html" %} {% set css_files = css_files +
+["_static/style.css"] %}
diff --git a/doc/conf.py b/doc/conf.py
index e789a83..c458fd5 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -59,7 +59,7 @@
 
 # General information about the project.
 project = "recursive_diff"
-copyright = "2018-%s, recursive_diff Developers" % datetime.datetime.now().year
+copyright = f"2018-{datetime.datetime.now().year}, recursive_diff Developers"
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
diff --git a/doc/extend.rst b/doc/extend.rst
index f738b68..f0d57ea 100644
--- a/doc/extend.rst
+++ b/doc/extend.rst
@@ -43,4 +43,4 @@ collections, will be recursively descended into::
 
     >>> list(recursive_diff(
     ...     Rectangle(1, 2), Rectangle(1.1, 2.7), abs_tol=.5))
-    ['[h]: 2.0 != 2.7 (abs: 7.0e-01, rel: 3.5e-01)']
\ No newline at end of file
+    ['[h]: 2.0 != 2.7 (abs: 7.0e-01, rel: 3.5e-01)']
diff --git a/doc/index.rst b/doc/index.rst
index a883ac8..914ea52 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -5,7 +5,7 @@ problem arises when you want to compare two large JSON data structures, because
 operator will tell you if the two structures differ *somewhere*, but won't tell you
 where*. Additionally, if the structures contain floating-point numbers, == won't allow
 to set a tolerance: 1.00000000000001 is different from 1.0, which is majorly problematic
-as floating point arithmetics are naturally characterised by noise around the 15th
+as floating point arithmetic is naturally characterised by noise around the 15th
 decimal position (the size of the double-precision mantissa). Tests on floating point
 numbers are typically performed with :func:`math.isclose` or :func:`numpy.isclose`,
 which however are not usable if the numbers to be tested lie deep inside a nested
diff --git a/pyproject.toml b/pyproject.toml
index 1e4fd3e..a20ad8e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,11 +11,13 @@ classifiers = [
     "Operating System :: OS Independent",
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3 :: Only",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ]
 requires-python = ">=3.8"
 dependencies = [
@@ -27,6 +29,8 @@ dynamic = ["version"]
 
 [project.urls]
 Homepage = "https://github.com/crusaderky/recursive_diff"
+"Bug Tracker" = "https://github.com/crusaderky/recursive_diff/issues"
+Changelog = "https://recursive-diff.readthedocs.io/en/latest/whats-new.html"
 
 [project.readme]
 text = "Recursively compare two Python data structures"
@@ -75,6 +79,8 @@ filterwarnings = [
    'ignore:the `pandas.MultiIndex` object.* will no longer be implicitly promoted:FutureWarning',
    'ignore:updating coordinate .* with a PandasMultiIndex:FutureWarning',
    'ignore:Updating MultiIndexed coordinate .* would corrupt indices:FutureWarning',
+    # xarray vs. pandas upstream
+    'ignore:Converting non-nanosecond precision datetime:UserWarning',
 ]
 
 [tool.coverage.report]
@@ -89,27 +95,48 @@ exclude_lines = [
 ]
 
 [tool.ruff]
-builtins = ["ellipsis"]
 exclude = [".eggs"]
 target-version = "py38"
 
 [tool.ruff.lint]
 ignore = [
-  "E402",    # module level import not at top of file
-  "SIM108",  # use ternary operator instead of if-else block
-  "N999",    # Invalid module name: 'TEMPLATE' TODO remove this line
+  "EM101",   # Exception must not use a string literal, assign to variable first
+  "EM102",   # Exception must not use an f-string literal, assign to variable first
+  "N802",    # Function name should be lowercase
+  "N803",    # Argument name should be lowercase
+  "N806",    # Variable should be lowercase
+  "N816",    # Variable in global scope should not be mixedCase
+  "PT006",   # Wrong type passed to first argument of `pytest.mark.parametrize`; expected `tuple`
+  "PLC0414", # Import alias does not rename original package
+  "PLR0912", # Too many branches
+  "PLR0913", # Too many arguments in function definition
+  "PLR2004", # Magic value used in comparison, consider replacing `123` with a constant variable
+  "PLW2901", # for loop variable overwritten by assignment target
+  "SIM108",  # Use ternary operator instead of if-else block
+  "N999",    # Invalid module name: 'TEMPLATE' TODO remove this line
 ]
 select = [
-  "F",      # Pyflakes
+  "YTT",    # flake8-2020
   "B",      # flake8-bugbear
   "C4",     # flake8-comprehensions
-  "ISC",    # flake8-implicit-str-concat
+  "EM",     # flake8-errmsg
+  "EXE",    # flake8-executable
+  "ICN",    # flake8-import-conventions
+  "G",      # flake8-logging-format
+  "PIE",    # flake8-pie
+  "PT",     # flake8-pytest-style
+  "RET",    # flake8-return
   "SIM",    # flake8-simplify
-  "E",      # Pycodestyle
-  "W",      # Pycodestyle
+  "ARG",    # flake8-unused-arguments
   "I",      # isort
+  "NPY",    # NumPy specific rules
   "N",      # pep8-naming
-  "UP",     # Pyupgrade
+  "E",      # Pycodestyle
+  "W",      # Pycodestyle
+  "PGH",    # pygrep-hooks
+  "F",      # Pyflakes
+  "PL",     # pylint
+  "UP",     # pyupgrade
   "RUF",    # unused-noqa
   "EXE001", # Shebang is present but file is not executable
 ]
@@ -118,9 +145,9 @@ select = [
 known-first-party = ["TEMPLATE"]
 
 [tool.mypy]
-allow_incomplete_defs = false
-allow_untyped_decorators = false
-allow_untyped_defs = false
+disallow_incomplete_defs = true
+disallow_untyped_decorators = true
+disallow_untyped_defs = true
 ignore_missing_imports = true
 no_implicit_optional = true
 show_error_codes = true
@@ -130,4 +157,4 @@ warn_unreachable = true
 
 [[tool.mypy.overrides]]
 module = ["*.tests.*"]
-allow_untyped_defs = true
+disallow_untyped_defs = false
diff --git a/recursive_diff/__init__.py b/recursive_diff/__init__.py
index 186a096..83e71da 100644
--- a/recursive_diff/__init__.py
+++ b/recursive_diff/__init__.py
@@ -15,4 +15,4 @@
     obj.__module__ = "recursive_diff"
 del obj
 
-__all__ = ("__version__", "recursive_diff", "recursive_eq", "cast")
+__all__ = ("__version__", "cast", "recursive_diff", "recursive_eq")
diff --git a/recursive_diff/cast.py b/recursive_diff/cast.py
index e889676..1f7bbb6 100644
--- a/recursive_diff/cast.py
+++ b/recursive_diff/cast.py
@@ -3,15 +3,15 @@
 from collections.abc import Collection, Hashable
 from functools import singledispatch
 
-import numpy
-import pandas
+import numpy as np
+import pandas as pd
 import xarray
 
 from recursive_diff.proper_unstack import proper_unstack
 
 
 @singledispatch
-def cast(obj: object, brief_dims: Collection[Hashable]) -> object:
+def cast(obj: object, brief_dims: Collection[Hashable]) -> object:  # noqa: ARG001
     """Helper function of :func:`recursive_diff`.
 
     Cast objects into simpler object types:
@@ -46,25 +46,25 @@ def cast(obj: object, brief_dims: Collection[Hashable]) -> object:
     return obj
 
 
-@cast.register(numpy.integer)
-def cast_npint(obj: numpy.integer, brief_dims: Collection[Hashable]) -> int:
+@cast.register(np.integer)
+def cast_npint(obj: np.integer, brief_dims: Collection[Hashable]) -> int:  # noqa: ARG001
     """Single dispatch specialised variant of :func:`cast` for all
     numpy scalar integers (not to be confused with numpy arrays of integers)
     """
     return int(obj)
 
 
-@cast.register(numpy.floating)
-def cast_npfloat(obj: numpy.floating, brief_dims: Collection[Hashable]) -> float:
+@cast.register(np.floating)
+def cast_npfloat(obj: np.floating, brief_dims: Collection[Hashable]) -> float:  # noqa: ARG001
     """Single dispatch specialised variant of :func:`cast` for all
     numpy scalar floats (not to be confused with numpy arrays of floats)
     """
     return float(obj)
 
 
-@cast.register(numpy.ndarray)
+@cast.register(np.ndarray)
 def cast_nparray(
-    obj: numpy.ndarray, brief_dims: Collection[Hashable]
+    obj: np.ndarray, brief_dims: Collection[Hashable]
 ) -> dict[str, object]:
     """Single dispatch specialised variant of :func:`cast`
     for :class:`numpy.ndarray`.
@@ -73,15 +73,13 @@ def cast_nparray(
     RangeIndex() as the coords.
     """
     data = _strip_dataarray(xarray.DataArray(obj), brief_dims)
-    out = {f"dim_{i}": pandas.RangeIndex(size) for i, size in enumerate(obj.shape)}
+    out = {f"dim_{i}": pd.RangeIndex(size) for i, size in enumerate(obj.shape)}
     out["data"] = data
     return out
 
 
-@cast.register(pandas.Series)
-def cast_series(
-    obj: pandas.Series, brief_dims: Collection[Hashable]
-) -> dict[str, object]:
+@cast.register(pd.Series)
+def cast_series(obj: pd.Series, brief_dims: Collection[Hashable]) -> dict[str, object]:
     """Single dispatch specialised variant of :func:`cast`
     for :class:`pandas.Series`.
 
@@ -94,9 +92,9 @@ def cast_series(
     }
 
 
-@cast.register(pandas.DataFrame)
+@cast.register(pd.DataFrame)
 def cast_dataframe(
-    obj: pandas.DataFrame, brief_dims: Collection[Hashable]
+    obj: pd.DataFrame, brief_dims: Collection[Hashable]
 ) -> dict[str, object]:
     """Single dispatch specialised variant of :func:`cast`
     for :class:`pandas.DataFrame`.
@@ -171,9 +169,10 @@ def cast_dataset(
     }
 
 
-@cast.register(pandas.MultiIndex)
+@cast.register(pd.MultiIndex)
 def cast_multiindex(
-    obj: pandas.MultiIndex, brief_dims: Collection[Hashable]
+    obj: pd.MultiIndex,
+    brief_dims: Collection[Hashable],  # noqa: ARG001
 ) -> dict[str, object]:
     """Single dispatch specialised variant of :func:`cast`
     for :class:`pandas.MultiIndex`.
@@ -184,23 +183,24 @@ def cast_multiindex(
     return {"names": obj.names, "data": set(obj.tolist())}
 
 
-@cast.register(pandas.RangeIndex)
+@cast.register(pd.RangeIndex)
 def cast_rangeindex(
-    obj: pandas.RangeIndex, brief_dims: Collection[Hashable]
-) -> pandas.RangeIndex:
+    obj: pd.RangeIndex,
+    brief_dims: Collection[Hashable],  # noqa: ARG001
+) -> pd.RangeIndex:
     """Single dispatch specialised variant of :func:`cast`
     for :class:`pandas.RangeIndex`.
 
     This function does nothing - RangeIndex objects are dealt with
     directly by :func:`_recursive_diff`. This function is defined
     to prevent RangeIndex objects to be processed by the more generic
-    ``cast(obj: pandas.Index)`` below.
+    ``cast(obj: pd.Index)`` below.
     """
     return obj
 
 
-@cast.register(pandas.Index)
-def cast_index(obj: pandas.Index, brief_dims: Collection[Hashable]) -> xarray.DataArray:
+@cast.register(pd.Index)
+def cast_index(obj: pd.Index, brief_dims: Collection[Hashable]) -> xarray.DataArray:
     """Single dispatch specialised variant of :func:`cast`
     for :class:`pandas.Index`.
 
@@ -216,7 +216,7 @@ def cast_index(obj: pandas.Index, brief_dims: Collection[Hashable]) -> xarray.Da
 
 
 @cast.register(frozenset)
-def cast_frozenset(obj: frozenset, brief_dims: Collection[Hashable]) -> set:
+def cast_frozenset(obj: frozenset, brief_dims: Collection[Hashable]) -> set:  # noqa: ARG001
     """Single dispatch specialised variant of :func:`cast`
     for :class:`frozenset`.
 
@@ -226,7 +226,7 @@ def cast_frozenset(obj: frozenset, brief_dims: Collection[Hashable]) -> set:
 
 
 @cast.register(tuple)
-def cast_tuple(obj: tuple, brief_dims: Collection[Hashable]) -> list:
+def cast_tuple(obj: tuple, brief_dims: Collection[Hashable]) -> list:  # noqa: ARG001
     """Single dispatch specialised variant of :func:`cast`
     for :class:`tuple`.
 
@@ -265,7 +265,7 @@ def _strip_dataarray(
     # Ravel the array to make it become 1-dimensional.
     # To do this, we must first unstack any already stacked dimension.
     for dim in obj.dims:
-        if isinstance(obj.get_index(dim), pandas.MultiIndex):
+        if isinstance(obj.get_index(dim), pd.MultiIndex):
             res = proper_unstack(res, dim)
 
     # Transpose to ignore dimensions order
diff --git a/recursive_diff/dask_or_stub.py b/recursive_diff/dask_or_stub.py
index 4f8389d..06e489e 100644
--- a/recursive_diff/dask_or_stub.py
+++ b/recursive_diff/dask_or_stub.py
@@ -1,5 +1,4 @@
-"""Support dask-backed xarray objects, if dask is installed
-"""
+"""Support dask-backed xarray objects, if dask is installed"""
 
 try:
     from dask import compute
diff --git a/recursive_diff/ncdiff.py b/recursive_diff/ncdiff.py
old mode 100755
new mode 100644
index d1e77cf..fcc1dc0
--- a/recursive_diff/ncdiff.py
+++ b/recursive_diff/ncdiff.py
@@ -1,8 +1,8 @@
-#!/usr/bin/env python
 """Compare either two NetCDF files or all NetCDF files in two directories.
 
 See :doc:`bin/ncdiff`
 """
+
 from __future__ import annotations
 
 import argparse
@@ -115,7 +115,7 @@ def open_netcdf(fname: str, engine: str | None = None) -> xarray.Dataset:
     """
     # At the moment of writing, h5netcdf is the only engine
     # supporting LZF compression
-    logging.info(f"Opening {fname}")
+    logging.info("Opening %s", fname)
     return xarray.open_dataset(fname, engine=engine, chunks={})
 
 
@@ -143,7 +143,7 @@ def recursive_open_netcdf(
 
     # We don't invoke open_netcdf() directly inside the pushd context
     # to get a prettier logging message on the file being opened
-    logging.info(f"Opening {len(fnames)} NetCDF stores from {path}")
+    logging.info("Opening %d NetCDF stores from %s", len(fnames), path)
     return {
         fname: open_netcdf(os.path.join(path, fname), engine=engine) for fname in fnames
     }
diff --git a/recursive_diff/proper_unstack.py b/recursive_diff/proper_unstack.py
index dc2478b..741d280 100644
--- a/recursive_diff/proper_unstack.py
+++ b/recursive_diff/proper_unstack.py
@@ -2,12 +2,13 @@
 
 Copy-pasted from xarray-extras
 """
+
 from __future__ import annotations
 
 from collections.abc import Hashable
 from typing import TypeVar
 
-import pandas
+import pandas as pd
 import xarray
 
 T = TypeVar("T", xarray.DataArray, xarray.Dataset)
@@ -47,7 +48,7 @@ def proper_unstack(array: T, dim: Hashable) -> T:
         levels.append([levels_i[k] for k in level_map])
         codes.append([level_map[k] for k in codes_i])
 
-    mindex = pandas.MultiIndex(levels, codes, names=mindex.names)
+    mindex = pd.MultiIndex(levels, codes, names=mindex.names)
     array = array.copy()
     array.coords[dim] = mindex
 
@@ -56,8 +57,8 @@ def proper_unstack(array: T, dim: Hashable) -> T:
 
     # Convert numpy arrays of Python objects to numpy arrays of C floats, ints,
     # strings, etc.
-    for dim in mindex.names:
-        if array.coords[dim].dtype == object:
-            array.coords[dim] = array.coords[dim].values.tolist()
+    for name in mindex.names:
+        if array.coords[name].dtype == object:
+            array.coords[name] = array.coords[name].values.tolist()
 
     return array
diff --git a/recursive_diff/recursive_diff.py b/recursive_diff/recursive_diff.py
old mode 100755
new mode 100644
index 59a0b53..708509c
--- a/recursive_diff/recursive_diff.py
+++ b/recursive_diff/recursive_diff.py
@@ -3,6 +3,7 @@
 See also its most commonly used wrapper:
 :func:`~recursive_diff.testing.recursive_eq`
 """
+
 from __future__ import annotations
 
 import math
@@ -10,8 +11,8 @@
 from collections.abc import Collection, Hashable, Iterator
 from typing import Any, Literal
 
-import numpy
-import pandas
+import numpy as np
+import pandas as pd
 import xarray
 
 from recursive_diff import dask_or_stub as dask
@@ -109,7 +110,7 @@
     )
 
 
-def _recursive_diff(
+def _recursive_diff(  # noqa: PLR0915
     lhs: Any,
     rhs: Any,
     *,
@@ -125,7 +126,7 @@
     :param list path:
         list of nodes traversed so far, to be prepended to all error messages
     :param bool suppress_type_diffs:
-        if True, don't print out messages about differeces in type
+        if True, don't print out messages about differences in type
     :param str join:
         join type of numpy objects: 'inner' or 'outer'. Ignored for
         plain Python collections (set, dict, etc.) for which
@@ -149,7 +150,7 @@ def diff(msg: str, print_path: list[object] = path) -> str:
 
     # Identify if the variables are indices that must go through outer join,
     # *before* casting. This will be propagated downwards into the recursion.
-    if join == "inner" and are_instances(lhs, rhs, pandas.Index):
+    if join == "inner" and are_instances(lhs, rhs, pd.Index):
         join = "outer"
 
     if (
@@ -179,15 +180,15 @@ def diff(msg: str, print_path: list[object] = path) -> str:
     # When comparing an array vs. a plain python list or scalar, log an error
     # for the different dtype and then proceed to compare the contents
     if is_array(dtype_lhs) and is_array_like(dtype_rhs):
-        rhs = cast(numpy.array(rhs), brief_dims=brief_dims)
+        rhs = cast(np.array(rhs), brief_dims=brief_dims)
     elif is_array(dtype_rhs) and is_array_like(dtype_lhs):
-        lhs = cast(numpy.array(lhs), brief_dims=brief_dims)
+        lhs = cast(np.array(lhs), brief_dims=brief_dims)
 
     # Allow mismatched comparison of a RangeIndex vs. a regular index
-    if isinstance(lhs, pandas.RangeIndex) and not isinstance(rhs, pandas.RangeIndex):
-        lhs = cast(pandas.Index(lhs.values), brief_dims=brief_dims)
-    if isinstance(rhs, pandas.RangeIndex) and not isinstance(lhs, pandas.RangeIndex):
-        rhs = cast(pandas.Index(rhs.values), brief_dims=brief_dims)
+    if isinstance(lhs, pd.RangeIndex) and not isinstance(rhs, pd.RangeIndex):
+        lhs = cast(pd.Index(lhs.values), brief_dims=brief_dims)
+    if isinstance(rhs, pd.RangeIndex) and not isinstance(lhs, pd.RangeIndex):
+        rhs = cast(pd.Index(rhs.values), brief_dims=brief_dims)
 
     if dtype_lhs != dtype_rhs and not suppress_type_diffs:
         yield diff(f"object type differs: {dtype_lhs} != {dtype_rhs}")
@@ -198,13 +199,13 @@ def diff(msg: str, print_path: list[object] = path) -> str:
     if are_instances(lhs, rhs, list):
         if len(lhs) > len(rhs):
             yield diff(
-                "LHS has %d more elements than RHS: %s"
-                % (len(lhs) - len(rhs), _str_trunc(lhs[len(rhs) :]))
+                f"LHS has {len(lhs) - len(rhs)} more elements than RHS: "
+                + _str_trunc(lhs[len(rhs) :])
             )
         elif len(lhs) < len(rhs):
             yield diff(
-                "RHS has %d more elements than LHS: %s"
-                % (len(rhs) - len(lhs), _str_trunc(rhs[len(lhs) :]))
+                f"RHS has {len(rhs) - len(lhs)} more elements than LHS: "
+                + _str_trunc(rhs[len(lhs) :])
             )
         for i, (lhs_i, rhs_i) in enumerate(zip(lhs, rhs)):
             yield from _recursive_diff(
@@ -224,9 +225,9 @@ def diff(msg: str, print_path: list[object] = path) -> str:
         for x in sorted(rhs - lhs, key=repr):
             yield diff(f"{_str_trunc(x)} is in RHS only")
 
-    elif are_instances(lhs, rhs, pandas.RangeIndex):
+    elif are_instances(lhs, rhs, pd.RangeIndex):
         # Pretty-print differences in size. This is used not only by
-        # pandas.Series and pandas.DataFrame, but also by numpy arrays
+        # pd.Series and pd.DataFrame, but also by numpy arrays
         # and xarrays without coords
         if (
             lhs.start == rhs.start == 0
@@ -247,7 +248,7 @@ def diff(msg: str, print_path: list[object] = path) -> str:
 
     elif are_instances(lhs, rhs, dict):
         for key in sorted(lhs.keys() - rhs.keys(), key=repr):
-            if isinstance(lhs[key], pandas.Index):
+            if isinstance(lhs[key], pd.Index):
                 join = "outer"
             if join == "outer":
                 # Comparing an index
@@ -255,7 +256,7 @@ def diff(msg: str, print_path: list[object] = path) -> str:
             else:
                 yield diff(f"Pair {key}:{_str_trunc(lhs[key])} is in LHS only")
         for key in sorted(rhs.keys() - lhs.keys(), key=repr):
-            if isinstance(rhs[key], pandas.Index):
+            if isinstance(rhs[key], pd.Index):
                 join = "outer"
             if join == "outer":
                 # Comparing an index
@@ -346,17 +347,17 @@ def diff(msg: str, print_path: list[object] = path) -> str:
             # u = uint8,uint16, uint32, uint64
             # f = float32, float64
             # c = complex64, complex128
-            diffs = ~numpy.isclose(
+            diffs = ~np.isclose(
                 lhs.values, rhs.values, rtol=rel_tol, atol=abs_tol, equal_nan=True
             )
         elif lhs.dtype.kind == "M" and rhs.dtype.kind == "M":
             # Both arrays are datetime64
-            # Unlike with numpy.isclose(equal_nan=True), there is no
+            # Unlike with np.isclose(equal_nan=True), there is no
             # straightforward way to do a comparison of dates where
             # NaT == NaT returns True.
             # All datetime64's, including NaT, can be cast to milliseconds
-            # since 1970-01-01 (NaT is a special harcoded value).
+            # since 1970-01-01 (NaT is a special hardcoded value).
             # We must first normalise the subtype, so that you can
             # transparently compare e.g.
             # Comparison between two non-scalar, incomparable types
             # (like strings and numbers) will return True
         if diffs is True:
-            diffs = numpy.full(lhs.shape, dtype=bool, fill_value=True)
+            diffs = np.full(lhs.shape, dtype=bool, fill_value=True)
 
         if diffs.ndim > 1 and lhs.dims[-1] == "__stacked__":
             # N>0 original dimensions, some (but not all) of which are in
@@ -414,8 +415,8 @@ def diff(msg: str, print_path: list[object] = path) -> str:
             # indices, aligned on themselves. All dict values are NaN
             # by definition, so we can print a terser output by
            # converting the dicts to sets.
-            lhs = {k for k, v in lhs.items() if not pandas.isnull(v)}
-            rhs = {k for k, v in rhs.items() if not pandas.isnull(v)}
+            lhs = {k for k, v in lhs.items() if not pd.isnull(v)}
+            rhs = {k for k, v in rhs.items() if not pd.isnull(v)}
 
     # Finally dump out all the differences
     yield from _recursive_diff(
@@ -507,16 +508,16 @@ def _dtype_str(obj: object) -> str:
         # Base types don't have __name__
         dtype = str(type(obj))
 
-    if isinstance(obj, numpy.integer):
+    if isinstance(obj, np.integer):
         dtype = "int"
-    elif isinstance(obj, numpy.floating):
+    elif isinstance(obj, np.floating):
         dtype = "float"
 
-    if isinstance(obj, (numpy.ndarray, pandas.Series, xarray.DataArray)):
+    if isinstance(obj, (np.ndarray, pd.Series, xarray.DataArray)):
         np_dtype = obj.dtype
-    elif isinstance(obj, pandas.DataFrame):
+    elif isinstance(obj, pd.DataFrame):
         # TODO: support for DataFrames with different dtypes on different
-        # columns. See also cast(obj: pandas.DataFrame)
+        # columns. See also cast(obj: pd.DataFrame)
         np_dtype = obj.values.dtype
     else:
         np_dtype = None
diff --git a/recursive_diff/recursive_eq.py b/recursive_diff/recursive_eq.py
index 2311707..70e51d8 100644
--- a/recursive_diff/recursive_eq.py
+++ b/recursive_diff/recursive_eq.py
@@ -1,5 +1,5 @@
-"""Tools for unit testing
-"""
+"""Tools for unit testing"""
+
 from typing import Any
 
 from recursive_diff.recursive_diff import recursive_diff
diff --git a/recursive_diff/tests/test_proper_unstack.py b/recursive_diff/tests/test_proper_unstack.py
index 6e60e6e..756e8c2 100644
--- a/recursive_diff/tests/test_proper_unstack.py
+++ b/recursive_diff/tests/test_proper_unstack.py
@@ -1,7 +1,7 @@
-"""Copy-pasted from xarray-extras
-"""
-import numpy
-import pandas
+"""Copy-pasted from xarray-extras"""
+
+import numpy as np
+import pandas as pd
 import pytest
 import xarray
 
@@ -12,7 +12,7 @@ def test_proper_unstack_order():
     # Note: using MultiIndex.from_tuples is NOT the same thing as
     # round-tripping DataArray.stack().unstack(), as the latter is not
     # affected by the re-ordering issue
-    index = pandas.MultiIndex.from_tuples(
+    index = pd.MultiIndex.from_tuples(
         [
             ["x1", "first"],
             ["x1", "second"],
@@ -25,7 +25,7 @@ def test_proper_unstack_order():
         ],
         names=["x", "count"],
     )
-    xa = xarray.DataArray(numpy.arange(8), dims=["dim_0"], coords={"dim_0": index})
+    xa = xarray.DataArray(np.arange(8), dims=["dim_0"], coords={"dim_0": index})
     a = proper_unstack(xa, "dim_0")
 
     b = xarray.DataArray(
@@ -45,7 +45,7 @@ def test_proper_unstack_dtype():
         [[0, 1, 2, 3], [4, 5, 6, 7]],
         dims=["r", "c"],
         coords={
-            "r": pandas.to_datetime(["2000/01/01", "2000/01/02"]),
+            "r": pd.to_datetime(["2000/01/01", "2000/01/02"]),
             "c": [1, 2, 3, 4],
         },
     )
@@ -71,20 +71,20 @@ def test_proper_unstack_dataset():
         dims=["x", "col"],
         coords={
             "x": ["x0", "x1"],
-            "col": pandas.MultiIndex.from_tuples(
+            "col": pd.MultiIndex.from_tuples(
                 [("u0", "v0"), ("u0", "v1"), ("u1", "v0"), ("u1", "v1")],
                 names=["u", "v"],
             ),
         },
     )
-    xa = xarray.Dataset({"foo": a, "bar": ("w", [1, 2]), "baz": numpy.pi})
+    xa = xarray.Dataset({"foo": a, "bar": ("w", [1, 2]), "baz": np.pi})
     b = proper_unstack(xa, "col")
     c = xarray.DataArray(
         [[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
         dims=["x", "u", "v"],
         coords={"x": ["x0", "x1"], "u": ["u0", "u1"], "v": ["v0", "v1"]},
     )
-    d = xarray.Dataset({"foo": c, "bar": ("w", [1, 2]), "baz": numpy.pi})
+    d = xarray.Dataset({"foo": c, "bar": ("w", [1, 2]), "baz": np.pi})
     xarray.testing.assert_equal(b, d)
     for c in b.coords:
         assert b.coords[c].dtype.kind == "U"
@@ -95,11 +95,11 @@ def test_proper_unstack_other_mi():
         [[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, 3, 4], [5, 6, 7, 8]],
         dims=["row", "col"],
         coords={
-            "row": pandas.MultiIndex.from_tuples(
+            "row": pd.MultiIndex.from_tuples(
                 [("x0", "w0"), ("x0", "w1"), ("x1", "w0"), ("x1", "w1")],
                 names=["x", "w"],
            ),
-            "col": pandas.MultiIndex.from_tuples(
+            "col": pd.MultiIndex.from_tuples(
                 [("y0", "z0"), ("y0", "z1"), ("y1", "z0"), ("y1", "z1")],
                 names=["y", "z"],
             ),
@@ -110,7 +110,7 @@ def test_proper_unstack_other_mi():
         [[[1, 5], [1, 5]], [[2, 6], [2, 6]], [[3, 7], [3, 7]], [[4, 8], [4, 8]]],
         dims=["col", "x", "w"],
         coords={
-            "col": pandas.MultiIndex.from_tuples(
+            "col": pd.MultiIndex.from_tuples(
                 [("y0", "z0"), ("y0", "z1"), ("y1", "z0"), ("y1", "z1")],
                 names=["y", "z"],
             ),
diff --git a/recursive_diff/tests/test_recursive_diff.py b/recursive_diff/tests/test_recursive_diff.py
index bb115e1..7eb1761 100644
--- a/recursive_diff/tests/test_recursive_diff.py
+++ b/recursive_diff/tests/test_recursive_diff.py
@@ -42,7 +42,7 @@ def __eq__(self, other):
 
 @cast.register(Rectangle)
 @cast.register(Drawing)
-def _(obj, brief_dims):
+def _(obj, brief_dims):  # noqa: ARG001
     return {"w": obj.w, "h": obj.h}