diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 57654c6..f2ec487 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,47 +1,41 @@ -name: build +name: Build -on: [ push, pull_request ] +on: + push: + branches: + - main + pull_request: jobs: build: - name: Conda Build with Python${{ matrix.python-version }} runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11"] # "3.12" is not supported by current dependencies defaults: run: shell: bash -l {0} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install packages run: | sudo apt-get -y install pandoc if: matrix.python-version == 3.9 - name: Setup Conda (Micromamba) with Python${{ matrix.python-version }} - uses: mamba-org/setup-micromamba@v1 + uses: mamba-org/setup-micromamba@0dea6379afdaffa5d528b3d1dabc45da37f443fc # v2.0.4 with: cache-downloads: true cache-environment: true environment-file: environment.yml create-args: >- - conda python=${{ matrix.python-version }} - - name: Conda and Mamba versions - run: | - conda --version - echo "micromamba: $(micromamba --version)" - name: Install dependencies run: | - python -m pip install --no-user --editable "." - if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi - - name: Lint with flake8 - run: make lint - if: matrix.python-version == 3.9 -# - name: Check formatting with black -# run: black --check --target-version py39 daops tests -# if: matrix.python-version == 3.9 + python -m pip install --no-user --editable ".[dev,docs]" + - name: Check formatting with black and ruff + run: | + make lint - name: Test with pytest run: | python -m pytest -v tests diff --git a/.gitignore b/.gitignore index 6165a68..be07484 100644 --- a/.gitignore +++ b/.gitignore @@ -101,3 +101,6 @@ ENV/ # mypy .mypy_cache/ + +# JetBrains +.idea diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index e69de29..0000000 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 993dddb..b86bba2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,32 +3,48 @@ default_language_version: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - exclude: 'setup.cfg' - id: end-of-file-fixer - exclude: 'setup.cfg' - id: check-yaml - id: debug-statements + - repo: https://github.com/asottile/pyupgrade + rev: v3.19.0 + hooks: + - id: pyupgrade + args: [ '--py39-plus' ] - repo: https://github.com/psf/black - rev: 23.11.0 + rev: 25.1.0 hooks: - id: black - args: ["--target-version", "py37"] - - repo: https://github.com/pycqa/flake8 - rev: 6.1.0 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.0 hooks: - - id: flake8 - args: ['--config=setup.cfg'] - - repo: https://github.com/asottile/reorder_python_imports - rev: v3.9.0 + - id: ruff + args: [ '--fix', '--show-fixes' ] + - repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.10.0 hooks: - - id: reorder-python-imports - - repo: https://github.com/asottile/pyupgrade - rev: v3.15.0 +# - id: python-check-blanket-noqa +# - id: python-check-blanket-type-ignore + - id: python-no-eval + - id: python-no-log-warn + - id: python-use-type-annotations + - id: rst-directive-colons + - id: rst-inline-touching-normal + - id: text-unicode-replacement-char + - repo: https://github.com/keewis/blackdoc + rev: v0.3.9 hooks: - - id: pyupgrade + - id: blackdoc + 
additional_dependencies: [ 'black==25.1.0' ] + - id: blackdoc-autoupdate-black + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.29.4 + hooks: + - id: check-github-workflows + - id: check-readthedocs - repo: meta hooks: - id: check-hooks-apply @@ -42,5 +58,5 @@ ci: autoupdate_branch: '' autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' autoupdate_schedule: weekly - skip: [] + skip: [ ] submodules: false diff --git a/.readthedocs.yml b/.readthedocs.yml index 942ed0b..eb55548 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -8,18 +8,21 @@ version: 2 # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py + fail_on_warning: true # Optionally build your docs in additional formats such as PDF and ePub -# formats: -# - pdf +formats: [ ] build: - os: ubuntu-22.04 + os: "ubuntu-24.04" tools: - python: "mambaforge-22.9" + python: "mambaforge-23.11" + jobs: + pre_build: + - sphinx-apidoc -o docs --private --module-first src/daops conda: - environment: environment.yml + environment: environment-docs.yml # Optionally set the version of Python and requirements required to build your docs python: diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 24fcaad..f8e19b3 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -123,6 +123,7 @@ Logging .. code-block:: python from loguru import logger + logger.warning("This a warning message!") The mechanism for enabling log reporting in scripts/notebooks using ``loguru`` is as follows: diff --git a/Dockerfile b/Dockerfile index f5a485c..1d8cc5c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ ##================================================================================= ## ## EXAMPLE USAGE -## +## ## $ docker build -t daops . ## $ mkdir ~/container-outputs ## $ docker run -it \ @@ -48,7 +48,7 @@ RUN apt-get update && \ ARG mconda=Miniconda3-py311_23.10.0-1-Linux-x86_64.sh RUN wget https://repo.anaconda.com/miniconda/$mconda && \ bash ./$mconda -b -p $MINICONDA_PREFIX && \ - apt-get clean autoremove --yes && \ + apt-get clean autoremove --yes && \ rm -fr $MINICONDA_PREFIX/pkgs # wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba --strip-components=1 && \ @@ -115,5 +115,3 @@ RUN rm -fr /var/lib/{apt,dpkg,cache,log} #RUN ROOCS_CONFIG=$config_file $MINICONDA_PREFIX/envs/daops/bin/daops subset --area 0,-10,120,40 \ # --time 2085-01-16/2120-12-16 --levels / --time-components year:2090,2091,2092 \ # --output-dir /tmp --file-namer simple cmip5.output1.MOHC.HadGEM2-ES.rcp85.mon.atmos.Amon.r1i1p1.latest.tas - - \ No newline at end of file diff --git a/HISTORY.rst b/HISTORY.rst index 6c8e00a..93c2673 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,35 +1,55 @@ Version History =============== -v0.12.0 (2025-01-29) +v0.13.0 (unreleased) -------------------- +Breaking Changes +^^^^^^^^^^^^^^^^ +* ``clisops``>=0.14.0,<0.15 is now required. +* Replaced ``bump2version`` with ``bump-my-version``>=0.28.0. +* The source code structure has been reorganized to use a `src` directory layout. +* Testing structure no longer uses `__init__.py` files in test directories, instead using `conftest.py` with fixtures. +* Tests now use ``pooch`` for testing data retrieval and for safer testing setup and teardown. +* ``roocs-utils``-based `roocs.ini` configurations used within the code base are now handled by a function to fetch the cached configuration. This will be adjusted in a future release to use ``clisops``. 
+* Obsolete GitLab CI/CD configurations have been removed. + New Features ^^^^^^^^^^^^ +* Added official support for Python3.12. +* ``daops`` is now PEP 561 and PEP 621 compatible (``pyproject.toml``). +* Several docstring improvements to all modules, classes, their methods, and functions. +* ``flake8`` has been replaced with ``ruff`` and ``black``-style formatting. +* ``pre-commit`` hooks have been adjusted to perform Python 3.9+ code style checks and to catch minor security issues. + +v0.12.0 (2025-01-29) +-------------------- + +New Features +^^^^^^^^^^^^ * Added support for opening `kerchunk` files. * Updated `Dockerfile` and `app-package.cwl` file for use with ADES. Other Changes ^^^^^^^^^^^^^ +* Only ``clisops``\<0.15.0 supported. -* only ``clisops``<0.15.0 supported. v0.11.0 (2024-04-10) -------------------- Breaking Changes ^^^^^^^^^^^^^^^^ - * ``clisops``>=0.12.1 required. New Features ^^^^^^^^^^^^ - * Add clisops.ops.average_shape to daops.ops.average + v0.10.0 (2023-11-27) -------------------- +-------------------- Breaking Changes ^^^^^^^^^^^^^^^^ @@ -37,7 +57,6 @@ Breaking Changes New Features ^^^^^^^^^^^^ - * Added regridding operator (#111) @@ -70,6 +89,7 @@ Other Changes v0.8.0 (2022-04-13) ------------------- + Breaking Changes ^^^^^^^^^^^^^^^^ * ``clisops``>=0.9.0 required. @@ -82,6 +102,7 @@ New Features v0.7.0 (2021-10-28) ------------------- + Breaking Changes ^^^^^^^^^^^^^^^^ * ``clisops``>=0.7.0 and ``roocs-utils``>=0.5.0 required. @@ -100,6 +121,7 @@ Other Changes v0.6.0 (2021-05-19) ------------------- + Breaking Changes ^^^^^^^^^^^^^^^^ * intake, fsspec<0.9 and aiohttp are new dependencies in order to use the intake catalog search functionality. @@ -148,8 +170,6 @@ Other Changes v0.3.0 (2020-11-19) ------------------- -Updating doc strings and documentation. - Breaking Changes ^^^^^^^^^^^^^^^^ * ``clisops``>=0.4.0 and ``roocs-utils``>=0.1.4 used. @@ -164,6 +184,7 @@ Breaking Changes New Features ^^^^^^^^^^^^ +* Updated docstrings and documentation. * Added notebook with example usage. * Config file now exists at ``daops.etc.roocs.ini``. This can be overwritten by setting the environment variable ``ROOCS_CONFIG`` to the file path of a config file. diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 5f3f2fa..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,9 +0,0 @@ -graft daops -include *.md -global-exclude __pycache__ -global-exclude *.py[co] - -recursive-include etc * -include LICENSE -include requirements.txt -include requirements_dev.txt diff --git a/Makefile b/Makefile index 7978d1e..d7f90a3 100644 --- a/Makefile +++ b/Makefile @@ -50,25 +50,26 @@ clean-test: ## remove test and coverage artifacts rm -fr htmlcov/ rm -fr .pytest_cache -lint: ## check style with flake8 - flake8 daops tests +lint: ## check style with ruff + @echo "Running code style checks ..." 
+ @bash -c 'ruff check src' test: ## run tests quickly with the default Python - py.test + pytest test-all: ## run tests on every Python version with tox - tox + python -m tox coverage: ## check code coverage quickly with the default Python - coverage run --source daops -m pytest - coverage report -m - coverage html + python -m coverage run --source daops -m pytest + python -m coverage report -m + python -m coverage html $(BROWSER) htmlcov/index.html docs: ## generate Sphinx HTML documentation, including API docs - rm -f docs/daops.rst + rm -f docs/daops*.rst rm -f docs/modules.rst - sphinx-apidoc -o docs/ daops + sphinx-apidoc -o docs/ src/daops $(MAKE) -C docs clean $(MAKE) -C docs html $(BROWSER) docs/_build/html/index.html @@ -77,12 +78,11 @@ servedocs: docs ## compile the docs watching for changes watchmedo shell-command -p '*.md' -c '$(MAKE) -C docs html' -R -D . release: dist ## package and upload a release - twine upload dist/* + python -m flit publish dist: clean ## builds source and wheel package - python setup.py sdist - python setup.py bdist_wheel + python -m flit build ls -l dist install: clean ## install the package to the active Python's site-packages - python setup.py install + python -m pip install . diff --git a/README.rst b/README.rst index 508b59d..e76eae5 100644 --- a/README.rst +++ b/README.rst @@ -1,24 +1,21 @@ - daops - data-aware operations ============================= - .. image:: https://img.shields.io/pypi/v/daops.svg - :target: https://pypi.python.org/pypi/daops - :alt: Pypi - - + :target: https://pypi.python.org/pypi/daops + :alt: PyPI .. image:: https://github.com/roocs/daops/workflows/build/badge.svg :target: https://github.com/roocs/daops/actions :alt: Build Status - - .. image:: https://readthedocs.org/projects/daops/badge/?version=latest - :target: https://daops.readthedocs.io/en/latest/?badge=latest - :alt: Documentation + :target: https://daops.readthedocs.io/en/latest/?badge=latest + :alt: Documentation +.. image:: https://img.shields.io/badge/code%20style-black-000000.svg + :target: https://github.com/python/black + :alt: Python Black The ``daops`` library (pronounced "day-ops") provides a python interface to a set of operations suitable for working with climate simulation outputs. It is @@ -32,7 +29,6 @@ The data will be loaded and *fixed* using the `xarray `_. - * Free software: BSD * Documentation: https://daops.readthedocs.io @@ -41,7 +37,6 @@ Features The package has the following features: - * Ability to run *data-reduction* operations on large climate data sets. * Knowledge of irregularities/anomalies in some climate data sets. * Ability to apply *fixes* to those data sets before operating on them. @@ -52,11 +47,5 @@ Credits This package was created with ``Cookiecutter`` and the ``cedadev/cookiecutter-pypackage`` project template. - * Cookiecutter: https://github.com/audreyr/cookiecutter * cookiecutter-pypackage: https://github.com/cedadev/cookiecutter-pypackage - - -.. 
image:: https://img.shields.io/badge/code%20style-black-000000.svg - :target: https://github.com/python/black - :alt: Python Black diff --git a/app-package.cwl b/app-package.cwl index 39a4bac..0b9b2b8 100644 --- a/app-package.cwl +++ b/app-package.cwl @@ -54,7 +54,7 @@ $graph: collection: collection file_namer: file_namer output_dir: output_dir - out: + out: - results - class: CommandLineTool @@ -62,7 +62,7 @@ $graph: requirements: InlineJavascriptRequirement: {} EnvVarRequirement: - envDef: + envDef: ROOCS_CONFIG: /root/roocs.ini # PATH: /srv/conda/envs/env_crop/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin # ResourceRequirement: diff --git a/binder/postBuild b/binder/postBuild deleted file mode 100644 index dee8b9c..0000000 --- a/binder/postBuild +++ /dev/null @@ -1 +0,0 @@ -pip install . diff --git a/cli.py b/cli.py index e569030..2b487fb 100644 --- a/cli.py +++ b/cli.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Console script for daops.""" __author__ = """Elle Smith""" diff --git a/daops/__init__.py b/daops/__init__.py deleted file mode 100644 index 172b277..0000000 --- a/daops/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Top-level package for daops. -daops - Dataset-Aware Operations""" - -__author__ = """Elle Smith""" -__contact__ = "eleanor.smith@stfc.ac.uk" -__copyright__ = "Copyright 2018 United Kingdom Research and Innovation" -__license__ = "BSD" -__version__ = "0.12.0" - -from loguru import logger - -from roocs_utils.config import get_config - -import daops - -CONFIG = get_config(daops) - -from .utils.common import enable_logging # noqa - -# Disable logging for daops and remove the logger that is instantiated on import -logger.disable("daops") -logger.remove() diff --git a/daops/catalog/__init__.py b/daops/catalog/__init__.py deleted file mode 100644 index a997378..0000000 --- a/daops/catalog/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from roocs_utils.exceptions import InvalidCollection - -from .intake import IntakeCatalog -from daops import CONFIG - - -def get_catalog(project): - if CONFIG[f"project:{project}"].get("use_catalog"): - try: - catalog = IntakeCatalog(project) - return catalog - except Exception: - raise InvalidCollection() - - -__all__ = [ - "get_catalog", - "IntakeCatalog", -] diff --git a/daops/data_utils/__init__.py b/daops/data_utils/__init__.py deleted file mode 100644 index d7af0d9..0000000 --- a/daops/data_utils/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .coord_utils import squeeze_dims - - -# Demonstrator dummy function -def do_nothing(ds): - return ds diff --git a/daops/data_utils/array_utils.py b/daops/data_utils/array_utils.py deleted file mode 100644 index e69de29..0000000 diff --git a/daops/data_utils/attr_utils.py b/daops/data_utils/attr_utils.py deleted file mode 100644 index c7b06fd..0000000 --- a/daops/data_utils/attr_utils.py +++ /dev/null @@ -1,70 +0,0 @@ -from roocs_utils.xarray_utils import xarray_utils as xu - -from .common_utils import handle_derive_str - - -def edit_var_attrs(ds_id, ds, **operands): - """ - :param ds: Xarray DataSet - :param operands: sequence of arguments - :return: Xarray Dataset - Change the attributes of a variable. - """ - var_id = operands.get("var_id") - - attrs = operands.get("attrs") - for k, v in operands.get("attrs").items(): - v = handle_derive_str(v, ds_id, ds) - ds[var_id].attrs[k] = v - - return ds - - -def edit_global_attrs(ds_id, ds, **operands): - """ - :param ds: Xarray DataSet - :param operands: sequence of arguments - :return: Xarray DataArray - Change the gloabl attributes. 
- """ - attrs = operands.get("attrs") - - for k, v in operands.get("attrs").items(): - v = handle_derive_str(v, ds_id, ds) - ds.attrs[k] = v - - return ds - - -def add_global_attrs_if_needed(ds_id, ds, **operands): - """ - :param ds: Xarray DataSet - :param operands: sequence of arguments - :return: Xarray Dataset - Add a global attribute if it doesn't already exist. - """ - - attrs = operands.get("attrs") - for k, v in operands.get("attrs").items(): - # check if the key already exists before setting it - v = handle_derive_str(v, ds_id, ds) - if not ds.attrs.get(k, None): - ds.attrs[k] = v - - return ds - - -def remove_coord_attr(ds_id, ds, **operands): - """ - :param ds: Xarray DataSet - :param operands: sequence of arguments - :return: Xarray Dataset - - Remove coordinate attribute that is added by xarray, for specified variables. - """ - var_ids = handle_derive_str(operands.get("var_ids"), ds_id, ds) - - for v in var_ids: - ds[v].encoding["coordinates"] = None - - return ds diff --git a/daops/ops/__init__.py b/daops/ops/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/daops/utils/__init__.py b/daops/utils/__init__.py deleted file mode 100644 index 8d8cc6f..0000000 --- a/daops/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .core import is_characterised diff --git a/daops/utils/base_lookup.py b/daops/utils/base_lookup.py deleted file mode 100644 index 7827141..0000000 --- a/daops/utils/base_lookup.py +++ /dev/null @@ -1,43 +0,0 @@ -import hashlib -import os - -from elasticsearch import Elasticsearch -from roocs_utils.exceptions import InvalidProject -from roocs_utils.project_utils import derive_ds_id - -from daops import CONFIG - - -class Lookup(object): - """ - Base class used for looking up datasets in the elasticsearch indexes. - """ - - def __init__(self, dset): - self.dset = dset - self.es = Elasticsearch( - "https://" - + CONFIG["elasticsearch"]["endpoint"] - + ":" - + CONFIG["elasticsearch"]["port"], - ) - - def convert_to_ds_id(self): - """Converts the input dataset to a drs id form to use with the elasticsearch index.""" - try: - return derive_ds_id(self.dset) - except InvalidProject: - raise Exception( - f"The format of {self.dset} is not known and it could not be converted to a ds id." - ) - - def _convert_id(self, _id): - """ - Converts the dataset id to an md5 checksum used to retrieve the fixes for the dataset. Converts to drs id - format first if necessary. - """ - _id = self.convert_to_ds_id() - - m = hashlib.md5() - m.update(_id.encode("utf-8")) - return m.hexdigest() diff --git a/docs/api.rst b/docs/api.rst index 50e0c7e..f5477e1 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -6,6 +6,7 @@ Subset operation ================= .. automodule:: daops.ops.subset + :noindex: :members: :undoc-members: :show-inheritance: @@ -15,6 +16,7 @@ Average operation ================= .. automodule:: daops.ops.average + :noindex: :members: :undoc-members: :show-inheritance: @@ -57,13 +59,13 @@ Utilities Data Utilities ================ -.. automodule:: daops.data_utils.array_utils +.. automodule:: daops.data_utils.attr_utils :noindex: :members: :undoc-members: :show-inheritance: -.. automodule:: daops.data_utils.attr_utils +.. 
automodule:: daops.data_utils.common_utils :noindex: :members: :undoc-members: diff --git a/docs/conf.py b/docs/conf.py index 9f93f38..e6a105f 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -19,8 +19,6 @@ import os import sys -import sphinx_rtd_theme - sys.path.insert(0, os.path.abspath("..")) # -- General configuration --------------------------------------------- @@ -33,16 +31,23 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ "sphinx.ext.autodoc", - "sphinx.ext.viewcode", + "sphinx.ext.autosectionlabel", + "sphinx.ext.coverage", "sphinx.ext.mathjax", "sphinx.ext.napoleon", - "sphinx.ext.coverage", "sphinx.ext.todo", - "sphinx.ext.autosectionlabel", + "sphinx.ext.viewcode", "nbsphinx", "IPython.sphinxext.ipython_console_highlighting", ] +autosectionlabel_prefix_document = True +autosectionlabel_maxdepth = 2 + +autodoc_mock_imports = [ + "_pytest", +] + napoleon_numpy_docstring = True napoleon_use_rtype = False napoleon_use_param = False @@ -55,17 +60,16 @@ templates_path = ["_templates"] # The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = [".rst", ".ipynb"] +# You can specify multiple suffix as a dictionary: +source_suffix = {".rst": "restructuredtext"} +# note: do not add .ipynb when nbsphinx is enabled, otherwise you get the "missing title" error # The master toctree document. master_doc = "index" # General information about the project. project = "daops" -copyright = "2020, Elle Smith" +copyright = "2020-2025, Elle Smith" author = "Elle Smith" # The version info for the project you're documenting, acts as replacement diff --git a/docs/environment.yml b/docs/environment.yml deleted file mode 100644 index bf9ab1d..0000000 --- a/docs/environment.yml +++ /dev/null @@ -1,21 +0,0 @@ -# conda env create -f environment.yml -name: daops-docs -channels: - - conda-forge - - defaults -dependencies: - - sphinx - - nbsphinx - - ipython - - pandoc - - pip - - numpy>=1.16 - - xarray>=0.15 - - dask - - netcdf4 - - clisops>=0.6.1 - - elasticsearch>=7.9.1 - - roocs-utils>=0.2.1 -# - pip: -# - roocs-utils @ git+https://github.com/roocs/roocs-utils.git@master#egg=roocs-utils -# - clisops @ git+https://github.com/roocs/clisops.git@master#egg=clisops diff --git a/docs/index.rst b/docs/index.rst index d7302f1..dc7186d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,6 +13,12 @@ Welcome to daops's documentation! authors history +.. toctree:: + :maxdepth: 1 + :caption: All Modules + + modules + Indices and tables ================== * :ref:`genindex` diff --git a/eg-cli-params.yml b/eg-cli-params.yml index 8b187cc..9330d18 100644 --- a/eg-cli-params.yml +++ b/eg-cli-params.yml @@ -3,4 +3,3 @@ time: "2085-01-16/2188-12-16" file_namer: "simple" output_dir: "." 
collection: "cmip5.output1.MOHC.HadGEM2-ES.rcp85.mon.atmos.Amon.r1i1p1.latest.tas" - diff --git a/environment-docs.yml b/environment-docs.yml new file mode 100644 index 0000000..9ef1015 --- /dev/null +++ b/environment-docs.yml @@ -0,0 +1,21 @@ +# conda env create -f environment-docs.yml +name: daops-docs +channels: + - conda-forge +dependencies: + - python >=3.9,<3.13 + - clisops >=0.14.0,<0.15.0 + - dask + - elasticsearch>=8.0.1 + - ipykernel + - ipython >=8.5.0 + - matplotlib-base >=3.5.0 + - nbconvert + - nbsphinx + - netcdf4 + - numpy >=1.23,<2.0.0 + - pip >=25.0.0 + - roocs-utils >=0.6.4 + - sphinx >=7.1.0 + - sphinx-rtd-theme >=1.0 + - xarray >=2022.12.0,<2023.3.0 diff --git a/environment.yml b/environment.yml index d3514b5..3475b7d 100644 --- a/environment.yml +++ b/environment.yml @@ -1,24 +1,26 @@ -# conda env create -f environment.yml +# conda env create -f environment-docs.yml name: daops channels: - conda-forge dependencies: - pip - - numpy >=1.16 - - xarray >=0.21,<2023.3.0 # https://github.com/pydata/xarray/issues/7794 + - cftime + - clisops >=0.14.0,<0.15.0 - dask + - elasticsearch>=8.0.1 - netcdf4 - - clisops >=0.12.1,<0.15.0 + - numpy >=1.23.0,<2.0.0 - roocs-grids >=0.1.2 + - roocs-utils >=0.6.4 + - xarray >=2022.12.0,<2023.3.0 # https://github.com/pydata/xarray/issues/7794 - xesmf >=0.8.2 - - elasticsearch>=7.9.1 - - roocs-utils>=0.6.4 # logging - loguru >=0.5.3 # catalog - intake >=0.7.0,<2.0 + - pandas <2.0 # to support kerchunk - fsspec - aiohttp - - zarr==2.13.3 + - zarr >=2.13.3 - zstandard diff --git a/notebooks/subset.ipynb b/notebooks/subset.ipynb index d6c1b68..4047e5a 100644 --- a/notebooks/subset.ipynb +++ b/notebooks/subset.ipynb @@ -6,10 +6,11 @@ "metadata": {}, "outputs": [], "source": [ - "from daops.ops.subset import subset\n", - "\n", "# remove previosuly created example file\n", "import os\n", + "\n", + "from daops.ops.subset import subset\n", + "\n", "if os.path.exists(\"./output_001.nc\"):\n", " os.remove(\"./output_001.nc\")" ] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7b7aa7c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,216 @@ +[build-system] +requires = ["flit-core >=3.10.1,<4.0"] +build-backend = "flit_core.buildapi" + +[project] +name = "daops" +authors = [ + {name = "Elle Smith", email = "eleanor.smith@stfc.ac.uk"} +] +maintainers = [ + {name = "Trevor James Smith", email = "smith.trevorj@ouranos.ca"} +] +license = {text = "BSD"} +readme = {file = "README.rst", content-type = "text/x-rst"} +keywords = ["daops"] +homepage = "https://github.com/roocs/daops" +requires-python = ">=3.9" +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: End Users/Desktop", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Security", + "Topic :: Internet", + "Topic :: Scientific/Engineering", + "Topic :: System :: Distributed Computing", + "Topic :: System :: Systems Administration :: Authentication/Directory", + "Topic :: Software Development :: Libraries :: Python Modules" +] +dynamic = 
["description", "version"] +dependencies = [ + "cftime", + "clisops >=0.14.0,<0.15.0", + "dask[complete]", + "elasticsearch >=8.0.1", + "numpy >=1.23.0,<2.0.0", + "roocs-utils >=0.6.4", + "roocs_grids >=0.1.2", + "xarray >=2022.12.0,<2023.3.0", # https://github.com/pydata/xarray/issues/7794 + "xesmf >=0.8.2", + # logging + "loguru >=0.5.3", + # catalog + "intake >=0.7.0,<2.0", + "pandas <2.0", + # to support kerchunk + "aiohttp", + "fsspec", + "zarr >=2.13.3", + "zstandard" +] + +[project.optional-dependencies] +dev = [ + "black >=25.1.0", + "bump-my-version >=0.28.0", + "coverage >=7.6.0", + "jinja2", + "packaging >=24.2", + "pre-commit >=3.5.0", + "pytest-loguru >=0.3.0", + "pytest >=8.0.0", + "pytest-cov >=6.0.0", + "ruff >=0.9.0", + "tox >=4.18.1", + "watchdog >=4.0", +] +docs = [ + "ipykernel", + "ipython >=8.5.0", + "jupyter_client", + "matplotlib >=3.5.0", + "nbconvert", + "nbsphinx", + "netcdf4", + "sphinx >=7.1.0", + "sphinx-rtd-theme >=1.0" +] + +[project.scripts] +daops = "daops.cli:main" + +[tool] + +[tool.black] +target-version = [ + "py39", + "py310", + "py311", + "py312" +] + +[tool.bumpversion] +current_version = "0.11.0" +commit = true +commit_args = "--no-verify" +tag = true +allow_dirty = false + +[[tool.bumpversion.files]] +filename = "src/daops/__init__.py" +search = "__version__ = \"{current_version}\"" +replace = "__version__ = \"{new_version}\"" + +[[tool.bumpversion.files]] +filename = "docs/conf.py" +search = "version = \"{current_version}\"" +replace = "version = \"{new_version}\"" + +[tool.coverage.run] +relative_files = true +omit = ["tests/*.py"] + +[tool.deptry] +extend_exclude = ["docs"] +ignore_notebooks = true +pep621_dev_dependency_groups = ["dev", "docs"] + +[tool.flit.module] +name = "daops" + +[tool.flit.sdist] +include = [ + "AUTHORS.rst", + "HISTORY.rst", + "LICENSE", + "Makefile", + "README.rst", + "docs/Makefile", + "docs/make.bat", + "docs/*.rst", + "docs/*.py", + "docs/_static/*.png", + "etc/*", + "src/daops", + "tox.ini" +] + +[tool.pytest.ini_options] +minversion = "8.0" +addopts = [ + "--verbose", + "--color=yes", + "--strict-config", + "--strict-markers" +] +log_cli_level = "INFO" +filterwarnings = ["ignore::UserWarning"] +testpaths = [ + "tests" +] +markers = [ + "online: mark tests that require internet access (deselect with '-m \"not requires_internet\"')" +] +xfail_strict = true + +[tool.ruff] +src = ["src/daops"] +exclude = [ + ".eggs", + ".git", + "build", + "docs", + "tests", +] +line-length = 180 +extend-include = [ + "*.ipynb" # Include notebooks +] + +[tool.ruff.format] +line-ending = "auto" + +[tool.ruff.lint] +extend-select = [ + "RUF022" # unsorted-dunder-all +] +ignore = [ + "D100", # Missing docstring in public module + "D103", # Missing docstring in public function + "D105", # Missing docstring in magic method + "D211", # no-blank-line-before-class +] +preview = true +select = [ + "B", # bugbear + "C90", # mccabe-complexity + "D", # docstrings + "E", # pycodestyle errors + "F", # pyflakes + "I", # imports + "N802", # invalid-function-name + "S", # bandit + "UP", # pyupgrade + "W" # pycodestyle warnings +] +[tool.ruff.lint.mccabe] +max-complexity = 20 + +[tool.ruff.lint.pycodestyle] +max-doc-length = 180 + +[tool.ruff.lint.pydocstyle] +convention = "numpy" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 3aa85fd..0000000 --- a/requirements.txt +++ /dev/null @@ -1,19 +0,0 @@ -numpy>=1.16 -xarray>=0.21 -xesmf>=0.8.2 -dask[complete] -cftime -netcdf4 -elasticsearch>=8.0.1 -clisops>=0.12.1,<0.15.0 -## 
clisops @ git+https://github.com/roocs/clisops.git@master#egg=clisops -roocs-utils>=0.6.4 -roocs_grids>=0.1.2 -# logging -loguru>=0.5.3 -# catalog -intake>=0.7.0,<2.0 -# to support kerchunk -aiohttp -fsspec -zarr==2.13.3 diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index 65282a5..0000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,19 +0,0 @@ -pip -bumpversion -wheel -watchdog -flake8 -tox>=4.0 -coverage -Sphinx -sphinx-rtd-theme -nbsphinx -nbconvert -ipython -twine -pytest>=6.0 -pytest-loguru -pytest-runner -pre-commit>=3.0.0 -black>=23.7.0 -GitPython>=3.1.37 diff --git a/roocs.ini.tmpl b/roocs.ini.tmpl index 48b347d..ae351ef 100644 --- a/roocs.ini.tmpl +++ b/roocs.ini.tmpl @@ -1,17 +1,17 @@ [project:cmip5] -base_dir = DATA_DIR/test_data/badc/cmip5/data/cmip5 +base_dir = DATA_DIR/badc/cmip5/data/cmip5 [project:cmip6] -base_dir = DATA_DIR/test_data/badc/cmip6/data/CMIP6 +base_dir = DATA_DIR/badc/cmip6/data/CMIP6 [project:cordex] -base_dir = DATA_DIR/test_data/badc/cordex/data/cordex +base_dir = DATA_DIR/badc/cordex/data/cordex [project:c3s-cmip5] -base_dir = DATA_DIR/test_data/gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5 +base_dir = DATA_DIR/gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5 [project:c3s-cmip6] -base_dir = DATA_DIR/test_data/badc/cmip6/data/CMIP6 +base_dir = DATA_DIR/badc/cmip6/data/CMIP6 [project:c3s-cordex] -base_dir = DATA_DIR/test_data/gws/nopw/j04/cp4cds1_vol1/data/c3s-cordex +base_dir = DATA_DIR/gws/nopw/j04/cp4cds1_vol1/data/c3s-cordex diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 84bfdd1..0000000 --- a/setup.cfg +++ /dev/null @@ -1,60 +0,0 @@ -[bumpversion] -current_version = 0.12.0 -commit = True -tag = True - -[bumpversion:file:daops/__init__.py] -search = __version__ = "{current_version}" -replace = __version__ = "{new_version}" - -[bumpversion:file:setup.py] -search = __version__ = "{current_version}" -replace = __version__ = "{new_version}" - -[bumpversion:file:docs/conf.py] -search = version = "{current_version}" -replace = version = "{new_version}" - -[bdist_wheel] -universal = 1 - -[flake8] -exclude = - .git, - docs, - build, - .eggs, - tests/mini-esgf-data -max-line-length = 88 -max-complexity = 12 -ignore = - C901 - E203 - E231 - E266 - E402 - E501 - F401 - F403 - W503 - W504 - F841 - F541 - F821 - -[aliases] -test = pytest - -[tool:pytest] -addopts = --verbose tests -filterwarnings = - ignore::UserWarning -markers = - online: mark test to need internet connection - -[pylint] -ignore = docs,tests -disable = - too-many-arguments, - too-few-public-methods, - invalid-name, diff --git a/setup.py b/setup.py deleted file mode 100644 index 727cfe6..0000000 --- a/setup.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python -"""The setup script.""" - -__author__ = """Elle Smith""" -__contact__ = "eleanor.smith@stfc.ac.uk" -__copyright__ = "Copyright 2018 United Kingdom Research and Innovation" -__license__ = "BSD" -__version__ = "0.12.0" - -import os -from setuptools import setup, find_packages - -# One strategy for storing the overall version is to put it in the top-level -# package's __init__ but Nb. 
__init__.py files are not needed to declare -# packages in Python 3 - -# Populate long description setting with content of README -# -# Use markdown format read me file as GitHub will render it automatically -# on package page -here = os.path.abspath(os.path.dirname(__file__)) -_long_description = open(os.path.join(here, "README.rst")).read() - -requirements = [line.strip() for line in open("requirements.txt")] - -setup_requirements = [ - "pytest-runner", -] - -test_requirements = ["pytest", "tox"] - -docs_requirements = [ - "sphinx", - "sphinx-rtd-theme", - "nbsphinx", - "pandoc", - "ipython", - "ipykernel", - "jupyter_client", - "matplotlib", -] - -setup( - author=__author__, - author_email=__contact__, - # See: - # https://www.python.org/dev/peps/pep-0301/#distutils-trove-classification - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Environment :: Console", - "Environment :: Web Environment", - "Intended Audience :: End Users/Desktop", - "Intended Audience :: Developers", - "Intended Audience :: System Administrators", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Natural Language :: English", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Topic :: Security", - "Topic :: Internet", - "Topic :: Scientific/Engineering", - "Topic :: System :: Distributed Computing", - "Topic :: System :: Systems Administration :: Authentication/Directory", - "Topic :: Software Development :: Libraries :: Python Modules", - ], - description="daops - data-aware operations", - license=__license__, - # This qualifier can be used to selectively exclude Python versions - - # in this case early Python 2 and 3 releases - python_requires=">=3.7.0", - install_requires=[ - requirements, - ], - entry_points={ - "console_scripts": [ - "daops=daops.cli:main", - ], - }, - long_description=_long_description, - long_description_content_type="text/x-rst", - include_package_data=True, - package_data={"daops": ["etc/roocs.ini"]}, - keywords="daops", - name="daops", - packages=find_packages(), - setup_requires=setup_requirements, - test_suite="tests", - tests_require=test_requirements, - url="https://github.com/roocs/daops", - version=__version__, - extras_require={"docs": docs_requirements}, - # zip_safe=False, -) diff --git a/src/daops/__init__.py b/src/daops/__init__.py new file mode 100644 index 0000000..4b1b39b --- /dev/null +++ b/src/daops/__init__.py @@ -0,0 +1,32 @@ +"""Dataset-Aware Operations.""" + +__author__ = """Elle Smith""" +__contact__ = "eleanor.smith@stfc.ac.uk" +__copyright__ = "Copyright 2018-2025 United Kingdom Research and Innovation" +__version__ = "0.12.0" + +from functools import lru_cache + +from loguru import logger +from roocs_utils.config import get_config as _get_config + +import daops + +# CONFIG = _get_config(daops) + + +@lru_cache(maxsize=1) +def _config_cached(): + return _get_config(daops) + + +def config_(): + cfg = _config_cached() + return cfg + + +from .utils.common import enable_logging # noqa + +# Disable logging for daops and remove the logger that is instantiated on import +logger.disable("daops") +logger.remove() diff --git a/src/daops/catalog/__init__.py b/src/daops/catalog/__init__.py new file mode 100644 index 0000000..162f40c --- /dev/null +++ b/src/daops/catalog/__init__.py @@ -0,0 +1,23 @@ +"""Catalog module for 
the daops package.""" + +from roocs_utils.exceptions import InvalidCollection + +from daops import config_ + +from .intake import IntakeCatalog + + +def get_catalog(project): + """Get the catalog object for the project.""" + if config_()[f"project:{project}"].get("use_catalog"): + try: + catalog = IntakeCatalog(project) + return catalog + except Exception as err: + raise InvalidCollection() from err + + +__all__ = [ + "IntakeCatalog", + "get_catalog", +] diff --git a/daops/catalog/base.py b/src/daops/catalog/base.py similarity index 67% rename from daops/catalog/base.py rename to src/daops/catalog/base.py index 2c536a8..b40d3f7 100644 --- a/daops/catalog/base.py +++ b/src/daops/catalog/base.py @@ -1,9 +1,12 @@ +"""Base class for catalog.""" + import os -from daops import CONFIG +from daops import config_ def make_list(value): + """Make a list from a value.""" if isinstance(value, list): val = value else: @@ -11,14 +14,15 @@ def make_list(value): return val -class Catalog: - def __init__(self, project): +class Catalog: # noqa: D101 + def __init__(self, project): # noqa: D107 self.project = project def _query(self, collection, time=None, time_components=None): raise NotImplementedError def search(self, collection, time=None, time_components=None): + """Search the catalog for datasets.""" cols = make_list(collection) records = self._query(cols, time, time_components) result = Result(self.project, records) @@ -26,12 +30,15 @@ def search(self, collection, time=None, time_components=None): class Result: + """Class to hold the results of a catalog search.""" + def __init__(self, project, records): - """records are an OrderedDict of dataset ids with a list of files: - {'ds_id': [files]} + """Parse the records. + + Records are an OrderedDict of dataset ids with a list of files: {'ds_id': [files]}. 
""" - self.base_dir = CONFIG.get(f"project:{project}", {}).get("base_dir") - self.base_url = CONFIG.get(f"project:{project}", {}).get("data_node_root") + self.base_dir = config_().get(f"project:{project}", {}).get("base_dir") + self.base_url = config_().get(f"project:{project}", {}).get("data_node_root") self.records = records @property @@ -39,7 +46,7 @@ def matches(self): """Return number of matched records.""" return len(self.records) - def __len__(self): + def __len__(self): # noqa: D105 return self.matches def _records(self, prefix): diff --git a/daops/catalog/intake.py b/src/daops/catalog/intake.py similarity index 80% rename from daops/catalog/intake.py rename to src/daops/catalog/intake.py index e847797..bb05d71 100644 --- a/daops/catalog/intake.py +++ b/src/daops/catalog/intake.py @@ -1,29 +1,32 @@ +"""Utilities for working with Intake catalogs.""" + import intake -from .base import Catalog -from .util import MAX_DATETIME -from .util import MIN_DATETIME -from .util import parse_time -from daops import CONFIG +from daops import config_ -# from intake.config import conf as intake_config +from .base import Catalog +from .util import MAX_DATETIME, MIN_DATETIME, parse_time class IntakeCatalog(Catalog): + """Intake catalog class.""" + def __init__(self, project, url=None): - super(IntakeCatalog, self).__init__(project) - self.url = url or CONFIG.get("catalog", None).get("intake_catalog_url", None) + super().__init__(project) + self.url = url or config_().get("catalog", None).get("intake_catalog_url", None) self._cat = None self._store = {} # intake_config["cache_dir"] = "/tmp/inventory_cache" @property def catalog(self): + """Return the intake catalog.""" if not self._cat: self._cat = intake.open_catalog(self.url) return self._cat def load(self): + """Load the catalog.""" if self.project not in self._store: self._store[self.project] = self.catalog[self.project].read() return self._store[self.project] diff --git a/daops/catalog/util.py b/src/daops/catalog/util.py similarity index 93% rename from daops/catalog/util.py rename to src/daops/catalog/util.py index 8dd9af9..36bff88 100644 --- a/daops/catalog/util.py +++ b/src/daops/catalog/util.py @@ -1,3 +1,5 @@ +"""Catalog utility functions.""" + import datetime from roocs_utils.parameter.time_components_parameter import TimeComponentsParameter @@ -8,6 +10,7 @@ def parse_time(time=None, time_components=None): + """Parse the time coordinate.""" start = end = None if time: if isinstance(time, TimeParameter): diff --git a/daops/cli.py b/src/daops/cli.py similarity index 91% rename from daops/cli.py rename to src/daops/cli.py index 891db30..230331d 100644 --- a/daops/cli.py +++ b/src/daops/cli.py @@ -5,17 +5,18 @@ __copyright__ = "Copyright 2023 United Kingdom Research and Innovation" __license__ = "BSD - see LICENSE file in top-level package directory" -import os -import sys import argparse -import dateutil.parser import configparser +import os +import sys -from daops.ops.subset import subset from roocs_utils.utils.file_utils import FileMapper +from daops.ops.subset import subset + def parse_args(): + """Parse the command line arguments.""" parser = argparse.ArgumentParser() sub_parsers = parser.add_subparsers() sub_parsers.required = True @@ -76,6 +77,7 @@ def parse_args(): def get_params(args): + """Get the parameters.""" collection = ( args.collection if len(args.collection) == 1 else FileMapper(args.collection) ) @@ -94,10 +96,9 @@ def get_params(args): def check_env(): - """ - Check that ROOCS_CONFIG points to a valid config file - (although 
for certain types of invalid file, in fact main is never called, - so exit might not always be graceful in these cases). + """Check that ROOCS_CONFIG points to a valid config file. + + Although for certain types of invalid file, in fact main is never called, so exit might not always be graceful in these cases. Call this after get_params() so that 'help' still works even if this is not set. """ config_env_var = "ROOCS_CONFIG" @@ -114,6 +115,7 @@ def check_env(): def main(): + """Console script for daops.""" args = parse_args() params = get_params(args) check_env() diff --git a/src/daops/data_utils/__init__.py b/src/daops/data_utils/__init__.py new file mode 100644 index 0000000..52ec844 --- /dev/null +++ b/src/daops/data_utils/__init__.py @@ -0,0 +1,3 @@ +"""Data utilities for the daops package.""" + +from .coord_utils import squeeze_dims as squeeze_dims diff --git a/src/daops/data_utils/attr_utils.py b/src/daops/data_utils/attr_utils.py new file mode 100644 index 0000000..6d94127 --- /dev/null +++ b/src/daops/data_utils/attr_utils.py @@ -0,0 +1,108 @@ +"""Module for editing the attributes of a dataset.""" + +from .common_utils import handle_derive_str + + +def edit_var_attrs(ds_id, ds, **operands): + """ + Edit the variable attrs. + + Parameters + ---------- + ds_id : str + Dataset ID. + ds : xarray.Dataset + A Dataset. + operands : dict + Dictionary containing the new attributes for the variable. + + Returns + ------- + xarray.Dataset + """ + var_id = operands.get("var_id") + + operands.get("attrs") + for k, v in operands.get("attrs").items(): + v = handle_derive_str(v, ds_id, ds) + ds[var_id].attrs[k] = v + + return ds + + +def edit_global_attrs(ds_id, ds, **operands): + """ + Edit the global attrs. + + Parameters + ---------- + ds_id : str + Dataset ID. + ds : xarray.Dataset + A Dataset. + operands : dict + Dictionary containing the new attributes for the dataset. + + Returns + ------- + xarray.Dataset + """ + operands.get("attrs") + + for k, v in operands.get("attrs").items(): + v = handle_derive_str(v, ds_id, ds) + ds.attrs[k] = v + + return ds + + +def add_global_attrs_if_needed(ds_id, ds, **operands): + """ + Add the global attrs, if needed. + + Parameters + ---------- + ds_id : str + Dataset ID. + ds : xarray.Dataset + A Dataset. + operands : dict + Dictionary containing the new attributes for the dataset. + + Returns + ------- + xarray.Dataset + """ + operands.get("attrs") + for k, v in operands.get("attrs").items(): + # check if the key already exists before setting it + v = handle_derive_str(v, ds_id, ds) + if not ds.attrs.get(k, None): + ds.attrs[k] = v + + return ds + + +def remove_coord_attr(ds_id, ds, **operands): + """ + Remove the coordinate attr from the dataset. + + Parameters + ---------- + ds_id : str + Dataset ID. + ds : xarray.Dataset + A Dataset. + operands : dict + Dictionary containing the new attributes for the dataset. 
+ + Returns + ------- + xarray.Dataset + """ + var_ids = handle_derive_str(operands.get("var_ids"), ds_id, ds) + + for v in var_ids: + ds[v].encoding["coordinates"] = None + + return ds diff --git a/daops/data_utils/common_utils.py b/src/daops/data_utils/common_utils.py similarity index 81% rename from daops/data_utils/common_utils.py rename to src/daops/data_utils/common_utils.py index b04e733..f88add2 100644 --- a/daops/data_utils/common_utils.py +++ b/src/daops/data_utils/common_utils.py @@ -1,7 +1,10 @@ +"""Common utility functions for data operations.""" + from pydoc import locate def handle_derive_str(value, ds_id, ds): + """Handle the derive string.""" if isinstance(value, str) and "derive" in value: components = value.split(":") func = locate(components[1].strip()) diff --git a/daops/data_utils/coord_utils.py b/src/daops/data_utils/coord_utils.py similarity index 72% rename from daops/data_utils/coord_utils.py rename to src/daops/data_utils/coord_utils.py index 558fa40..db4c26a 100644 --- a/daops/data_utils/coord_utils.py +++ b/src/daops/data_utils/coord_utils.py @@ -1,81 +1,114 @@ -import numpy as np -from roocs_utils.xarray_utils import xarray_utils as xu - -from .common_utils import handle_derive_str - - -def squeeze_dims(ds_id, ds, **operands): - """ - :param ds: Xarray Dataset - :param operands: (dict) Arguments for fix. Dims (list) to remove. - :return: Xarray Dataset - """ - dims = operands.get("dims") - for dim in dims: - ds = ds.squeeze(dim) - - return ds - - -def add_scalar_coord(ds_id, ds, **operands): - """ - :param ds: Xarray DataSet - :param operands: sequence of arguments - :return: Xarray Dataset - Add a scalar coordinate. - """ - var_id = operands.get("var_id") - value = operands.get("value") - dtype = operands.get("dtype") - - value = handle_derive_str(value, ds_id, ds) - ds = ds.assign_coords({f"{var_id}": np.array(value, dtype=dtype)}) - - for k, v in operands.get("attrs").items(): - v = handle_derive_str(v, ds_id, ds) - ds[var_id].attrs[k] = v - - if operands.get("encoding"): - for k, v in operands.get("encoding").items(): - v = handle_derive_str(v, ds_id, ds) - ds[var_id].encoding[k] = v - - # update coordinates of main variable of dataset - main_var = xu.get_main_variable(ds) - main_var_coords = ds[main_var].encoding.get("coordinates", "") - main_var_coords += f" {var_id}" - ds[main_var].encoding["coordinates"] = main_var_coords - - return ds - - -def add_coord(ds_id, ds, **operands): - """ - :param ds: Xarray DataSet - :param operands: sequence of arguments - :return: Xarray DataArray - Add a coordinate. 
- """ - var_id = operands.get("var_id") - dim = operands.get("dim") - value = operands.get("value") - dtype = operands.get("dtype") - - value = handle_derive_str(value, ds_id, ds) - ds = ds.assign_coords({f"{var_id}": (dim, np.array(value, dtype=dtype))}) - - for k, v in operands.get("attrs").items(): - v = handle_derive_str(v, ds_id, ds) - ds[var_id].attrs[k] = v - - for k, v in operands.get("encoding").items(): - v = handle_derive_str(v, ds_id, ds) - ds[var_id].encoding[k] = v - - # update coordinates of main variable of dataset - main_var = xu.get_main_variable(ds) - main_var_coords = ds[main_var].encoding.get("coordinates", "") - main_var_coords += f" {var_id}" - ds[main_var].encoding["coordinates"] = main_var_coords - - return ds +"""Coordinate operations.""" + +import numpy as np +from roocs_utils.xarray_utils import xarray_utils as xu + +from .common_utils import handle_derive_str + + +def squeeze_dims(ds_id, ds, **operands): + """Squeeze dimensions from dataset. + + Parameters + ---------- + ds_id : str + Dataset ID. + Unused in this function. + ds : xarray.Dataset + A Dataset. + operands : dict + Dictionary containing the dimensions to remove. + + Returns + ------- + xarray.Dataset + """ + dims = operands.get("dims") + for dim in dims: + ds = ds.squeeze(dim) + + return ds + + +def add_scalar_coord(ds_id, ds, **operands): + """ + Add a scalar coordinate. + + Parameters + ---------- + ds_id : str + Dataset ID. + ds : xarray.Dataset + A Dataset. + operands : dict + Dictionary containing the new coordinate. + + Returns + ------- + xarray.Dataset + """ + var_id = operands.get("var_id") + value = operands.get("value") + dtype = operands.get("dtype") + + value = handle_derive_str(value, ds_id, ds) + ds = ds.assign_coords({f"{var_id}": np.array(value, dtype=dtype)}) + + for k, v in operands.get("attrs").items(): + v = handle_derive_str(v, ds_id, ds) + ds[var_id].attrs[k] = v + + if operands.get("encoding"): + for k, v in operands.get("encoding").items(): + v = handle_derive_str(v, ds_id, ds) + ds[var_id].encoding[k] = v + + # update coordinates of main variable of dataset + main_var = xu.get_main_variable(ds) + main_var_coords = ds[main_var].encoding.get("coordinates", "") + main_var_coords += f" {var_id}" + ds[main_var].encoding["coordinates"] = main_var_coords + + return ds + + +def add_coord(ds_id, ds, **operands): + """ + Add a coordinate. + + Parameters + ---------- + ds_id : str + Dataset ID. + ds : xarray.Dataset + A Dataset. + operands : dict + Dictionary containing the new coordinate. 
+ + Returns + ------- + xarray.Dataset + """ + var_id = operands.get("var_id") + dim = operands.get("dim") + value = operands.get("value") + dtype = operands.get("dtype") + + value = handle_derive_str(value, ds_id, ds) + ds = ds.assign_coords({f"{var_id}": (dim, np.array(value, dtype=dtype))}) + + for k, v in operands.get("attrs").items(): + v = handle_derive_str(v, ds_id, ds) + ds[var_id].attrs[k] = v + + for k, v in operands.get("encoding").items(): + v = handle_derive_str(v, ds_id, ds) + ds[var_id].encoding[k] = v + + # update coordinates of main variable of dataset + main_var = xu.get_main_variable(ds) + main_var_coords = ds[main_var].encoding.get("coordinates", "") + main_var_coords += f" {var_id}" + ds[main_var].encoding["coordinates"] = main_var_coords + + return ds diff --git a/daops/data_utils/var_utils.py b/src/daops/data_utils/var_utils.py similarity index 54% rename from daops/data_utils/var_utils.py rename to src/daops/data_utils/var_utils.py index fb9def1..11eef1d 100644 --- a/daops/data_utils/var_utils.py +++ b/src/daops/data_utils/var_utils.py @@ -1,12 +1,25 @@ +"""Module to add a data variable to a dataset.""" + import numpy as np def add_data_var(ds_id, ds, **operands): """ - :param ds: Xarray DataSet - :param operands: sequence of arguments - :return: Xarray Dataset Add a data variable. + + Parameters + ---------- + ds_id : str + Dataset ID. + Unused in this function. + ds : xarray.Dataset + A Dataset. + operands : dict + Dictionary containing the new data variable. + + Returns + ------- + xarray.Dataset """ var_id = operands.get("var_id") value = operands.get("value") diff --git a/daops/etc/roocs.ini b/src/daops/etc/roocs.ini similarity index 100% rename from daops/etc/roocs.ini rename to src/daops/etc/roocs.ini diff --git a/daops/fix_utils/decadal_utils.py b/src/daops/fix_utils/decadal_utils.py similarity index 86% rename from daops/fix_utils/decadal_utils.py rename to src/daops/fix_utils/decadal_utils.py index c7279b3..15fce9d 100644 --- a/daops/fix_utils/decadal_utils.py +++ b/src/daops/fix_utils/decadal_utils.py @@ -1,8 +1,9 @@ +"""Utility functions for fixing decadal datasets.""" + import re from datetime import datetime import cftime -import numpy as np model_specific_global_attrs = { "CMCC-CM2-SR5": { @@ -13,7 +14,8 @@ "EC-Earth3": { "forcing_description": "f1, CMIP6 historical forcings", "physics_description": "physics from the standard model configuration, with no additional tuning or different parametrization", - "initialization_description": "Atmosphere initialization based on full-fields from ERA-Interim (s1979-s2018) or ERA-40 (s1960-s1978); ocean/sea-ice initialization based on full-fields from NEMO/LIM assimilation run nudged towards ORA-S4 (s1960-s2018)", + "initialization_description": "Atmosphere initialization based on full-fields from ERA-Interim (s1979-s2018) or ERA-40 (s1960-s1978);" + " ocean/sea-ice initialization based on full-fields from NEMO/LIM assimilation run nudged towards ORA-S4 (s1960-s2018)", }, "HadGEM3-GC31-MM": { "forcing_description": "f2, CMIP6 v6.2.0 forcings; no ozone remapping", @@ -29,12 +31,14 @@ def get_time_calendar(ds_id, ds): + """Get the time calendar.""" times = ds.time.values cal = times[0].calendar return cal def get_lead_times(ds_id, ds): + """Get the lead times.""" start_date = datetime.fromisoformat(get_start_date(ds_id, ds)) cal = get_time_calendar(ds_id, ds) @@ -59,18 +63,21 @@ def get_lead_times(ds_id, ds): def get_start_date(ds_id, ds): + """Get the start date.""" year = 
ds_id.split(".")[5].split("-")[0].lstrip("s") sd = datetime(int(year), 11, 1, 0, 0).isoformat() return sd def get_sub_experiment_id(ds_id, ds): + """Get the sub-experiment id.""" sd = datetime.fromisoformat(get_start_date(ds_id, ds)) se_id = f"s{sd.year}{sd.month}" return se_id def get_reftime(ds_id, ds): + """Get the reference time.""" default_sd = get_start_date(ds_id, ds) start_date = ds.attrs.get("startdate", None) @@ -79,7 +86,7 @@ def get_reftime(ds_id, ds): start_date = default_sd else: - #  attempt to get from startdate attribute - don't know if it will always be in sYYYYMM format? + # Attempt to get from startdate attribute - don't know if it will always be in sYYYYMM format? regex = re.compile(r"^s(\d{4})(\d{2})$") match = regex.match(start_date) @@ -106,12 +113,14 @@ def get_reftime(ds_id, ds): def get_bnd_vars(ds_id, ds): + """Get the bounds variables.""" bnd_vars = ["latitude", "longitude", "time"] bounds_list = [ds.cf.get_bounds(bv).name for bv in bnd_vars] return bounds_list def get_decadal_bnds_list(ds_id, ds): + """Get the bounds variables for decadal datasets.""" bounds_list = get_bnd_vars(ds_id, ds) # coordinate attribute is always added to realization variable in decadal datasets bounds_list.append("realization") @@ -119,6 +128,7 @@ def get_decadal_bnds_list(ds_id, ds): def get_decadal_model_attr_from_dict(ds_id, ds, attr): + """Get the model-specific global attribute.""" # Add the model-specific global attr model = ds_id.split(".")[3] value = model_specific_global_attrs[model][attr] @@ -126,7 +136,8 @@ def get_decadal_model_attr_from_dict(ds_id, ds, attr): def fix_further_info_url(ds_id, ds): - model = ds_id.split(".")[3] + """Fix the further info url.""" + ds_id.split(".")[3] further_info_url = ds.attrs.get("further_info_url", None) if "none" in further_info_url: diff --git a/src/daops/ops/__init__.py b/src/daops/ops/__init__.py new file mode 100644 index 0000000..75b0d8a --- /dev/null +++ b/src/daops/ops/__init__.py @@ -0,0 +1 @@ +"""Operations module for the DAOPS package.""" diff --git a/daops/ops/average.py b/src/daops/ops/average.py similarity index 84% rename from daops/ops/average.py rename to src/daops/ops/average.py index b28c93a..1957bec 100644 --- a/daops/ops/average.py +++ b/src/daops/ops/average.py @@ -1,24 +1,18 @@ +"""Operations for averaging data over dimensions, shape or time.""" + from clisops.ops.average import average_over_dims as clisops_average_over_dims from clisops.ops.average import average_shape as clisops_average_shape from clisops.ops.average import average_time as clisops_average_time -from roocs_utils.parameter import collection_parameter -from roocs_utils.parameter import dimension_parameter +from roocs_utils.parameter import collection_parameter, dimension_parameter from daops.ops.base import Operation -__all__ = [ - "average_over_dims", - "average_time", - "average_shape" -] +__all__ = ["average_over_dims", "average_shape", "average_time"] class Average(Operation): def _resolve_params(self, collection, **params): - """ - Resolve the input parameters to `self.params` and parameterise - collection parameter and set to `self.collection`. 
- """ + """Resolve the input parameters to `self.params` and parameterise collection parameter and set to `self.collection`.""" dims = dimension_parameter.DimensionParameter(params.get("dims")) collection = collection_parameter.CollectionParameter(collection) @@ -42,8 +36,8 @@ def average_over_dims( file_namer="standard", apply_fixes=True, ): - """ - Average input dataset according over indicated dimensions. + """Average input dataset according to indicated dimensions. + Can be averaged over multiple dimensions. Parameters @@ -62,10 +56,9 @@ def average_over_dims( ------- List of outputs in the selected type: a list of xarray Datasets or file paths. - Examples -------- - | collection: ("cmip6.ukesm1.r1.gn.tasmax.v20200101",) + | collection: ("cmip6.ukesm1.r1.gn.tasmax.v20200101") | dims: ["time", "lat"] | ignore_undetected_dims: (-5.,49.,10.,65) | output_type: "netcdf" @@ -73,19 +66,14 @@ def average_over_dims( | split_method: "time:auto" | file_namer: "standard" | apply_fixes: True - """ - result_set = Average(**locals()).calculate() return result_set class AverageShape(Operation): def _resolve_params(self, collection, **params): - """ - Resolve the input parameters to `self.params` and parameterise - collection parameter and set to `self.collection`. - """ + """Resolve the input parameters to `self.params` and parameterise collection parameter and set to `self.collection`.""" shape = params.get("shape") collection = collection_parameter.CollectionParameter(collection) @@ -109,8 +97,7 @@ def average_shape( file_namer="standard", apply_fixes=True, ): - """ - Average input dataset over indicated shape. + """Average input dataset over indicated shape. Parameters ---------- @@ -130,7 +117,7 @@ def average_shape( Examples -------- - | collection: ("cmip6.cmip..cas.fgoals-g3.historical.r1i1p1fi.Amon.tas.gn.v20190818",) + | collection: ("cmip6.cmip.cas.fgoals-g3.historical.r1i1p1fi.Amon.tas.gn.v20190818") | shape: "path_to_shape" | ignore_undetected_dims: (-5.,49.,10.,65) | output_type: "netcdf" @@ -138,7 +125,6 @@ def average_shape( | split_method: "time:auto" | file_namer: "standard" | apply_fixes: True - """ a = AverageShape(**locals()) result_set = AverageShape(**locals()).calculate() @@ -147,10 +133,7 @@ def average_shape( class AverageTime(Operation): def _resolve_params(self, collection, **params): - """ - Resolve the input parameters to `self.params` and parameterise - collection parameter and set to `self.collection`. - """ + """Resolve the input parameters to `self.params` and parameterise collection parameter and set to `self.collection`.""" freq = params.get("freq") collection = collection_parameter.CollectionParameter(collection) @@ -172,8 +155,7 @@ def average_time( file_namer="standard", apply_fixes=True, ): - """ - Average input dataset according over indicated frequency. + """Average input dataset according to indicated frequency. Parameters ---------- @@ -189,7 +171,6 @@ def average_time( ------- List of outputs in the selected type: a list of xarray Datasets or file paths. 
- Examples -------- | collection: ("cmip6.ukesm1.r1.gn.tasmax.v20200101",) @@ -199,9 +180,7 @@ def average_time( | split_method: "time:auto" | file_namer: "standard" | apply_fixes: True - """ - result_set = AverageTime(**locals()).calculate() return result_set diff --git a/daops/ops/base.py b/src/daops/ops/base.py similarity index 70% rename from daops/ops/base.py rename to src/daops/ops/base.py index 1e130d8..77e1b72 100644 --- a/daops/ops/base.py +++ b/src/daops/ops/base.py @@ -1,14 +1,13 @@ +"""Base class for all Operations.""" + from roocs_utils.parameter import collection_parameter from daops.processor import process -from daops.utils import consolidate -from daops.utils import normalise +from daops.utils import consolidate, normalise class Operation: - """ - Base class for all Operations. - """ + """Base class for all Operations.""" def __init__( self, @@ -20,11 +19,10 @@ def __init__( apply_fixes=True, **params, ): - """ - Constructor for each operation. + """Construct operation. + Sets common input parameters as attributes. - Parameters that are specific to each operation are handled in: - self._resolve_params() + Parameters that are specific to each operation are handled in: self._resolve_params(). """ self._file_namer = file_namer self._split_method = split_method @@ -35,20 +33,16 @@ def __init__( self._consolidate_collection() def _resolve_params(self, collection, **params): - """ - Resolve the operation-specific input parameters to `self.params` and parameterise - collection parameter and set to `self.collection`. - """ + """Resolve the operation-specific input parameters to `self.params` and parameterise collection parameter and set to `self.collection`.""" self.collection = collection_parameter.CollectionParameter(collection) self.params = params def _consolidate_collection(self): - """ - Take in the collection object and finds the file paths relating to each input dataset. - If a time range has been supplied then then only the files relating to this time range are recorded. + """Take in the collection object and finds the file paths relating to each input dataset. + + If a time range has been supplied then only the files relating to this time range are recorded. Set the result to `self.collection`. """ - if "time" in self.params: self.collection = consolidate.consolidate( self.collection, time=self.params.get("time") @@ -58,12 +52,13 @@ def _consolidate_collection(self): self.collection = consolidate.consolidate(self.collection) def get_operation_callable(self): + """Return the operation callable from clisops.""" raise NotImplementedError def calculate(self): - """ - The `calculate()` method processes the input and calculates the result using clisops. - It then returns the result as a daops.normalise.ResultSet object + """Process the input and calculate the result using clisops. + + It then returns the result as a daops.normalise.ResultSet object. 
""" config = { "output_type": self._output_type, @@ -80,12 +75,12 @@ def calculate(self): rs = normalise.ResultSet(vars()) # change name of data ref here - for dset, norm_collection in norm_collection.items(): + for dset, collection in norm_collection.items(): # Process each input dataset (either in series or # parallel) rs.add( dset, - process(self.get_operation_callable(), norm_collection, **self.params), + process(self.get_operation_callable(), collection, **self.params), ) return rs diff --git a/daops/ops/regrid.py b/src/daops/ops/regrid.py similarity index 87% rename from daops/ops/regrid.py rename to src/daops/ops/regrid.py index d255ade..e26db22 100644 --- a/daops/ops/regrid.py +++ b/src/daops/ops/regrid.py @@ -1,6 +1,7 @@ +"""Regrid operation.""" + from clisops.ops.regrid import regrid as clisops_regrid from roocs_utils.parameter import collection_parameter -from roocs_utils.parameter import dimension_parameter from daops.ops.base import Operation @@ -11,10 +12,7 @@ class Regrid(Operation): def _resolve_params(self, collection, **params): - """ - Resolve the input parameters to `self.params` and parameterise - collection parameter and set to `self.collection`. - """ + """Resolve the input parameters to `self.params` and parameterise collection parameter and set to `self.collection`.""" # need to verify method, grid and adaptive masking threshold are correct format - parameters? collection = collection_parameter.CollectionParameter(collection) @@ -40,8 +38,8 @@ def regrid( file_namer="standard", apply_fixes=True, ): - """ - Regrid input dataset according to specified method and output grid. + """Regrid input dataset according to specified method and output grid. + The adaptive masking threshold can also be specified. Parameters @@ -74,7 +72,6 @@ def regrid( | apply_fixes: True """ - result_set = Regrid(**locals()).calculate() return result_set diff --git a/daops/ops/subset.py b/src/daops/ops/subset.py similarity index 90% rename from daops/ops/subset.py rename to src/daops/ops/subset.py index 8946dc7..cdf3d47 100644 --- a/daops/ops/subset.py +++ b/src/daops/ops/subset.py @@ -1,3 +1,5 @@ +"""Subset operation.""" + from clisops.ops.subset import subset as clisops_subset from roocs_utils.parameter import parameterise @@ -10,10 +12,7 @@ class Subset(Operation): def _resolve_params(self, collection, **params): - """ - Resolve the subset parameters to `self.params` and parameterise - collection parameter and set to self.collection. - """ + """Resolve the subset parameters to `self.params` and parameterise collection parameter and set to self.collection.""" parameters = parameterise( collection=collection, time=params.get("time"), @@ -41,9 +40,9 @@ def subset( file_namer="standard", apply_fixes=True, ): - """ - Subset input dataset according to parameters. - Can be subsetted by level, area and time. + """Subset input dataset according to parameters. + + Can be subsetted by level, area, and time. 
Parameters ---------- @@ -81,7 +80,6 @@ def subset( | apply_fixes: True """ - result_set = Subset(**locals()).calculate() return result_set diff --git a/daops/processor.py b/src/daops/processor.py similarity index 66% rename from daops/processor.py rename to src/daops/processor.py index 1f8c3f7..342ef11 100644 --- a/daops/processor.py +++ b/src/daops/processor.py @@ -1,14 +1,16 @@ +"""Module to dispatch the processing operation to the correct mode (serial or parallel).""" + from loguru import logger def dispatch(operation, dset, **kwargs): - logger.info(f"NOW SENDING TO PARALLEL DISPATCH MODE...") + """Dispatch the operation to the correct mode (serial or parallel).""" + logger.info("NOW SENDING TO PARALLEL DISPATCH MODE...") return process(operation, dset, mode="serial", **kwargs) def process(operation, dset, mode="serial", **kwargs): - """Runs the processing operation on the dataset in the correct mode (in series or parallel).""" - + """Run the processing operation on the dataset in the correct mode (in series or parallel).""" op_name = operation.__name__ if mode == "serial": diff --git a/src/daops/utils/__init__.py b/src/daops/utils/__init__.py new file mode 100644 index 0000000..03ae2a7 --- /dev/null +++ b/src/daops/utils/__init__.py @@ -0,0 +1,3 @@ +"""DAOPS utility functions.""" + +from .core import is_characterised as is_characterised diff --git a/src/daops/utils/base_lookup.py b/src/daops/utils/base_lookup.py new file mode 100644 index 0000000..225e45e --- /dev/null +++ b/src/daops/utils/base_lookup.py @@ -0,0 +1,42 @@ +"""Base class used for looking up datasets in the elasticsearch indexes.""" + +import hashlib + +from elasticsearch import Elasticsearch +from roocs_utils.exceptions import InvalidProject +from roocs_utils.project_utils import derive_ds_id + +from daops import config_ + + +class Lookup: + """Base class used for looking up datasets in the elasticsearch indexes.""" + + def __init__(self, dset): # noqa: D107 + self.dset = dset + self.es = Elasticsearch( + "https://" + + config_()["elasticsearch"]["endpoint"] + + ":" + + config_()["elasticsearch"]["port"], + ) + + def convert_to_ds_id(self): + """Convert the input dataset to a drs id form to use with the elasticsearch index.""" + try: + return derive_ds_id(self.dset) + except InvalidProject as err: + raise Exception( + f"The format of {self.dset} is not known and it could not be converted to a ds id." + ) from err + + def _convert_id(self, _id): + """Convert the dataset id to an md5 checksum used to retrieve the fixes for the dataset. + + Converts to drs id format first if necessary. 
+ """ + _id = self.convert_to_ds_id() + + m = hashlib.md5() # noqa: S324 + m.update(_id.encode("utf-8")) + return m.hexdigest() diff --git a/daops/utils/common.py b/src/daops/utils/common.py similarity index 67% rename from daops/utils/common.py rename to src/daops/utils/common.py index 5e48a6b..6c3ce3d 100644 --- a/daops/utils/common.py +++ b/src/daops/utils/common.py @@ -1,11 +1,11 @@ -from typing import List +"""Common utilities for the daops package.""" from clisops import enable_logging as _enable_logging from loguru import logger def _logging_examples() -> None: - """Testing module""" + """Enable testing module.""" logger.trace("0") logger.debug("1") logger.info("2") @@ -15,6 +15,7 @@ def _logging_examples() -> None: logger.critical("5") -def enable_logging() -> List[int]: +def enable_logging() -> list[int]: + """Enable logging for the daops package.""" logger.enable("daops") return _enable_logging() diff --git a/daops/utils/consolidate.py b/src/daops/utils/consolidate.py similarity index 86% rename from daops/utils/consolidate.py rename to src/daops/utils/consolidate.py index 9141e37..d1892cb 100644 --- a/daops/utils/consolidate.py +++ b/src/daops/utils/consolidate.py @@ -1,15 +1,16 @@ +"""Consolidate file paths for each dataset in a collection.""" + import collections -import glob import os import re -import xarray as xr from loguru import logger from roocs_utils.exceptions import InvalidCollection -from roocs_utils.project_utils import derive_ds_id -from roocs_utils.project_utils import dset_to_filepaths -from roocs_utils.project_utils import get_project_base_dir -from roocs_utils.project_utils import get_project_name +from roocs_utils.project_utils import ( + derive_ds_id, + dset_to_filepaths, + get_project_name, +) from roocs_utils.utils.file_utils import FileMapper from roocs_utils.xarray_utils.xarray_utils import is_kerchunk_file, open_xr_dataset @@ -18,20 +19,23 @@ def to_year(time_string): - "Returns the year in a time string as an integer." + """Return the year in a time string as an integer.""" return int(time_string.split("-")[0]) def get_year(value, default): - """Gets a year from a datetime string. Defaults to the value of `default` - if not defined.""" + """Get a year from a datetime string. + + Defaults to the value of `default` if not defined. + """ if value: return to_year(value) return default def get_years_from_file(fpath): - """Attempts to extract years from a file. + """Attempt to extract years from a file. + First by examining the file name. If that doesn't work then it reads the file contents and looks at the time axis. @@ -55,9 +59,9 @@ def get_years_from_file(fpath): def get_files_matching_time_range(time_param, file_paths): - """ - Using the settings in `time_param`, examine each file to see if it contains - years that are in the requested range. + """Examine each file to see if it contains years that are in the requested range. + + Uses the settings in `time_param`. The `time_param` can have three types: 1. type: "interval": @@ -68,14 +72,14 @@ def get_files_matching_time_range(time_param, file_paths): - undefined It attempts to filter out files that do not match the selected year. - For any file that we cannot do this with, the file will be read by - xarray. + For any file that we cannot do this with, the file will be read by xarray. 
Args: time_param (TimeParameter): time parameter of requested date/times file_paths (list): list of file paths Returns: file_paths (list): filtered list of file paths + """ # Return all file paths if no time inputs specified if time_param.type == "none": @@ -110,9 +114,9 @@ def get_files_matching_time_range(time_param, file_paths): def consolidate(collection, **kwargs): - """ - Finds the file paths relating to each input dataset. If a time range has been supplied then only the files - relating to this time range are recorded. + """Find the file paths relating to each input dataset. + + If a time range has been supplied then only the files relating to this time range are recorded. :param collection: (roocs_utils.CollectionParameter) The collection of datasets to process. :param kwargs: Arguments of the operation taking place e.g. subset, average, or re-grid. @@ -120,11 +124,13 @@ def consolidate(collection, **kwargs): relating to it. """ catalog = None - time = None + # time = None collection = _wrap_sequence(collection.value) - if not isinstance(collection[0], FileMapper) and not is_kerchunk_file(collection[0]): + if not isinstance(collection[0], FileMapper) and not is_kerchunk_file( + collection[0] + ): project = get_project_name(collection[0]) catalog = get_catalog(project) diff --git a/daops/utils/core.py b/src/daops/utils/core.py similarity index 79% rename from daops/utils/core.py rename to src/daops/utils/core.py index 97aeea3..28e3329 100644 --- a/daops/utils/core.py +++ b/src/daops/utils/core.py @@ -1,14 +1,16 @@ +"""Utility functions for the DAOPS package.""" + import collections -import xarray as xr from elasticsearch import exceptions from loguru import logger -from roocs_utils.xarray_utils.xarray_utils import open_xr_dataset, is_kerchunk_file +from roocs_utils.xarray_utils.xarray_utils import is_kerchunk_file, open_xr_dataset -from .base_lookup import Lookup -from daops import CONFIG +from daops import config_ from daops.utils import fixer +from .base_lookup import Lookup + def _wrap_sequence(obj): if isinstance(obj, str): @@ -17,27 +19,25 @@ def _wrap_sequence(obj): class Characterised(Lookup): - """ - Characterisation lookup class to look up whether a dataset has been characterised. - """ + """Characterisation lookup class to look up whether a dataset has been characterised.""" def lookup_characterisation(self): - """ - Attempts to find datasets in the characterisation store. Returns True if they exist in the store, - returns False if not. + """Attempt to find datasets in the characterisation store. + + Returns True if they exist in the store, returns False if not. """ id = self._convert_id(self.dset) try: - self.es.get(index=CONFIG["elasticsearch"]["character_store"], id=id) + self.es.get(index=config_()["elasticsearch"]["character_store"], id=id) return True except exceptions.NotFoundError: return False def is_characterised(collection, require_all=False): - """ - Takes in a collection (an individual data reference or a sequence of them). + """Intake a collection (an individual data reference or a sequence of them). + Returns an ordered dictionary of a collection of ids with a boolean value for each stating whether the dataset has been characterised. @@ -62,8 +62,8 @@ def is_characterised(collection, require_all=False): def open_dataset(ds_id, file_paths, apply_fixes=True): - """ - Opens an xarray Dataset and applies fixes if requested. + """Open an xarray Dataset and apply fixes if requested. + Fixes are applied to the data either before or after the dataset is opened. 
Whether a fix is a 'pre-processor' or 'post-processor' is defined in the fix itself. @@ -80,7 +80,7 @@ def open_dataset(ds_id, file_paths, apply_fixes=True): for pre_process in fix.pre_processors: logger.info(f"Loading data with pre_processor: {pre_process.__name__}") else: - logger.info(f"Loading data") + logger.info("Loading data") ds = open_xr_dataset(file_paths, preprocess=fix.pre_processor) diff --git a/daops/utils/fixer.py b/src/daops/utils/fixer.py similarity index 68% rename from daops/utils/fixer.py rename to src/daops/utils/fixer.py index 53cea8e..4bc4919 100644 --- a/daops/utils/fixer.py +++ b/src/daops/utils/fixer.py @@ -1,20 +1,21 @@ -import json -import os +"""Apply fixes to input dataset from the elastic search index.""" + from pydoc import locate from elasticsearch import exceptions +from daops import config_ + from .base_lookup import Lookup -from daops import CONFIG -class FuncChainer(object): +class FuncChainer: """Chains functions together to allow them to be executed in one call.""" - def __init__(self, funcs): + def __init__(self, funcs): # noqa: D107 self.funcs = funcs - def __call__(self, inputs): + def __call__(self, inputs): # noqa: D102 result = inputs for f in self.funcs: result = f(result) @@ -22,18 +23,18 @@ def __call__(self, inputs): class Fixer(Lookup): - """ - Fixer class to look up fixes to apply to input dataset from the elastic search index. - Gathers fixes into pre and post processors. + """Fixer class to look up fixes to apply to input dataset from the elastic search index. + + Gathers fixes into pre- and post-processors. Pre-process fixes are chained together to allow them to be executed with one call. """ - def __init__(self, dset): + def __init__(self, dset): # noqa: D107 Lookup.__init__(self, dset) self._lookup_fix() def _gather_fixes(self, content): - """Gathers pre and post processing fixes together""" + """Gather pre- and post-processing fixes together.""" if content["_source"]["fixes"]: for fix in content["_source"]["fixes"]: ref_implementation = fix["reference_implementation"] @@ -47,7 +48,7 @@ def _gather_fixes(self, content): self.pre_processor = FuncChainer(self.pre_processors) def _lookup_fix(self): - """Looks up fixes on the elasticsearch index.""" + """Look up fixes on the elasticsearch index.""" id = self._convert_id(self.dset) self.pre_processor = None @@ -55,7 +56,7 @@ def _lookup_fix(self): self.post_processors = [] try: - content = self.es.get(index=CONFIG["elasticsearch"]["fix_store"], id=id) + content = self.es.get(index=config_()["elasticsearch"]["fix_store"], id=id) self._gather_fixes(content) except exceptions.NotFoundError: pass diff --git a/daops/utils/normalise.py b/src/daops/utils/normalise.py similarity index 84% rename from daops/utils/normalise.py rename to src/daops/utils/normalise.py index cdf95de..6003e72 100644 --- a/daops/utils/normalise.py +++ b/src/daops/utils/normalise.py @@ -1,3 +1,5 @@ +"""Normalise datasets.""" + import collections import os @@ -7,14 +9,12 @@ def normalise(collection, apply_fixes=True): - """ - Takes file paths and opens and fixes the dataset they make up. + """Take file paths, then open and fix the datasets they make up. :param collection: Ordered dictionary of ds ids and their related file paths. :param apply_fixes: Boolean. If True fixes will be applied to datasets if needed. Default is True. :return: An ordered dictionary of ds ids and their fixed xarray Dataset. 
""" - logger.info(f"Working on datasets: {collection}") norm_collection = collections.OrderedDict() @@ -25,21 +25,20 @@ def normalise(collection, apply_fixes=True): return norm_collection -class ResultSet(object): - """A class to hold the results from an operation e.g. subset""" +class ResultSet: + """A class to hold the results from an operation e.g. subset.""" - def __init__(self, inputs=None): + def __init__(self, inputs=None): # noqa: D107 self._results = collections.OrderedDict() self.metadata = {"inputs": inputs, "process": "something", "version": 0.1} self.file_uris = [] def add(self, dset, result): - """ - Adds outputs to an ordered dictionary with the ds id as the key. + """Add outputs to an ordered dictionary with the ds id as the key. + If the output is a file path this is also added to the file_paths variable so a list of file paths can be accessed independently. """ - self._results[dset] = result for item in result: diff --git a/src/daops/utils/testing.py b/src/daops/utils/testing.py new file mode 100644 index 0000000..122220b --- /dev/null +++ b/src/daops/utils/testing.py @@ -0,0 +1,88 @@ +import os +from pathlib import Path +from typing import Optional, Union + +from _pytest.logging import LogCaptureFixture # noqa +from clisops.utils.testing import ESGF_TEST_DATA_CACHE_DIR, ESGF_TEST_DATA_VERSION +from jinja2 import Template + + +def write_roocs_cfg(cache_dir: Union[str, Path]): + cfg_templ = """ + [project:cmip5] + base_dir = {{ base_dir }}/badc/cmip5/data/cmip5 + + [project:cmip6] + base_dir = {{ base_dir }}/badc/cmip6/data/CMIP6 + + [project:cordex] + base_dir = {{ base_dir }}/badc/cordex/data/cordex + + [project:c3s-cmip5] + base_dir = {{ base_dir }}/gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5 + + [project:c3s-cmip6] + base_dir = {{ base_dir }}/badc/cmip6/data/CMIP6 + + [project:c3s-cordex] + base_dir = {{ base_dir }}/gws/nopw/j04/cp4cds1_vol1/data/c3s-cordex + """ + roocs_config = Path(cache_dir, "roocs.ini") + cfg = Template(cfg_templ).render( + base_dir=Path(ESGF_TEST_DATA_CACHE_DIR).joinpath(ESGF_TEST_DATA_VERSION) + ) + with open(roocs_config, "w") as fp: + fp.write(cfg) + + # point to roocs cfg in environment + os.environ["ROOCS_CONFIG"] = roocs_config.as_posix() + + +def get_esgf_file_paths(esgf_cache_dir: Union[str, os.PathLike[str]]): + return { + "CMIP5_TAS_FPATH": Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_HadGEM2-ES_rcp85_r1i1p1_200512-203011.nc", + ).as_posix(), + "CMIP5_DAY": Path( + esgf_cache_dir, + "badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp45/day/land/day/r1i1p1/latest/mrsos/mrsos_day_HadGEM2-ES_rcp45_r1i1p1_20051201-20151130.nc", + ).as_posix(), + "CMIP6_MONTH": Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SImon/siconc/gn/latest/siconc_SImon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc", + ).as_posix(), + "CMIP6_DAY": Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SIday/siconc/gn/v20190429/siconc_SIday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc", + ).as_posix(), + "CMIP6_DECADAL": Path( + esgf_cache_dir, + "badc/cmip6/data/CMIP6/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/s2004-r3i1p1f2/Amon/pr/gn/v20200417/pr_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s2004-r3i1p1f2_gn_200411-200412.nc", + ).as_posix(), + } + + +class ContextLogger: + """Helper function for safe logging management in pytests.""" + + def __init__(self, caplog: Optional[LogCaptureFixture] = False): + from loguru import 
logger + + self.logger = logger + self.using_caplog = False + if caplog: + self.using_caplog = True + + def __enter__(self): + self.logger.enable("daops") + return self.logger + + def __exit__(self, exc_type, exc_val, exc_tb): + """If test is supplying caplog, pytest will manage teardown.""" + self.logger.disable("daops") + if not self.using_caplog: + try: + self.logger.remove() + except ValueError: + pass diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/_common.py b/tests/_common.py deleted file mode 100644 index c745fcf..0000000 --- a/tests/_common.py +++ /dev/null @@ -1,102 +0,0 @@ -import os -import tempfile -from pathlib import Path -from typing import Optional - -from _pytest.logging import LogCaptureFixture -from jinja2 import Template - -TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) -TESTS_OUTPUTS = os.path.join(TESTS_HOME, "_outputs") -ROOCS_CFG = os.path.join(tempfile.gettempdir(), "roocs.ini") - -MINI_ESGF_CACHE_DIR = Path.home() / ".mini-esgf-data" -MINI_ESGF_MASTER_DIR = os.path.join(MINI_ESGF_CACHE_DIR, "master") - -try: - os.mkdir(TESTS_OUTPUTS) -except Exception: - pass - - -class ContextLogger: - """Helper function for safe logging management in pytests""" - - def __init__(self, caplog: Optional[LogCaptureFixture] = False): - from loguru import logger - - self.logger = logger - self.using_caplog = False - if caplog: - self.using_caplog = True - - def __enter__(self): - self.logger.enable("daops") - return self.logger - - def __exit__(self, exc_type, exc_val, exc_tb): - """If test is supplying caplog, pytest will manage teardown.""" - - self.logger.disable("daops") - if not self.using_caplog: - try: - self.logger.remove() - except ValueError: - pass - - -def write_roocs_cfg(): - cfg_templ = """ - [project:cmip5] - base_dir = {{ base_dir }}/test_data/badc/cmip5/data/cmip5 - - [project:cmip6] - base_dir = {{ base_dir }}/test_data/badc/cmip6/data/CMIP6 - - [project:cordex] - base_dir = {{ base_dir }}/test_data/badc/cordex/data/cordex - - [project:c3s-cmip5] - base_dir = {{ base_dir }}/test_data/gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5 - - [project:c3s-cmip6] - base_dir = {{ base_dir }}/test_data/badc/cmip6/data/CMIP6 - - [project:c3s-cordex] - base_dir = {{ base_dir }}/test_data/gws/nopw/j04/cp4cds1_vol1/data/c3s-cordex - """ - cfg = Template(cfg_templ).render(base_dir=MINI_ESGF_MASTER_DIR) - with open(ROOCS_CFG, "w") as fp: - fp.write(cfg) - - # point to roocs cfg in environment - os.environ["ROOCS_CONFIG"] = ROOCS_CFG - - -CMIP5_TAS_FPATH = Path( - MINI_ESGF_MASTER_DIR, - "test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_HadGEM2-ES_rcp85_r1i1p1_200512-203011.nc", -).as_posix() - -CMIP5_DAY = Path( - MINI_ESGF_MASTER_DIR, - "test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp45/day/land/day/r1i1p1/latest/mrsos/mrsos_day_HadGEM2-ES_rcp45_r1i1p1_20051201-20151130.nc", -).as_posix() - -CMIP6_MONTH = Path( - MINI_ESGF_MASTER_DIR, - "test_data/badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SImon/siconc/gn/latest/siconc_SImon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc", -).as_posix() - -CMIP6_DAY = Path( - MINI_ESGF_MASTER_DIR, - "test_data/badc/cmip6/data/CMIP6/CMIP/CCCma/CanESM5/historical/r1i1p1f1/SIday/siconc/gn/v20190429/siconc_SIday_CanESM5_historical_r1i1p1f1_gn_18500101-20141231.nc", -).as_posix() - -CMIP6_DECADAL = Path( - MINI_ESGF_MASTER_DIR, - 
"test_data/badc/cmip6/data/CMIP6/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/s2004-r3i1p1f2/Amon/pr/gn/v20200417/pr_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s2004-r3i1p1f2_gn_200411-200412.nc", -).as_posix() - -CMIP6_KERCHUNK_HTTPS_OPEN_JSON = ("https://gws-access.jasmin.ac.uk/public/cmip6_prep/eodh-eocis/kc-indexes-cmip6-http-v1/" - "CMIP6.CMIP.MOHC.UKESM1-1-LL.1pctCO2.r1i1p1f2.Amon.tasmax.gn.v20220513.json") diff --git a/tests/conftest.py b/tests/conftest.py index 445eaa2..1fdc690 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,17 +1,40 @@ -import os -import shutil +from clisops.utils.testing import stratus as _stratus + +from clisops.utils.testing import ( + ESGF_TEST_DATA_CACHE_DIR, + ESGF_TEST_DATA_REPO_URL, + ESGF_TEST_DATA_VERSION, + gather_testing_data, +) -import numpy as np import pytest -import xarray as xr -from git import Repo -from tests._common import MINI_ESGF_CACHE_DIR -from tests._common import write_roocs_cfg +from daops.utils.testing import write_roocs_cfg as _write_roocs_cfg, get_esgf_file_paths + + +@pytest.fixture +def cmip6_kerchunk_https_open_json(): + return ( + "https://gws-access.jasmin.ac.uk/public/cmip6_prep/eodh-eocis/kc-indexes-cmip6-http-v1/" + "CMIP6.CMIP.MOHC.UKESM1-1-LL.1pctCO2.r1i1p1f2.Amon.tasmax.gn.v20220513.json" + ) + -write_roocs_cfg() +@pytest.fixture(scope="session", autouse=True) +def write_roocs_cfg(stratus): + _write_roocs_cfg(stratus.path) + # TODO: reload configs in clisops and roocs_utils + # workaround ... fix code in new clisops. + import roocs_utils + import clisops + from roocs_utils.config import get_config as _get_config -ESGF_TEST_DATA_REPO_URL = "https://github.com/roocs/mini-esgf-data" + roocs_utils.project_utils.CONFIG = _get_config(roocs_utils) + roocs_utils.CONFIG = _get_config(roocs_utils) + clisops.CONFIG = _get_config(clisops) + # clisops.core.regrid.CONFIG = _get_config(clisops) + # clisops.utils.file_namers.CONFIG = _get_config(clisops) + # clisops.utils.output_utils.CONFIG = _get_config(clisops) @pytest.fixture @@ -19,24 +42,34 @@ def tmp_netcdf_filename(tmp_path): return tmp_path.joinpath("testfile.nc") -# Fixture to load mini-esgf-data repository used by roocs tests -@pytest.fixture -def load_esgf_test_data(): +@pytest.fixture(scope="session") +def stratus(): + return _stratus( + repo=ESGF_TEST_DATA_REPO_URL, + branch=ESGF_TEST_DATA_VERSION, + cache_dir=(ESGF_TEST_DATA_CACHE_DIR), + ) + + +@pytest.fixture(scope="session", autouse=True) +def load_test_data(stratus): """ This fixture ensures that the required test data repository has been cloned to the cache directory within the home directory. 
""" - branch = "master" - target = os.path.join(MINI_ESGF_CACHE_DIR, branch) + repositories = { + "stratus": { + "worker_cache_dir": stratus.path, + "repo": ESGF_TEST_DATA_REPO_URL, + "branch": ESGF_TEST_DATA_VERSION, + "cache_dir": ESGF_TEST_DATA_CACHE_DIR, + }, + } - if not os.path.isdir(MINI_ESGF_CACHE_DIR): - os.makedirs(MINI_ESGF_CACHE_DIR) + for name, repo in repositories.items(): + gather_testing_data(worker_id="master", **repo) - if not os.path.isdir(target): - repo = Repo.clone_from(ESGF_TEST_DATA_REPO_URL, target) - repo.git.checkout(branch) - elif os.environ.get("ROOCS_AUTO_UPDATE_TEST_DATA", "true").lower() != "false": - repo = Repo(target) - repo.git.checkout(branch) - repo.remotes[0].pull() +@pytest.fixture(scope="session", autouse=True) +def mini_esgf_data(stratus): + return get_esgf_file_paths(stratus.path) diff --git a/tests/test_catalog/__init__.py b/tests/test_catalog/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_catalog/test_intake.py b/tests/test_catalog/test_intake.py index a401b17..ee16968 100644 --- a/tests/test_catalog/test_intake.py +++ b/tests/test_catalog/test_intake.py @@ -1,7 +1,4 @@ from daops.catalog import IntakeCatalog -from tests._common import MINI_ESGF_MASTER_DIR - -CMIP6_BASE_DIR = f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip6/data/CMIP6" C3S_CMIP6_DAY_COLLECTION = ( @@ -15,17 +12,19 @@ ) -def test_intake_catalog_c3s_cmip6_mon(): +def test_intake_catalog_c3s_cmip6_mon(stratus): + cmip6_base_dir = f"{stratus.path}/badc/cmip6/data/CMIP6" + cat = IntakeCatalog(project="c3s-cmip6") result = cat.search(collection=C3S_CMIP6_MON_COLLECTION) assert result.matches == 1 files = result.files()[C3S_CMIP6_MON_COLLECTION] assert len(files) == 1 expected_file = ( - f"{CMIP6_BASE_DIR}/ScenarioMIP/INM/INM-CM5-0/ssp245/r1i1p1f1/Amon/rlds/gr1/v20190619/" + f"{cmip6_base_dir}/ScenarioMIP/INM/INM-CM5-0/ssp245/r1i1p1f1/Amon/rlds/gr1/v20190619/" "rlds_Amon_INM-CM5-0_ssp245_r1i1p1f1_gr1_201501-210012.nc" ) - assert expected_file == files[0] + assert expected_file in files[0] # check download url urls = result.download_urls()[C3S_CMIP6_MON_COLLECTION] expected_url = ( diff --git a/tests/test_cli.py b/tests/test_cli.py index 276dbfe..efc1979 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,4 @@ -""" -Test the command line interface. +"""Test the command line interface. This module is based on test_subset.py, but the tests are made to use the CLI instead of calling 'subset' directly. String values are required by the CLI, and it is called @@ -8,22 +7,16 @@ input types. 
""" +import configparser import os +import subprocess as sp +import tempfile import numpy as np +import py.path import pytest import xarray as xr -import subprocess as sp -import py.path -import configparser -import tempfile - -from daops import CONFIG -from tests._common import CMIP5_DAY -from tests._common import CMIP5_TAS_FPATH -from tests._common import CMIP6_DAY -from tests._common import CMIP6_MONTH -from tests._common import MINI_ESGF_MASTER_DIR +from daops import config_ CMIP5_IDS = [ "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga", @@ -69,10 +62,7 @@ def __init__(self, file_uris): def _make_tmp_config(config_file, config_overrides): - """ - Given a config file path and a list of (section, item, value) 3-tuples, - create a temporary config file and return the path - """ + """Given a config file path and a list of (section, item, value) 3-tuples, create a temporary config file and return the path.""" config = configparser.ConfigParser() config.read(config_file) for section, item, value in config_overrides: @@ -88,11 +78,7 @@ def _make_tmp_config(config_file, config_overrides): def _cli_subset(*args, config_overrides=None, **kwargs): - """ - A function that behaves somewhat similarly to calling subset directly, - but instead wraps the CLI using subprocess. - """ - + """A function that behaves somewhat similarly to calling subset directly, but instead wraps the CLI using subprocess.""" config_env_var = "ROOCS_CONFIG" collections = args @@ -145,7 +131,7 @@ def _cli_subset(*args, config_overrides=None, **kwargs): @pytest.mark.online -def test_cli_subset_zostoga(tmpdir, load_esgf_test_data): +def test_cli_subset_zostoga(tmpdir): result = _cli_subset( CMIP5_IDS[0], time="2085-01-16/2120-12-16", @@ -162,7 +148,7 @@ def test_cli_subset_zostoga(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_t(tmpdir, load_esgf_test_data): +def test_cli_subset_t(tmpdir): result = _cli_subset( CMIP5_IDS[1], time="2085-01-16/2120-12-16", @@ -186,10 +172,9 @@ def test_cli_subset_collection_as_empty_string(tmpdir): @pytest.mark.online -def test_cli_subset_t_y_x(tmpdir, load_esgf_test_data): +def test_cli_subset_t_y_x(tmpdir, stratus): fpath = ( - f"{MINI_ESGF_MASTER_DIR}/" - "test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/" + f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/" "atmos/Amon/r1i1p1/latest/tas/*.nc" ) @@ -214,10 +199,9 @@ def test_cli_subset_t_y_x(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_t_z_y_x(tmpdir, load_esgf_test_data): +def test_cli_subset_t_z_y_x(tmpdir, stratus): fpath = ( - f"{MINI_ESGF_MASTER_DIR}/" - "test_data/badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/" + f"{stratus.path}/badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/" "GFDL-ESM4/historical/r1i1p1f1/Amon/o3/gr1/v20190726/" "o3_Amon_GFDL-ESM4_historical_r1i1p1f1_gr1_185001-194912.nc" ) @@ -229,27 +213,30 @@ def test_cli_subset_t_z_y_x(tmpdir, load_esgf_test_data): ) assert ds.o3.shape == (1200, 19, 2, 3) - assert list(ds.o3.coords["plev"].values) == [ - 100000.0, - 92500.0, - 85000.0, - 70000.0, - 60000.0, - 50000.0, - 40000.0, - 30000.0, - 25000.0, - 20000.0, - 15000.0, - 10000.0, - 7000.0, - 5000.0, - 3000.0, - 2000.0, - 1000.0, - 500.0, - 100.0, - ] + np.testing.assert_array_equal( + ds.o3.coords["plev"], + [ + 100000.0, + 92500.0, + 85000.0, + 70000.0, + 60000.0, + 50000.0, + 40000.0, + 30000.0, + 25000.0, + 20000.0, + 15000.0, + 10000.0, + 7000.0, + 5000.0, + 3000.0, + 2000.0, + 1000.0, + 500.0, + 100.0, + ], + ) result = 
_cli_subset( CMIP6_IDS[0], @@ -266,7 +253,7 @@ def test_cli_subset_t_z_y_x(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_t_with_invalid_date(tmpdir, load_esgf_test_data): +def test_cli_subset_t_with_invalid_date(tmpdir): with pytest.raises(_CliFail) as exc: _cli_subset( CMIP5_IDS[1], @@ -298,7 +285,7 @@ def zostoga_id(request): @pytest.mark.online -def test_time_is_none(tmpdir, load_esgf_test_data): +def test_time_is_none(tmpdir): result = _cli_subset( CMIP5_IDS[1], area="0,-10,120,40", @@ -310,7 +297,7 @@ def test_time_is_none(tmpdir, load_esgf_test_data): ds = xr.open_mfdataset( os.path.join( - CONFIG["project:cmip5"]["base_dir"], + config_()["project:cmip5"]["base_dir"], "output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", ), use_cftime=True, @@ -326,7 +313,7 @@ def test_time_is_none(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_end_time_is_none(tmpdir, load_esgf_test_data): +def test_end_time_is_none(tmpdir): result = _cli_subset( CMIP5_IDS[2], time="1940-10-14/", @@ -338,7 +325,7 @@ def test_end_time_is_none(tmpdir, load_esgf_test_data): ds = xr.open_mfdataset( os.path.join( - CONFIG["project:cmip5"]["base_dir"], + config_()["project:cmip5"]["base_dir"], "output1/MOHC/HadGEM2-ES/historical/mon/land/Lmon/r1i1p1/latest/rh/*.nc", ), use_cftime=True, @@ -352,7 +339,7 @@ def test_end_time_is_none(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_start_time_is_none(tmpdir, load_esgf_test_data): +def test_start_time_is_none(tmpdir): result = _cli_subset( CMIP5_IDS[1], time="/2120-12-16", @@ -364,7 +351,7 @@ def test_start_time_is_none(tmpdir, load_esgf_test_data): ds = xr.open_mfdataset( os.path.join( - CONFIG["project:cmip5"]["base_dir"], + config_()["project:cmip5"]["base_dir"], "output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", ), use_cftime=True, @@ -378,7 +365,7 @@ def test_start_time_is_none(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_time_invariant_subset_standard_name(tmpdir, load_esgf_test_data): +def test_time_invariant_subset_standard_name(tmpdir): dset = "CMIP6.ScenarioMIP.IPSL.IPSL-CM6A-LR.ssp119.r1i1p1f1.fx.mrsofc.gr.v20190410" result = _cli_subset( @@ -393,11 +380,11 @@ def test_time_invariant_subset_standard_name(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_with_multiple_collections(tmpdir, load_esgf_test_data): +def test_cli_subset_with_multiple_collections(tmpdir, stratus): file_paths = [ - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES" + f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES" f"/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_HadGEM2-ES_rcp85_r1i1p1_200512-203011.nc", - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES" + f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES" f"/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_HadGEM2-ES_rcp85_r1i1p1_203012-205511.nc", ] @@ -413,7 +400,7 @@ def test_cli_subset_with_multiple_collections(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_with_catalog(tmpdir, load_esgf_test_data): +def test_cli_subset_with_catalog(tmpdir): # c3s-cmip6 dataset so will use catalog in consolidate result = _cli_subset( "c3s-cmip6.ScenarioMIP.INM.INM-CM5-0.ssp245.r1i1p1f1.Amon.rlds.gr1.v20190619", @@ -429,10 +416,10 @@ def test_cli_subset_with_catalog(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_with_catalog_time_invariant(tmpdir, load_esgf_test_data): +def 
test_cli_subset_with_catalog_time_invariant(tmpdir): # c3s-cmip6 dataset so will use catalog in consolidate result = _cli_subset( - f"c3s-cmip6.ScenarioMIP.MPI-M.MPI-ESM1-2-LR.ssp370.r1i1p1f1.fx.mrsofc.gn.v20190815", + "c3s-cmip6.ScenarioMIP.MPI-M.MPI-ESM1-2-LR.ssp370.r1i1p1f1.fx.mrsofc.gn.v20190815", output_dir=tmpdir, output_type="nc", file_namer="standard", @@ -442,13 +429,16 @@ def test_cli_subset_with_catalog_time_invariant(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_by_time_components_year_month(tmpdir, load_esgf_test_data): +def test_cli_subset_by_time_components_year_month(tmpdir, mini_esgf_data): tc1 = _time_components_str(year=(2021, 2022), month=["dec", "jan", "feb"]) tc2 = _time_components_str(year=(2021, 2022), month=[12, 1, 2]) for tc in (tc1, tc2): result = _cli_subset( - CMIP5_TAS_FPATH, time_components=tc, output_dir=tmpdir, file_namer="simple" + mini_esgf_data["CMIP5_TAS_FPATH"], + time_components=tc, + output_dir=tmpdir, + file_namer="simple", ) ds = xr.open_dataset(result.file_uris[0], use_cftime=True) @@ -459,14 +449,17 @@ def test_cli_subset_by_time_components_year_month(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_by_time_components_month_day(tmpdir, load_esgf_test_data): +def test_cli_subset_by_time_components_month_day(tmpdir, mini_esgf_data): # 20051201-20151130 tc1 = _time_components_str(month=["jul"], day=[1, 11, 21]) tc2 = _time_components_str(month=[7], day=[1, 11, 21]) for tc in (tc1, tc2): result = _cli_subset( - CMIP5_DAY, time_components=tc, output_dir=tmpdir, file_namer="simple" + mini_esgf_data["CMIP5_DAY"], + time_components=tc, + output_dir=tmpdir, + file_namer="simple", ) ds = xr.open_dataset(result.file_uris[0], use_cftime=True) @@ -478,9 +471,7 @@ def test_cli_subset_by_time_components_month_day(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_by_time_interval_and_components_month_day( - tmpdir, load_esgf_test_data -): +def test_cli_subset_by_time_interval_and_components_month_day(tmpdir, mini_esgf_data): # 20051201-20151130 ys, ye = 2007, 2010 ti = f"{ys}-12-01T00:00:00/{ye}-11-30T23:59:59" @@ -493,7 +484,7 @@ def test_cli_subset_by_time_interval_and_components_month_day( for tc in (tc1, tc2): result = _cli_subset( - CMIP5_DAY, + mini_esgf_data["CMIP5_DAY"], time=ti, time_components=tc, output_dir=tmpdir, @@ -535,7 +526,7 @@ def test_cli_subset_by_time_interval_and_components_month_day( @pytest.mark.online def test_cli_subset_by_time_series_and_components_month_day_cmip6( - tmpdir, load_esgf_test_data + tmpdir, mini_esgf_data ): # 18500101-20141231 @@ -547,7 +538,7 @@ def test_cli_subset_by_time_series_and_components_month_day_cmip6( ys, ye = 1998, 2010 req_times = [ tm.isoformat() - for tm in xr.open_dataset(CMIP6_DAY).time.values + for tm in xr.open_dataset(mini_esgf_data["CMIP6_DAY"]).time.values if ys <= tm.year <= ye ] @@ -560,7 +551,7 @@ def test_cli_subset_by_time_series_and_components_month_day_cmip6( for tc in (tc1, tc2): result = _cli_subset( - CMIP6_DAY, + mini_esgf_data["CMIP6_DAY"], time=ts, time_components=tc, output_dir=tmpdir, @@ -576,16 +567,16 @@ def test_cli_subset_by_time_series_and_components_month_day_cmip6( @pytest.mark.online -def test_cli_subset_components_day_monthly_dataset(tmpdir, load_esgf_test_data): - #  tests key error is raised when trying to select a non existent day on a monthly dataset +def test_cli_subset_components_day_monthly_dataset(tmpdir, mini_esgf_data): + # tests key error is raised when trying to select a nonexistent day on a 
monthly dataset # 18500101-20141231 # allow use of dataset - defaults to c3s-cmip6 and this is not in the catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False ys, ye = 1998, 2010 req_times = [ tm.isoformat() - for tm in xr.open_dataset(CMIP6_MONTH).time.values + for tm in xr.open_dataset(mini_esgf_data["CMIP6_MONTH"]).time.values if ys <= tm.year <= ye ] @@ -597,7 +588,7 @@ def test_cli_subset_components_day_monthly_dataset(tmpdir, load_esgf_test_data): with pytest.raises(_CliFail) as exc: _cli_subset( - CMIP6_MONTH, + mini_esgf_data["CMIP6_MONTH"], time=ts, time_components=tc, output_dir=tmpdir, @@ -606,12 +597,14 @@ def test_cli_subset_components_day_monthly_dataset(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_by_time_series(tmpdir, load_esgf_test_data): - t = [str(tm) for tm in xr.open_dataset(CMIP5_TAS_FPATH).time.values] +def test_cli_subset_by_time_series(tmpdir, mini_esgf_data): + t = [ + str(tm) for tm in xr.open_dataset(mini_esgf_data["CMIP5_TAS_FPATH"]).time.values + ] some_times = [t[0], t[100], t[4], t[33], t[9]] result = _cli_subset( - CMIP5_TAS_FPATH, + mini_esgf_data["CMIP5_TAS_FPATH"], time=",".join(some_times), output_dir=tmpdir, file_namer="simple", @@ -628,7 +621,7 @@ def test_cli_subset_by_time_series(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_by_level_series(tmpdir, load_esgf_test_data): +def test_cli_subset_by_level_series(tmpdir): some_levels = [60000.0, 15000.0, 40000.0, 1000.0, 92500.0] result = _cli_subset( @@ -649,7 +642,7 @@ def test_cli_subset_by_level_series(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_cmip6_nc_consistent_bounds(tmpdir, load_esgf_test_data): +def test_cli_subset_cmip6_nc_consistent_bounds(tmpdir): """Test daops subset to check consistent bounds in metadata.""" result = _cli_subset( CMIP6_IDS[0], @@ -674,7 +667,7 @@ def test_cli_subset_cmip6_nc_consistent_bounds(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_cli_subset_c3s_cmip6_nc_consistent_bounds(tmpdir, load_esgf_test_data): +def test_cli_subset_c3s_cmip6_nc_consistent_bounds(tmpdir): """Test daops subset to check consistent bounds in metadata.""" result = _cli_subset( C3S_CMIP6_IDS[0], diff --git a/tests/test_daops.py b/tests/test_daops.py index a173fe0..16e896b 100644 --- a/tests/test_daops.py +++ b/tests/test_daops.py @@ -1,4 +1,5 @@ -""" Tests for daops library """ +"""Tests for daops library""" + # FutureWarning: In xarray version 0.15 the default behaviour of `open_mfdataset` # will change. To retain the existing behavior, pass # combine='nested'. To use future default behavior, pass @@ -13,7 +14,8 @@ def test_subset_data_ref(): def test_subset_time(): """Tests daops api.subset function with a time subset. - Check ResultSet contains the correct info""" + Check ResultSet contains the correct info + """ pass @@ -24,7 +26,8 @@ def test_subset_invalid_time(): def test_subset_space(): """Tests daops api.subset function with a space subset. - Check ResultSet contains the correct info""" + Check ResultSet contains the correct info + """ pass @@ -35,7 +38,8 @@ def test_subset_invalid_space(): def test_subset_level(): """Tests daops api.subset function with a level subset. - Check ResultSet contains the correct info""" + Check ResultSet contains the correct info + """ pass @@ -46,67 +50,78 @@ def test_subset_invalid_level(): def test_subset_all(): """Tests daops api.subset function with time, space, level subsets. 
- Check ResultSet contains the correct info""" + Check ResultSet contains the correct info + """ pass def test_wrap_sequence_str(): """Tests daops utils._wrap_sequence with string. - Check correct type is returned when string passed.""" + Check correct type is returned when string passed. + """ pass def test_wrap_sequence_not_str(): """Tests daops utils._wrap_sequence with object that isn't a string. - Check correct response when passed.""" + Check correct response when passed. + """ pass def test_is_data_ref_characterised_true(): """Tests daops utils.is_dataref_characterised. - Check correct response for data ref that is characterised.""" + Check correct response for data ref that is characterised. + """ pass def test_is_data_ref_characterised_false(): """Tests daops utils.is_dataref_characterised. - Check correct response for data ref that is not characterised.""" + Check correct response for data ref that is not characterised. + """ pass def test_is_characterised_all_required_true_mixed(): """Tests daops utils.is_characterised. - Check response when all required is True for mixed characterisation.""" + Check response when all required is True for mixed characterisation. + """ pass def test_is_characterised_all_required_true_all(): """Tests daops utils.is_characterised. - Check response when all required is True for all characterised.""" + Check response when all required is True for all characterised. + """ pass def test_is_characterised_all_required_true_none(): """Tests daops utils.is_characterised. - Check response when all required is True for none characterised.""" + Check response when all required is True for none characterised. + """ pass def test_is_characterised_all_required_false_mixed(): """Tests daops utils.is_characterised. - Check response when all required is False for mixed characterisation.""" + Check response when all required is False for mixed characterisation. + """ pass def test_is_characterised_all_required_false_all(): """Tests daops utils.is_characterised. - Check response when all required is False for all characterised.""" + Check response when all required is False for all characterised. + """ pass def test_is_characterised_all_required_false_none(): """Tests daops utils.is_characterised. - Check response when all required is False for none characterised.""" + Check response when all required is False for none characterised. + """ pass @@ -120,7 +135,8 @@ def test_consolidate_data_ref_fpath(): def test_consolidate_data_ref_drs(): """Tests daops utils._consolidate_data_ref with DRS e.g. - cmip5.output1.MOHC.HadGEM2-ES.historical.mon.land.Lmon.r1i1p1.latest.rh""" + cmip5.output1.MOHC.HadGEM2-ES.historical.mon.land.Lmon.r1i1p1.latest.rh + """ pass @@ -131,43 +147,50 @@ def test_consolidate_data_ref_invalid(): def test_consolidate_mixed(): """Tests daops utils.consolidate. - Test when drefs are a mixture of file paths and drs.""" + Test when drefs are a mixture of file paths and drs. + """ pass def test_consolidate_all_fpath(): """Tests daops utils.consolidate. - Test when drefs are all file paths.""" + Test when drefs are all file paths. + """ pass def test_consolidate_one_invalid(): """Tests daops utils.consolidate. - Test when one dref is invalid.""" + Test when one dref is invalid. + """ pass def test_consolidate_all_dref(): """Tests daops utils.consolidate. - Test when drefs are all drs.""" + Test when drefs are all drs. + """ pass def test_consolidate_with_time(): """Tests daops utils.consolidate. 
- Test when a valid time is passed as a kwarg.""" + Test when a valid time is passed as a kwarg. + """ pass def test_consolidate_with_invalid_time(): """Tests daops utils.consolidate. - Test when an invalid time range is passed as a kwarg.""" + Test when an invalid time range is passed as a kwarg. + """ pass def test_consolidate_all_kwargs(): """Tests daops utils.consolidate. - Test when all kwargs are provided.""" + Test when all kwargs are provided. + """ pass @@ -183,13 +206,15 @@ def test_normalise_character_no_problem(): def test_ResultSet_init(): """Tests init function of ResultSet class in daops utils. - Checks the metadata is as expected.""" + Checks the metadata is as expected. + """ pass def test_ResultSet_add(): """Tests add function of ResultSet class in daops utils. - Checks the file paths and _results are expected.""" + Checks the file paths and _results are expected. + """ pass diff --git a/tests/test_data_utils/test_array_utils.py b/tests/test_data_utils/test_array_utils.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_data_utils/test_attr_utils.py b/tests/test_data_utils/test_attr_utils.py index bafea36..2a0f2af 100644 --- a/tests/test_data_utils/test_attr_utils.py +++ b/tests/test_data_utils/test_attr_utils.py @@ -1,17 +1,15 @@ -import numpy as np import xarray as xr +from daops.data_utils.attr_utils import ( + add_global_attrs_if_needed, + edit_global_attrs, + edit_var_attrs, +) from roocs_utils.xarray_utils.xarray_utils import open_xr_dataset -from daops.data_utils.attr_utils import add_global_attrs_if_needed -from daops.data_utils.attr_utils import edit_global_attrs -from daops.data_utils.attr_utils import edit_var_attrs -from tests._common import CMIP6_DECADAL -from tests._common import MINI_ESGF_MASTER_DIR - -def test_edit_var_attrs(load_esgf_test_data): +def test_edit_var_attrs(stratus): ds = xr.open_mfdataset( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_EC-EARTH_historical_r1i1p1_185001-185912.nc", + f"{stratus.path}/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_EC-EARTH_historical_r1i1p1_185001-185912.nc", combine="by_coords", use_cftime=True, ) @@ -33,9 +31,9 @@ def test_edit_var_attrs(load_esgf_test_data): assert ds_change_var_attrs.lat.attrs["long_name"] == "False long name" -def test_edit_global_attrs(load_esgf_test_data): +def test_edit_global_attrs(stratus): ds = xr.open_mfdataset( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_EC-EARTH_historical_r1i1p1_185001-185912.nc", + f"{stratus.path}/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_EC-EARTH_historical_r1i1p1_185001-185912.nc", combine="by_coords", use_cftime=True, ) @@ -62,8 +60,8 @@ def test_edit_global_attrs(load_esgf_test_data): assert ds_change_global_attrs.attrs["test"] == "this is a new test attribute" -def test_edit_global_attrs_with_derive(load_esgf_test_data): - ds = open_xr_dataset(CMIP6_DECADAL) +def test_edit_global_attrs_with_derive(mini_esgf_data): + ds = open_xr_dataset(mini_esgf_data["CMIP6_DECADAL"]) ds_id = "CMIP6.DCPP.MOHC.HadGEM3-GC31-MM.dcppA-hindcast.s2004-r3i1p1f2.Amon.pr.gn.v20200417" assert ds.attrs.get("startdate") is None @@ -81,8 +79,8 @@ def test_edit_global_attrs_with_derive(load_esgf_test_data): assert ds_change_global_attrs.attrs["sub_experiment_id"] == "s200411" -def 
test_edit_global_attrs_with_derive_and_arg(load_esgf_test_data): - ds = open_xr_dataset(CMIP6_DECADAL) +def test_edit_global_attrs_with_derive_and_arg(mini_esgf_data): + ds = open_xr_dataset(mini_esgf_data["CMIP6_DECADAL"]) ds_id = "CMIP6.DCPP.MOHC.HadGEM3-GC31-MM.dcppA-hindcast.s2004-r3i1p1f2.Amon.pr.gn.v20200417" assert ds.attrs.get("forcing_description") is None @@ -100,9 +98,9 @@ def test_edit_global_attrs_with_derive_and_arg(load_esgf_test_data): ) -def test_add_global_attrs_if_needed(load_esgf_test_data): +def test_add_global_attrs_if_needed(stratus): ds = xr.open_mfdataset( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_EC-EARTH_historical_r1i1p1_185001-185912.nc", + f"{stratus.path}/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_EC-EARTH_historical_r1i1p1_185001-185912.nc", combine="by_coords", use_cftime=True, ) diff --git a/tests/test_data_utils/test_coord_utils.py b/tests/test_data_utils/test_coord_utils.py index 677b29d..46af91a 100644 --- a/tests/test_data_utils/test_coord_utils.py +++ b/tests/test_data_utils/test_coord_utils.py @@ -1,18 +1,12 @@ -import cftime import numpy as np import xarray as xr +from daops.data_utils.coord_utils import add_coord, add_scalar_coord, squeeze_dims from roocs_utils.xarray_utils.xarray_utils import open_xr_dataset -from daops.data_utils.coord_utils import add_coord -from daops.data_utils.coord_utils import add_scalar_coord -from daops.data_utils.coord_utils import squeeze_dims -from tests._common import CMIP6_DECADAL -from tests._common import MINI_ESGF_MASTER_DIR - -def test_squeeze_dims(load_esgf_test_data): +def test_squeeze_dims(stratus): ds = xr.open_mfdataset( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/INM/" + f"{stratus.path}/badc/cmip5/data/cmip5/output1/INM/" "inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/*.nc", combine="by_coords", use_cftime=True, @@ -27,16 +21,16 @@ def test_squeeze_dims(load_esgf_test_data): assert "lev" not in ds_squeeze.dims -def test_add_scalar_coord(load_esgf_test_data): +def test_add_scalar_coord(stratus): ds_no_height = xr.open_mfdataset( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + f"{stratus.path}/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", combine="by_coords", use_cftime=True, ) ds_id = "cmip5.output1.ICHEC.EC-EARTH.historical.mon.atmos.Amon.r1i1p1.latest.tas" ds_with_height = xr.open_mfdataset( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/INM/inmcm4/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + f"{stratus.path}/badc/cmip5/data/cmip5/output1/INM/inmcm4/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", combine="by_coords", use_cftime=True, ) @@ -57,8 +51,8 @@ def test_add_scalar_coord(load_esgf_test_data): assert ds_no_height.height.attrs == ds_with_height.height.attrs -def test_add_scalar_coord_with_derive(load_esgf_test_data): - ds_no_reftime = open_xr_dataset(CMIP6_DECADAL) +def test_add_scalar_coord_with_derive(mini_esgf_data): + ds_no_reftime = open_xr_dataset(mini_esgf_data["CMIP6_DECADAL"]) ds_id = "CMIP6.DCPP.MOHC.HadGEM3-GC31-MM.dcppA-hindcast.s2004-r3i1p1f2.Amon.pr.gn.v20200417" operands = { @@ -82,9 +76,9 @@ def test_add_scalar_coord_with_derive(load_esgf_test_data): assert value == "2004-11-01T00:00:00" -def test_add_coord(load_esgf_test_data): +def 
test_add_coord(stratus): ds_no_leadtime = xr.open_mfdataset( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_EC-EARTH_historical_r1i1p1_185001-185912.nc", + f"{stratus.path}/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_EC-EARTH_historical_r1i1p1_185001-185912.nc", combine="by_coords", use_cftime=True, ) @@ -111,8 +105,8 @@ def test_add_coord(load_esgf_test_data): ) -def test_add_coord_with_derive(load_esgf_test_data): - ds_no_leadtime = open_xr_dataset(CMIP6_DECADAL) +def test_add_coord_with_derive(mini_esgf_data): + ds_no_leadtime = open_xr_dataset(mini_esgf_data["CMIP6_DECADAL"]) ds_id = "CMIP6.DCPP.MOHC.HadGEM3-GC31-MM.dcppA-hindcast.s2004-r3i1p1f2.Amon.pr.gn.v20200417" operands = { diff --git a/tests/test_data_utils/test_var_utils.py b/tests/test_data_utils/test_var_utils.py index 1255420..bb8f6cc 100644 --- a/tests/test_data_utils/test_var_utils.py +++ b/tests/test_data_utils/test_var_utils.py @@ -1,12 +1,10 @@ import xarray as xr - from daops.data_utils.var_utils import add_data_var -from tests._common import MINI_ESGF_MASTER_DIR -def test_add_data_var(load_esgf_test_data): +def test_add_data_var(stratus): ds = xr.open_mfdataset( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/INM/" + f"{stratus.path}/badc/cmip5/data/cmip5/output1/INM/" "inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/*.nc", combine="by_coords", use_cftime=True, diff --git a/tests/test_fixes_applied.py b/tests/test_fixes_applied.py index 5f7e629..9171597 100644 --- a/tests/test_fixes_applied.py +++ b/tests/test_fixes_applied.py @@ -1,14 +1,11 @@ -import math import os import cftime import numpy as np import pytest import xarray as xr - -from daops import CONFIG +from daops import config_ from daops.ops.subset import subset -from tests._common import MINI_ESGF_MASTER_DIR def _check_output_nc(result, fname="output_001.nc"): @@ -16,14 +13,14 @@ def _check_output_nc(result, fname="output_001.nc"): @pytest.mark.online -def test_fixes_applied_decadal_MOHC_mon(tmpdir, load_esgf_test_data): +def test_fixes_applied_decadal_MOHC_mon(tmpdir): # change fix index to test index which holds these decadal fixes - fix_index = CONFIG["elasticsearch"]["fix_store"] + fix_index = config_()["elasticsearch"]["fix_store"] test_fix_index = "c3s-roocs-fix" - CONFIG["elasticsearch"]["fix_store"] = test_fix_index + config_()["elasticsearch"]["fix_store"] = test_fix_index # don't use catalog - decadal datasets not in current catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False result = subset( "c3s-cmip6.DCPP.MOHC.HadGEM3-GC31-MM.dcppA-hindcast.s2004-r3i1p1f2.Amon.pr.gn.v20200417", @@ -87,18 +84,18 @@ def test_fixes_applied_decadal_MOHC_mon(tmpdir, load_esgf_test_data): assert ds.time_bnds.encoding.get("coordinates") is None # change fix index back - CONFIG["elasticsearch"]["fix_store"] = fix_index + config_()["elasticsearch"]["fix_store"] = fix_index @pytest.mark.online -def test_fixes_applied_decadal_MOHC_day(tmpdir, load_esgf_test_data): +def test_fixes_applied_decadal_MOHC_day(tmpdir): # change fix index to test index which holds these decadal fixes - fix_index = CONFIG["elasticsearch"]["fix_store"] + fix_index = config_()["elasticsearch"]["fix_store"] test_fix_index = "c3s-roocs-fix" - CONFIG["elasticsearch"]["fix_store"] = test_fix_index + config_()["elasticsearch"]["fix_store"] = test_fix_index # don't use catalog - 
decadal datasets not in current catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False result = subset( "c3s-cmip6.DCPP.MOHC.HadGEM3-GC31-MM.dcppA-hindcast.s1960-r2i1p1f2.day.tasmin.gn.v20200417", @@ -163,18 +160,18 @@ def test_fixes_applied_decadal_MOHC_day(tmpdir, load_esgf_test_data): assert ds.time_bnds.encoding.get("coordinates") is None # change fix index back - CONFIG["elasticsearch"]["fix_store"] = fix_index + config_()["elasticsearch"]["fix_store"] = fix_index @pytest.mark.online -def test_fixes_applied_decadal_EC_Earth_mon(tmpdir, load_esgf_test_data): +def test_fixes_applied_decadal_EC_Earth_mon(tmpdir): # change fix index to test index which holds these decadal fixes - fix_index = CONFIG["elasticsearch"]["fix_store"] + fix_index = config_()["elasticsearch"]["fix_store"] test_fix_index = "c3s-roocs-fix" - CONFIG["elasticsearch"]["fix_store"] = test_fix_index + config_()["elasticsearch"]["fix_store"] = test_fix_index # don't use catalog - decadal datasets not in current catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False result = subset( "c3s-cmip6.DCPP.EC-Earth-Consortium.EC-Earth3.dcppA-hindcast.s1960-r6i2p1f1.Amon.tas.gr.v20200508", @@ -238,18 +235,18 @@ def test_fixes_applied_decadal_EC_Earth_mon(tmpdir, load_esgf_test_data): assert ds.time_bnds.encoding.get("coordinates") is None # change fix index back - CONFIG["elasticsearch"]["fix_store"] = fix_index + config_()["elasticsearch"]["fix_store"] = fix_index @pytest.mark.online -def test_fixes_applied_decadal_EC_Earth_day(tmpdir, load_esgf_test_data): +def test_fixes_applied_decadal_EC_Earth_day(tmpdir): # change fix index to test index which holds these decadal fixes - fix_index = CONFIG["elasticsearch"]["fix_store"] + fix_index = config_()["elasticsearch"]["fix_store"] test_fix_index = "c3s-roocs-fix" - CONFIG["elasticsearch"]["fix_store"] = test_fix_index + config_()["elasticsearch"]["fix_store"] = test_fix_index # don't use catalog - decadal datasets not in current catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False result = subset( "c3s-cmip6.DCPP.EC-Earth-Consortium.EC-Earth3.dcppA-hindcast.s1961-r6i2p1f1.day.pr.gr.v20200508", @@ -313,18 +310,18 @@ def test_fixes_applied_decadal_EC_Earth_day(tmpdir, load_esgf_test_data): assert ds.time_bnds.encoding.get("coordinates") is None # change fix index back - CONFIG["elasticsearch"]["fix_store"] = fix_index + config_()["elasticsearch"]["fix_store"] = fix_index @pytest.mark.online -def test_fixes_applied_decadal_EC_Earth_url_fix(tmpdir, load_esgf_test_data): +def test_fixes_applied_decadal_EC_Earth_url_fix(tmpdir): # change fix index to test index which holds these decadal fixes - fix_index = CONFIG["elasticsearch"]["fix_store"] + fix_index = config_()["elasticsearch"]["fix_store"] test_fix_index = "c3s-roocs-fix" - CONFIG["elasticsearch"]["fix_store"] = test_fix_index + config_()["elasticsearch"]["fix_store"] = test_fix_index # don't use catalog - decadal datasets not in current catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False result = subset( "c3s-cmip6.DCPP.EC-Earth-Consortium.EC-Earth3.dcppA-hindcast.s1960-r2i1p1f1.Amon.tas.gr.v20201215", @@ -388,18 +385,18 @@ def test_fixes_applied_decadal_EC_Earth_url_fix(tmpdir, load_esgf_test_data): assert ds.time_bnds.encoding.get("coordinates") is None # change fix index back - 
CONFIG["elasticsearch"]["fix_store"] = fix_index + config_()["elasticsearch"]["fix_store"] = fix_index @pytest.mark.online -def test_fixes_applied_decadal_MPI_M_mon(tmpdir, load_esgf_test_data): +def test_fixes_applied_decadal_MPI_M_mon(tmpdir): # change fix index to test index which holds these decadal fixes - fix_index = CONFIG["elasticsearch"]["fix_store"] + fix_index = config_()["elasticsearch"]["fix_store"] test_fix_index = "c3s-roocs-fix" - CONFIG["elasticsearch"]["fix_store"] = test_fix_index + config_()["elasticsearch"]["fix_store"] = test_fix_index # don't use catalog - decadal datasets not in current catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False result = subset( "c3s-cmip6.DCPP.MPI-M.MPI-ESM1-2-HR.dcppA-hindcast.s1960-r10i1p1f1.Amon.tas.gn.v20200908", @@ -464,18 +461,18 @@ def test_fixes_applied_decadal_MPI_M_mon(tmpdir, load_esgf_test_data): assert ds.time_bnds.encoding.get("coordinates") is None # change fix index back - CONFIG["elasticsearch"]["fix_store"] = fix_index + config_()["elasticsearch"]["fix_store"] = fix_index @pytest.mark.online -def test_fixes_applied_decadal_MPI_M_day(tmpdir, load_esgf_test_data): +def test_fixes_applied_decadal_MPI_M_day(tmpdir): # change fix index to test index which holds these decadal fixes - fix_index = CONFIG["elasticsearch"]["fix_store"] + fix_index = config_()["elasticsearch"]["fix_store"] test_fix_index = "c3s-roocs-fix" - CONFIG["elasticsearch"]["fix_store"] = test_fix_index + config_()["elasticsearch"]["fix_store"] = test_fix_index # don't use catalog - decadal datasets not in current catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False result = subset( "c3s-cmip6.DCPP.MPI-M.MPI-ESM1-2-HR.dcppA-hindcast.s1960-r2i1p1f1.day.pr.gn.v20190929", @@ -540,18 +537,18 @@ def test_fixes_applied_decadal_MPI_M_day(tmpdir, load_esgf_test_data): assert ds.time_bnds.encoding.get("coordinates") is None # change fix index back - CONFIG["elasticsearch"]["fix_store"] = fix_index + config_()["elasticsearch"]["fix_store"] = fix_index @pytest.mark.online -def test_fixes_applied_decadal_CMCC_mon(tmpdir, load_esgf_test_data): +def test_fixes_applied_decadal_CMCC_mon(tmpdir): # change fix index to test index which holds these decadal fixes - fix_index = CONFIG["elasticsearch"]["fix_store"] + fix_index = config_()["elasticsearch"]["fix_store"] test_fix_index = "c3s-roocs-fix" - CONFIG["elasticsearch"]["fix_store"] = test_fix_index + config_()["elasticsearch"]["fix_store"] = test_fix_index # don't use catalog - decadal datasets not in current catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False result = subset( "c3s-cmip6.DCPP.CMCC.CMCC-CM2-SR5.dcppA-hindcast.s1960-r10i1p1f1.Amon.pr.gn.v20210719", @@ -616,19 +613,19 @@ def test_fixes_applied_decadal_CMCC_mon(tmpdir, load_esgf_test_data): assert ds.time_bnds.encoding.get("coordinates") is None # change fix index back - CONFIG["elasticsearch"]["fix_store"] = fix_index + config_()["elasticsearch"]["fix_store"] = fix_index @pytest.mark.skip(reason="no CMCC day datasets on c3s fix index") @pytest.mark.online -def test_fixes_applied_decadal_CMCC_day(tmpdir, load_esgf_test_data): +def test_fixes_applied_decadal_CMCC_day(tmpdir): # change fix index to test index which holds these decadal fixes - fix_index = CONFIG["elasticsearch"]["fix_store"] + fix_index = config_()["elasticsearch"]["fix_store"] test_fix_index = 
"c3s-roocs-fix" - CONFIG["elasticsearch"]["fix_store"] = test_fix_index + config_()["elasticsearch"]["fix_store"] = test_fix_index # don't use catalog - decadal datasets not in current catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False result = subset( "c3s-cmip6.DCPP.CMCC.CMCC-CM2-SR5.dcppA-hindcast.s1960-r1i1p1f1.day.tas.gn.v20210806", @@ -693,4 +690,4 @@ def test_fixes_applied_decadal_CMCC_day(tmpdir, load_esgf_test_data): assert ds.time_bnds.encoding.get("coordinates") is None # change fix index back - CONFIG["elasticsearch"]["fix_store"] = fix_index + config_()["elasticsearch"]["fix_store"] = fix_index diff --git a/tests/test_func_chainer.py b/tests/test_func_chainer.py index 0dfa638..7e61826 100644 --- a/tests/test_func_chainer.py +++ b/tests/test_func_chainer.py @@ -1,9 +1,7 @@ import pytest import xarray as xr - from daops import utils from daops.utils.fixer import FuncChainer -from tests._common import MINI_ESGF_MASTER_DIR CMIP5_IDS = [ "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga", @@ -13,17 +11,18 @@ # setup for tests -def setup_module(module): +@pytest.fixture(scope="module", autouse=True) +def setup_module(module, stratus): utils.fixer.Fixer.FIX_DIR = "tests/test_fixes" module.CMIP5_FPATHS = [ - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/INM/inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/*.nc", - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/historical/mon/land/Lmon/r1i1p1/latest/rh/*.nc", + f"{stratus.path}/badc/cmip5/data/cmip5/output1/INM/inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/*.nc", + f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/historical/mon/land/Lmon/r1i1p1/latest/rh/*.nc", ] @pytest.mark.skip(reason="Look up of fixes has changed") -def test_pre_and_post_process_fix(load_esgf_test_data): +def test_pre_and_post_process_fix(): ds_test = xr.open_mfdataset(CMIP5_FPATHS[1]) ds_test["tas"].data = ds_test["tas"].data * 2 ds_test["tas"].data = ds_test["tas"].data + 100 @@ -32,7 +31,7 @@ def test_pre_and_post_process_fix(load_esgf_test_data): @pytest.mark.skip(reason="Look up of fixes has changed") -def test_post_process_fix_only(load_esgf_test_data): +def test_post_process_fix_only(): ds_test = xr.open_mfdataset(CMIP5_FPATHS[0]) ds_test["zostoga"].attrs["units"] = "s" ds_test["zostoga"].attrs["long_name"] = "silly" @@ -42,22 +41,8 @@ def test_post_process_fix_only(load_esgf_test_data): @pytest.mark.skip(reason="Look up of fixes has changed") -def test_pre_process_fix_only(load_esgf_test_data): +def test_pre_process_fix_only(): ds = xr.open_mfdataset(CMIP5_FPATHS[2]) ds_test = ds.rename({"lat": "silly_lat"}) ds_code = utils.core.open_dataset(CMIP5_IDS[2], CMIP5_FPATHS[2]) assert ds_test.dims == ds_code.dims - - -def add_5(x): - return x + 5 - - -def multiply_by_2(x): - return x * 2 - - -def test_func_chainer_simple(): - chained_func = FuncChainer([add_5, multiply_by_2]) - result = chained_func(6) - assert result == 22 diff --git a/tests/test_logging.py b/tests/test_logging.py index 197d1fd..ca0d9b0 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -1,16 +1,14 @@ import sys import pytest - -from daops.utils.common import _logging_examples -from 
daops.utils.common import enable_logging -from tests._common import ContextLogger +from daops.utils.common import _logging_examples, enable_logging +from daops.utils.testing import ContextLogger class TestLoggingFuncs: - @pytest.mark.xfail( - reason="pytest-loguru does not implement logging levels for caplog yet." - ) + # @pytest.mark.xfail( + # reason="pytest-loguru does not implement logging levels for caplog yet." + # ) def test_logging_configuration(self, caplog): with ContextLogger(caplog): caplog.set_level("WARNING", logger="daops") diff --git a/tests/test_operations/test_average.py b/tests/test_operations/test_average.py index e65d2be..c079dba 100644 --- a/tests/test_operations/test_average.py +++ b/tests/test_operations/test_average.py @@ -1,34 +1,33 @@ import os +from packaging.version import Version +import geopandas as gpd import pytest import xarray as xr -import geopandas as gpd -from shapely import Polygon, MultiPolygon +from daops import config_ +from daops.ops.average import average_over_dims, average_shape, average_time from roocs_utils.exceptions import InvalidParameterValue +from shapely import Polygon -from daops import CONFIG -from daops.ops.average import average_over_dims -from daops.ops.average import average_time -from daops.ops.average import average_shape -from tests._common import CMIP5_DAY -from tests._common import CMIP6_MONTH CMIP5_IDS = [ "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga", "cmip5.output1.MOHC.HadGEM2-ES.rcp85.mon.atmos.Amon.r1i1p1.latest.tas", "cmip5.output1.MOHC.HadGEM2-ES.historical.mon.land.Lmon.r1i1p1.latest.rh", ] -CMIP6_IDS = [ - "CMIP6.CMIP.CAS.FGOALS-g3.historical.r1i1p1f1.Amon.tas.gn.v20190818" -] +CMIP6_IDS = ["CMIP6.CMIP.CAS.FGOALS-g3.historical.r1i1p1f1.Amon.tas.gn.v20190818"] -POLY = Polygon([[5.8671874999999996, 57.326521225217064], - [-15.468749999999998, 48.45835188280866], - [-16.171875, 24.84656534821976], - [-3.8671874999999996, 13.581920900545844], - [21.796875, 25.799891182088334], - [22.8515625, 52.482780222078226], - [5.8671874999999996, 57.326521225217064]]) +POLY = Polygon( + [ + [5.8671874999999996, 57.326521225217064], + [-15.468749999999998, 48.45835188280866], + [-16.171875, 24.84656534821976], + [-3.8671874999999996, 13.581920900545844], + [21.796875, 25.799891182088334], + [22.8515625, 52.482780222078226], + [5.8671874999999996, 57.326521225217064], + ] +) def _check_output_nc(result, fname="output_001.nc"): @@ -110,17 +109,21 @@ def test_average_level(tmpdir): @pytest.mark.online def test_average_shape(tmpdir): + xesmf = pytest.importorskip("xesmf") + if Version(xesmf.__version__) < Version("0.8.2"): + pytest.skip("Package xESMF >= 0.8.2 is required") + # Save POLY to tmpdir - tmp_poly_path = os.path.join(tmpdir,"tmppoly.json") - gpd.GeoDataFrame([{'geometry': POLY}]).to_file(tmp_poly_path) + tmp_poly_path = os.path.join(tmpdir, "tmppoly.json") + gpd.GeoDataFrame([{"geometry": POLY}]).to_file(tmp_poly_path) result = average_shape( CMIP6_IDS[0], shape=tmp_poly_path, variable=None, output_dir=tmpdir, - file_namer='simple', - apply_fixes=False + file_namer="simple", + apply_fixes=False, ) _check_output_nc(result) ds = xr.open_dataset(result.file_uris[0], use_cftime=True) @@ -135,21 +138,24 @@ def test_average_shape_none(tmpdir): shape=None, variable=None, output_dir=tmpdir, - file_namer='simple', - apply_fixes=False) + file_namer="simple", + apply_fixes=False, + ) assert str(exc.value) == "At least one area for averaging must be provided" @pytest.mark.online -def 
test_average_time_month(tmpdir): - ds = xr.open_mfdataset(CMIP5_DAY, use_cftime=True, combine="by_coords") +def test_average_time_month(tmpdir, mini_esgf_data): + ds = xr.open_mfdataset( + mini_esgf_data["CMIP5_DAY"], use_cftime=True, combine="by_coords" + ) assert ds.time.shape == (3600,) assert ds.time.values[0].isoformat() == "2005-12-01T12:00:00" assert ds.time.values[-1].isoformat() == "2015-11-30T12:00:00" result = average_time( - CMIP5_DAY, + mini_esgf_data["CMIP5_DAY"], freq="month", output_dir=tmpdir, file_namer="simple", @@ -171,17 +177,19 @@ def test_average_time_month(tmpdir): @pytest.mark.online -def test_average_time_year(tmpdir): +def test_average_time_year(tmpdir, mini_esgf_data): # allow use of dataset - defaults to c3s-cmip6 and this is not in the catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False - ds = xr.open_mfdataset(CMIP6_MONTH, use_cftime=True, combine="by_coords") + config_()["project:c3s-cmip6"]["use_catalog"] = False + ds = xr.open_mfdataset( + mini_esgf_data["CMIP6_MONTH"], use_cftime=True, combine="by_coords" + ) assert ds.time.shape == (1980,) assert ds.time.values[0].isoformat() == "1850-01-16T12:00:00" assert ds.time.values[-1].isoformat() == "2014-12-16T12:00:00" result = average_time( - CMIP6_MONTH, + mini_esgf_data["CMIP6_MONTH"], freq="year", output_dir=tmpdir, file_namer="simple", @@ -198,14 +206,14 @@ def test_average_time_year(tmpdir): assert ds_res.time.shape == (time_length,) assert ds_res.time.values[0].isoformat() == "1850-01-01T00:00:00" assert ds_res.time.values[-1].isoformat() == "2014-01-01T00:00:00" - CONFIG["project:c3s-cmip6"]["use_catalog"] = True + config_()["project:c3s-cmip6"]["use_catalog"] = True @pytest.mark.online -def test_average_time_incorrect_freq(tmpdir): +def test_average_time_incorrect_freq(tmpdir, mini_esgf_data): with pytest.raises(InvalidParameterValue) as exc: average_time( - CMIP5_DAY, + mini_esgf_data["CMIP5_DAY"], freq="week", output_dir=tmpdir, file_namer="simple", @@ -219,10 +227,10 @@ def test_average_time_incorrect_freq(tmpdir): @pytest.mark.online -def test_average_time_no_freq(tmpdir): +def test_average_time_no_freq(tmpdir, mini_esgf_data): with pytest.raises(InvalidParameterValue) as exc: average_time( - CMIP5_DAY, + mini_esgf_data["CMIP5_DAY"], freq=None, output_dir=tmpdir, file_namer="simple", diff --git a/tests/test_operations/test_regrid.py b/tests/test_operations/test_regrid.py index f48b5b5..a1047bf 100644 --- a/tests/test_operations/test_regrid.py +++ b/tests/test_operations/test_regrid.py @@ -1,14 +1,10 @@ import os +from packaging.version import Version import pytest import xarray as xr -# from daops.ops.regrid import regrid - -# TODO: remove when upgraded to new clisops version -# pytestmark = pytest.mark.xfail(reason="needs clisops>=0.12 with regrid operator") - CMIP6_IDS = ["CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.historical.r1i1p1f1.Omon.tos.gn.v20190710"] @@ -17,7 +13,11 @@ def _check_output_nc(result, fname="output_001.nc"): @pytest.mark.online -def test_regrid(tmpdir, load_esgf_test_data): +def test_regrid(tmpdir): + xesmf = pytest.importorskip("xesmf") + if Version(xesmf.__version__) < Version("0.8.2"): + pytest.skip("Package xESMF >= 0.8.2 is required") + from daops.ops.regrid import regrid result = regrid( diff --git a/tests/test_operations/test_subset.py b/tests/test_operations/test_subset.py index 8c6674b..d2f51e8 100644 --- a/tests/test_operations/test_subset.py +++ b/tests/test_operations/test_subset.py @@ -3,26 +3,18 @@ import numpy as np import pytest import xarray as xr -from 
roocs_utils.exceptions import InvalidParameterValue -from roocs_utils.exceptions import MissingParameterValue -from roocs_utils.parameter import area_parameter -from roocs_utils.parameter import collection_parameter -from roocs_utils.parameter import time_parameter -from roocs_utils.parameter.param_utils import level_interval -from roocs_utils.parameter.param_utils import level_series -from roocs_utils.parameter.param_utils import time_components -from roocs_utils.parameter.param_utils import time_interval -from roocs_utils.parameter.param_utils import time_series -from roocs_utils.utils.file_utils import FileMapper - -from daops import CONFIG +from daops import config_ from daops.ops.subset import subset -from tests._common import CMIP5_DAY -from tests._common import CMIP5_TAS_FPATH -from tests._common import CMIP6_DAY -from tests._common import CMIP6_MONTH -from tests._common import MINI_ESGF_MASTER_DIR -from tests._common import CMIP6_KERCHUNK_HTTPS_OPEN_JSON +from roocs_utils.exceptions import InvalidParameterValue, MissingParameterValue +from roocs_utils.parameter import area_parameter, collection_parameter, time_parameter +from roocs_utils.parameter.param_utils import ( + level_interval, + level_series, + time_components, + time_interval, + time_series, +) +from roocs_utils.utils.file_utils import FileMapper CMIP5_IDS = [ "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga", @@ -51,7 +43,7 @@ def _check_output_nc(result, fname="output_001.nc"): @pytest.mark.online -def test_subset_zostoga_with_fix(tmpdir, load_esgf_test_data): +def test_subset_zostoga_with_fix(tmpdir): result = subset( CMIP5_IDS[0], time=time_interval("2085-01-16", "2120-12-16"), @@ -65,7 +57,7 @@ def test_subset_zostoga_with_fix(tmpdir, load_esgf_test_data): assert "lev" not in ds.dims -def test_subset_zostoga_with_apply_fixes_false(tmpdir, load_esgf_test_data): +def test_subset_zostoga_with_apply_fixes_false(tmpdir, load_test_data): result = subset( CMIP5_IDS[0], time=time_interval("2085-01-16", "2120-12-16"), @@ -82,7 +74,7 @@ def test_subset_zostoga_with_apply_fixes_false(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_t(tmpdir, load_esgf_test_data): +def test_subset_t(tmpdir, load_test_data): result = subset( CMIP5_IDS[1], time=time_interval("2085-01-16", "2120-12-16"), @@ -94,11 +86,11 @@ def test_subset_t(tmpdir, load_esgf_test_data): assert ds.time.shape == (433,) -# @pytest.mark.online @pytest.mark.skip(reason="test service is not available") -def test_subset_t_kerchunk(tmpdir): +@pytest.mark.online +def test_subset_t_kerchunk(tmpdir, cmip6_kerchunk_https_open_json): result = subset( - CMIP6_KERCHUNK_HTTPS_OPEN_JSON, + cmip6_kerchunk_https_open_json, time=time_interval("1948-01-16", "1952-12-16"), area=(0, -10, 120, 40), output_dir=tmpdir, @@ -144,12 +136,8 @@ def test_subset_collection_as_empty_string(tmpdir): @pytest.mark.online -def test_subset_t_y_x(tmpdir, load_esgf_test_data): - fpath = ( - f"{MINI_ESGF_MASTER_DIR}/" - "test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/" - "atmos/Amon/r1i1p1/latest/tas/*.nc" - ) +def test_subset_t_y_x(tmpdir, stratus): + fpath = f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc" ds = xr.open_mfdataset( fpath, @@ -172,10 +160,9 @@ def test_subset_t_y_x(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_t_z_y_x(tmpdir, load_esgf_test_data): +def test_subset_t_z_y_x(tmpdir, stratus): fpath = ( - f"{MINI_ESGF_MASTER_DIR}/" - 
"test_data/badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/" + f"{stratus.path}/badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/" "GFDL-ESM4/historical/r1i1p1f1/Amon/o3/gr1/v20190726/" "o3_Amon_GFDL-ESM4_historical_r1i1p1f1_gr1_185001-194912.nc" ) @@ -187,27 +174,30 @@ def test_subset_t_z_y_x(tmpdir, load_esgf_test_data): ) assert ds.o3.shape == (1200, 19, 2, 3) - assert list(ds.o3.coords["plev"].values) == [ - 100000.0, - 92500.0, - 85000.0, - 70000.0, - 60000.0, - 50000.0, - 40000.0, - 30000.0, - 25000.0, - 20000.0, - 15000.0, - 10000.0, - 7000.0, - 5000.0, - 3000.0, - 2000.0, - 1000.0, - 500.0, - 100.0, - ] + np.testing.assert_array_equal( + ds.o3.coords["plev"], + [ + 100000.0, + 92500.0, + 85000.0, + 70000.0, + 60000.0, + 50000.0, + 40000.0, + 30000.0, + 25000.0, + 20000.0, + 15000.0, + 10000.0, + 7000.0, + 5000.0, + 3000.0, + 2000.0, + 1000.0, + 500.0, + 100.0, + ], + ) result = subset( CMIP6_IDS[0], @@ -224,7 +214,7 @@ def test_subset_t_z_y_x(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_t_with_invalid_date(tmpdir, load_esgf_test_data): +def test_subset_t_with_invalid_date(tmpdir, load_test_data): with pytest.raises(Exception) as exc: subset( CMIP5_IDS[1], @@ -271,7 +261,7 @@ def test_subset_with_fix_and_multiple_ids(zostoga_id, tmpdir): @pytest.mark.online -def test_parameter_classes_as_args(tmpdir, load_esgf_test_data): +def test_parameter_classes_as_args(tmpdir, load_test_data): collection = collection_parameter.CollectionParameter(CMIP5_IDS[1]) time = time_parameter.TimeParameter(time_interval("2085-01-16", "2120-12-16")) area = area_parameter.AreaParameter((0, -10, 120, 40)) @@ -286,7 +276,7 @@ def test_parameter_classes_as_args(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_time_is_none(tmpdir, load_esgf_test_data): +def test_time_is_none(tmpdir, load_test_data): result = subset( CMIP5_IDS[1], time=None, @@ -299,7 +289,7 @@ def test_time_is_none(tmpdir, load_esgf_test_data): ds = xr.open_mfdataset( os.path.join( - CONFIG["project:cmip5"]["base_dir"], + config_()["project:cmip5"]["base_dir"], "output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", ), use_cftime=True, @@ -315,7 +305,7 @@ def test_time_is_none(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_end_time_is_none(tmpdir, load_esgf_test_data): +def test_end_time_is_none(tmpdir, load_test_data): result = subset( CMIP5_IDS[2], time=time_interval("1940-10-14/"), @@ -327,7 +317,7 @@ def test_end_time_is_none(tmpdir, load_esgf_test_data): ds = xr.open_mfdataset( os.path.join( - CONFIG["project:cmip5"]["base_dir"], + config_()["project:cmip5"]["base_dir"], "output1/MOHC/HadGEM2-ES/historical/mon/land/Lmon/r1i1p1/latest/rh/*.nc", ), use_cftime=True, @@ -341,7 +331,7 @@ def test_end_time_is_none(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_start_time_is_none(tmpdir, load_esgf_test_data): +def test_start_time_is_none(tmpdir, load_test_data): result = subset( CMIP5_IDS[1], time=time_interval("/2120-12-16"), @@ -353,7 +343,7 @@ def test_start_time_is_none(tmpdir, load_esgf_test_data): ds = xr.open_mfdataset( os.path.join( - CONFIG["project:cmip5"]["base_dir"], + config_()["project:cmip5"]["base_dir"], "output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", ), use_cftime=True, @@ -367,7 +357,7 @@ def test_start_time_is_none(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_time_invariant_subset_standard_name(tmpdir, load_esgf_test_data): +def test_time_invariant_subset_standard_name(tmpdir, load_test_data): dset = 
"CMIP6.ScenarioMIP.IPSL.IPSL-CM6A-LR.ssp119.r1i1p1f1.fx.mrsofc.gr.v20190410" result = subset( @@ -382,11 +372,11 @@ def test_time_invariant_subset_standard_name(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_with_file_mapper(tmpdir, load_esgf_test_data): +def test_subset_with_file_mapper(tmpdir, stratus): file_paths = [ - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES" + f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES" f"/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_HadGEM2-ES_rcp85_r1i1p1_200512-203011.nc", - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES" + f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES" f"/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_HadGEM2-ES_rcp85_r1i1p1_203012-205511.nc", ] @@ -404,7 +394,7 @@ def test_subset_with_file_mapper(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_with_catalog(tmpdir, load_esgf_test_data): +def test_subset_with_catalog(tmpdir, load_test_data): # c3s-cmip6 dataset so will use catalog in consolidate result = subset( "c3s-cmip6.ScenarioMIP.INM.INM-CM5-0.ssp245.r1i1p1f1.Amon.rlds.gr1.v20190619", @@ -420,10 +410,10 @@ def test_subset_with_catalog(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_with_catalog_time_invariant(tmpdir, load_esgf_test_data): +def test_subset_with_catalog_time_invariant(tmpdir, load_test_data): # c3s-cmip6 dataset so will use catalog in consolidate result = subset( - f"c3s-cmip6.ScenarioMIP.MPI-M.MPI-ESM1-2-LR.ssp370.r1i1p1f1.fx.mrsofc.gn.v20190815", + "c3s-cmip6.ScenarioMIP.MPI-M.MPI-ESM1-2-LR.ssp370.r1i1p1f1.fx.mrsofc.gn.v20190815", output_dir=tmpdir, output_type="nc", file_namer="standard", @@ -433,13 +423,16 @@ def test_subset_with_catalog_time_invariant(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_by_time_components_year_month(tmpdir, load_esgf_test_data): +def test_subset_by_time_components_year_month(tmpdir, mini_esgf_data): tc1 = time_components(year=(2021, 2022), month=["dec", "jan", "feb"]) tc2 = time_components(year=(2021, 2022), month=[12, 1, 2]) for tc in (tc1, tc2): result = subset( - CMIP5_TAS_FPATH, time_components=tc, output_dir=tmpdir, file_namer="simple" + mini_esgf_data["CMIP5_TAS_FPATH"], + time_components=tc, + output_dir=tmpdir, + file_namer="simple", ) ds = xr.open_dataset(result.file_uris[0], use_cftime=True) @@ -450,14 +443,17 @@ def test_subset_by_time_components_year_month(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_by_time_components_month_day(tmpdir, load_esgf_test_data): +def test_subset_by_time_components_month_day(tmpdir, mini_esgf_data): # 20051201-20151130 tc1 = time_components(month=["jul"], day=[1, 11, 21]) tc2 = time_components(month=[7], day=[1, 11, 21]) for tc in (tc1, tc2): result = subset( - CMIP5_DAY, time_components=tc, output_dir=tmpdir, file_namer="simple" + mini_esgf_data["CMIP5_DAY"], + time_components=tc, + output_dir=tmpdir, + file_namer="simple", ) ds = xr.open_dataset(result.file_uris[0], use_cftime=True) @@ -469,7 +465,7 @@ def test_subset_by_time_components_month_day(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_by_time_interval_and_components_month_day(tmpdir, load_esgf_test_data): +def test_subset_by_time_interval_and_components_month_day(tmpdir, mini_esgf_data): # 20051201-20151130 ys, ye = 2007, 2010 ti = time_interval(f"{ys}-12-01T00:00:00", f"{ye}-11-30T23:59:59") @@ -482,7 +478,7 @@ def 
test_subset_by_time_interval_and_components_month_day(tmpdir, load_esgf_test for tc in (tc1, tc2): result = subset( - CMIP5_DAY, + mini_esgf_data["CMIP5_DAY"], time=ti, time_components=tc, output_dir=tmpdir, @@ -497,7 +493,7 @@ def test_subset_by_time_interval_and_components_month_day(tmpdir, load_esgf_test # @pytest.mark.online -# def test_subset_by_time_series_and_components_month_day_cmip5(tmpdir, load_esgf_test_data): +# def test_subset_by_time_series_and_components_month_day_cmip5(tmpdir, load_test_data): # # 20051201-20151130 # ys, ye = 2007, 2010 # req_times = [tm.isoformat() for tm in xr.open_dataset(CMIP5_DAY).time.values @@ -523,18 +519,16 @@ def test_subset_by_time_interval_and_components_month_day(tmpdir, load_esgf_test @pytest.mark.online -def test_subset_by_time_series_and_components_month_day_cmip6( - tmpdir, load_esgf_test_data -): +def test_subset_by_time_series_and_components_month_day_cmip6(tmpdir, mini_esgf_data): # 18500101-20141231 # allow use of dataset - defaults to c3s-cmip6 and this is not in the catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False ys, ye = 1998, 2010 req_times = [ tm.isoformat() - for tm in xr.open_dataset(CMIP6_DAY).time.values + for tm in xr.open_dataset(mini_esgf_data["CMIP6_DAY"]).time.values if ys <= tm.year <= ye ] @@ -547,7 +541,7 @@ def test_subset_by_time_series_and_components_month_day_cmip6( for tc in (tc1, tc2): result = subset( - CMIP6_DAY, + mini_esgf_data["CMIP6_DAY"], time=ts, time_components=tc, output_dir=tmpdir, @@ -562,16 +556,16 @@ def test_subset_by_time_series_and_components_month_day_cmip6( @pytest.mark.online -def test_subset_components_day_monthly_dataset(tmpdir, load_esgf_test_data): - #  tests key error is raised when trying to select a non existent day on a monthly dataset +def test_subset_components_day_monthly_dataset(tmpdir, mini_esgf_data): + # tests key error is raised when trying to select a nonexistent day on a monthly dataset # 18500101-20141231 # allow use of dataset - defaults to c3s-cmip6 and this is not in the catalog - CONFIG["project:c3s-cmip6"]["use_catalog"] = False + config_()["project:c3s-cmip6"]["use_catalog"] = False ys, ye = 1998, 2010 req_times = [ tm.isoformat() - for tm in xr.open_dataset(CMIP6_MONTH).time.values + for tm in xr.open_dataset(mini_esgf_data["CMIP6_MONTH"]).time.values if ys <= tm.year <= ye ] @@ -583,7 +577,7 @@ def test_subset_components_day_monthly_dataset(tmpdir, load_esgf_test_data): with pytest.raises(KeyError) as exc: subset( - CMIP6_MONTH, + mini_esgf_data["CMIP6_MONTH"], time=ts, time_components=tc, output_dir=tmpdir, @@ -592,12 +586,14 @@ def test_subset_components_day_monthly_dataset(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_by_time_series(tmpdir, load_esgf_test_data): - t = [str(tm) for tm in xr.open_dataset(CMIP5_TAS_FPATH).time.values] +def test_subset_by_time_series(tmpdir, mini_esgf_data): + t = [ + str(tm) for tm in xr.open_dataset(mini_esgf_data["CMIP5_TAS_FPATH"]).time.values + ] some_times = [t[0], t[100], t[4], t[33], t[9]] result = subset( - CMIP5_TAS_FPATH, + mini_esgf_data["CMIP5_TAS_FPATH"], time=time_series(some_times), output_dir=tmpdir, file_namer="simple", @@ -614,7 +610,7 @@ def test_subset_by_time_series(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_by_level_series(tmpdir, load_esgf_test_data): +def test_subset_by_level_series(tmpdir, load_test_data): some_levels = [60000.0, 15000.0, 40000.0, 1000.0, 92500.0] result = subset( @@ -635,7 +631,7 
@@ def test_subset_by_level_series(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_cmip6_nc_consistent_bounds(tmpdir, load_esgf_test_data): +def test_subset_cmip6_nc_consistent_bounds(tmpdir, load_test_data): """Test daops subset to check consistent bounds in metadata.""" result = subset( CMIP6_IDS[0], @@ -660,7 +656,7 @@ def test_subset_cmip6_nc_consistent_bounds(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_c3s_cmip6_nc_consistent_bounds(tmpdir, load_esgf_test_data): +def test_subset_c3s_cmip6_nc_consistent_bounds(tmpdir, load_test_data): """Test daops subset to check consistent bounds in metadata.""" result = subset( C3S_CMIP6_IDS[0], diff --git a/tests/test_utils/test_base_lookup.py b/tests/test_utils/test_base_lookup.py index 9a4815c..82fe80a 100644 --- a/tests/test_utils/test_base_lookup.py +++ b/tests/test_utils/test_base_lookup.py @@ -1,10 +1,9 @@ from daops.utils.base_lookup import Lookup -from tests._common import MINI_ESGF_MASTER_DIR -def test_convert_to_ds_id(load_esgf_test_data): +def test_convert_to_ds_id(stratus): fpath = ( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES" + f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES" f"/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc" ) ds_id = Lookup(fpath).convert_to_ds_id() diff --git a/tests/test_utils/test_core.py b/tests/test_utils/test_core.py index 3b4dd83..a22f850 100644 --- a/tests/test_utils/test_core.py +++ b/tests/test_utils/test_core.py @@ -1,31 +1,35 @@ import pytest import xarray as xr +from daops.utils.core import Characterised, open_dataset -from daops.utils.core import Characterised -from daops.utils.core import open_dataset -from tests._common import MINI_ESGF_MASTER_DIR -fpath = ( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/INM/inmcm4" - "/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/*.nc" -) -ds_id = "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga" +class TestOpenDataset: + ds_id = "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga" -def test_open_dataset_with_fix(load_esgf_test_data): - unfixed_ds = xr.open_mfdataset(fpath, use_cftime=True, combine="by_coords") - fixed_ds = open_dataset(ds_id, fpath) - assert unfixed_ds.dims != fixed_ds.dims - assert "lev" in unfixed_ds.dims - assert "lev" not in fixed_ds.dims + def test_open_dataset_with_fix(self, stratus): + fpath = ( + f"{stratus.path}/badc/cmip5/data/cmip5/output1/INM/inmcm4" + "/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/*.nc" + ) + unfixed_ds = xr.open_mfdataset(fpath, use_cftime=True, combine="by_coords") + fixed_ds = open_dataset(self.ds_id, fpath) + assert unfixed_ds.dims != fixed_ds.dims + assert "lev" in unfixed_ds.dims + assert "lev" not in fixed_ds.dims -def test_open_dataset_without_fix(load_esgf_test_data): - ds = xr.open_mfdataset(fpath, use_cftime=True, combine="by_coords") - not_fixed_ds = open_dataset(ds_id, fpath, apply_fixes=False) - assert ds.dims == not_fixed_ds.dims - assert "lev" in ds.dims - assert "lev" in not_fixed_ds.dims + def test_open_dataset_without_fix(self, stratus): + fpath = ( + f"{stratus.path}/badc/cmip5/data/cmip5/output1/INM/inmcm4" + "/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/*.nc" + ) + + ds = xr.open_mfdataset(fpath, use_cftime=True, combine="by_coords") + not_fixed_ds = open_dataset(self.ds_id, fpath, apply_fixes=False) + assert ds.dims == not_fixed_ds.dims + assert "lev" in ds.dims + assert "lev" in not_fixed_ds.dims @pytest.mark.online diff --git 
a/tests/test_utils/test_normalise.py b/tests/test_utils/test_normalise.py index 01b6a8f..4ef8329 100644 --- a/tests/test_utils/test_normalise.py +++ b/tests/test_utils/test_normalise.py @@ -1,10 +1,9 @@ from collections import OrderedDict from daops.utils.normalise import ResultSet -from tests._common import MINI_ESGF_MASTER_DIR -def test_file_uris_url(load_esgf_test_data): +def test_file_uris_url(): result = ResultSet() original_file_urls = OrderedDict( @@ -30,7 +29,7 @@ def test_file_uris_url(load_esgf_test_data): ] -def test_file_uris_files(load_esgf_test_data): +def test_file_uris_files(stratus): result = ResultSet() file_path = OrderedDict( @@ -38,7 +37,7 @@ def test_file_uris_files(load_esgf_test_data): ( "CMIP6.CMIP.IPSL.IPSL-CM6A-LR.historical.r1i1p1f1.Amon.rlds.gr.v20180803", [ - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip6/data/CMIP6/CMIP/IPSL" + f"{stratus.path}/badc/cmip6/data/CMIP6/CMIP/IPSL" "/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/rlds/gr/v20180803" "/rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001-201412.nc" ], @@ -49,6 +48,6 @@ def test_file_uris_files(load_esgf_test_data): for ds_id, file in file_path.items(): result.add(ds_id, file) assert result.file_uris == [ - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical" + f"{stratus.path}/badc/cmip6/data/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical" "/r1i1p1f1/Amon/rlds/gr/v20180803/rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_185001-201412.nc" ] diff --git a/tests/test_xarray/__init__.py b/tests/test_xarray/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_xarray/test_xarray_aggregation.py b/tests/test_xarray/test_xarray_aggregation.py index f672b58..8a0a734 100644 --- a/tests/test_xarray/test_xarray_aggregation.py +++ b/tests/test_xarray/test_xarray_aggregation.py @@ -1,5 +1,4 @@ -""" -test_xarray_aggregation.py +"""test_xarray_aggregation.py ========================== Set of tests to assert that Xarray behaves in ways we would expect when @@ -12,63 +11,65 @@ - F3 """ + import itertools import os -import pathlib -import tempfile import numpy as np import pytest import xarray as xr -from .._common import MINI_ESGF_MASTER_DIR -from .._common import TESTS_OUTPUTS -file_base = ( - f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/MOHC/" - "HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_HadGEM2-ES_rcp85_r1i1p1" -) +@pytest.fixture(scope="module") +def prepare_files(stratus): + file_base = ( + f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/" + "HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_HadGEM2-ES_rcp85_r1i1p1" + ) -test_files = [ - file_base + end - for end in ("_200512-203011.nc", "_203012-205511.nc", "_205512-208011.nc") -] + test_files = [ + file_base + end + for end in ("_200512-203011.nc", "_203012-205511.nc", "_205512-208011.nc") + ] -F1, F2, F3 = test_files + return test_files # Functions to make modified NC files # need to make files temporary files -def _make_nc_modify_var_attr(nc_path, var_id, attr, value, path=TESTS_OUTPUTS): - ds = _open(nc_path) - ds[var_id].attrs[attr] = value - ds.to_netcdf(os.path.join(path, "tas_modify_var_attr.nc")) +def _open(file_paths): + return xr.open_mfdataset(file_paths, use_cftime=True, combine="by_coords") + + +def _make_nc_modify_var_attr(nc_path, var_id, attr, value, path): + with _open(nc_path) as ds: + ds[var_id].attrs[attr] = value + ds.to_netcdf(os.path.join(path, "tas_modify_var_attr.nc")) tmp_path = os.path.join(path, "tas_modify_var_attr.nc") return 
tmp_path -def _make_nc_modify_global_attr(nc_path, attr, value, path=TESTS_OUTPUTS): - ds = _open(nc_path) - ds.attrs[attr] = value - # ds.to_netcdf(path=tmp_path.mkdir("test_dir").join("modify_var_attr.nc")) - ds.to_netcdf(os.path.join(path, "tas_modify_global_attr.nc")) +def _make_nc_modify_global_attr(nc_path, attr, value, path): + with _open(nc_path) as ds: + ds.attrs[attr] = value + ds.to_netcdf(os.path.join(path, "tas_modify_global_attr.nc")) tmp_path = os.path.join(path, "tas_modify_global_attr.nc") return tmp_path -def _make_nc_modify_var_id(nc_path, old_var_id, new_var_id, path=TESTS_OUTPUTS): - ds = _open(nc_path) - ds = ds.rename({old_var_id: new_var_id}) - ds.to_netcdf(path=os.path.join(path, "tas_modify_var_id.nc")) +def _make_nc_modify_var_id(nc_path, old_var_id, new_var_id, path): + with _open(nc_path) as ds: + ds = ds.rename({old_var_id: new_var_id}) + ds.to_netcdf(path=os.path.join(path, "tas_modify_var_id.nc")) tmp_path = os.path.join(path, "tas_modify_var_id.nc") return tmp_path -def _make_nc_modify_fill_value(nc_path, var_id, fill_value, path=TESTS_OUTPUTS): - ds = _open(nc_path) - ds[var_id].encoding["_FillValue"] = fill_value - ds.tas.encoding["missing_value"] = fill_value - ds.to_netcdf(path=os.path.join(path, "tas_modify_fill_value.nc")) +def _make_nc_modify_fill_value(nc_path, var_id, fill_value, path): + with _open(nc_path) as ds: + ds[var_id].encoding["_FillValue"] = fill_value + ds.tas.encoding["missing_value"] = fill_value + ds.to_netcdf(path=os.path.join(path, "tas_modify_fill_value.nc")) tmp_path = os.path.join(path, "tas_modify_fill_value.nc") return tmp_path @@ -85,30 +86,33 @@ def global_attr(request): return attr -def _open(file_paths): - return xr.open_mfdataset(file_paths, use_cftime=True, combine="by_coords") - - -def test_agg_success_with_no_changes(load_esgf_test_data): - ds = _open([F1, F2, F3]) +def test_agg_success_with_no_changes(prepare_files): + ds = _open(prepare_files) assert "tas" in ds.variables ds.close() -def test_agg_fails_diff_var_attrs_change_F2(var_attr, load_esgf_test_data): - V = "rubbish" - file_paths = F1, _make_nc_modify_var_attr(F2, "tas", var_attr, V), F3 - ds = _open(file_paths) - assert ds.tas.__getattr__(f"{var_attr}") != V - ds.close() +@pytest.mark.skip(reason="This test is hanging quite often ...") +def test_agg_fails_diff_var_attrs_change_F2(var_attr, prepare_files, tmpdir): + v = "rubbish" + file_paths = ( + prepare_files[0], + _make_nc_modify_var_attr(prepare_files[1], "tas", var_attr, v, path=tmpdir), + prepare_files[2], + ) + with _open(file_paths) as ds: + assert ds.tas.__getattr__(f"{var_attr}") != v -def test_agg_fails_diff_var_attrs_change_F1(var_attr, load_esgf_test_data): - V = "rubbish" - file_paths = _make_nc_modify_var_attr(F1, "tas", var_attr, V), F2, F3 - ds = _open(file_paths) - assert ds.tas.__getattr__(f"{var_attr}") == V - ds.close() +def test_agg_fails_diff_var_attrs_change_F1(var_attr, prepare_files, tmpdir): + v = "rubbish" + file_paths = ( + _make_nc_modify_var_attr(prepare_files[0], "tas", var_attr, v, path=tmpdir), + prepare_files[1], + prepare_files[2], + ) + with _open(file_paths) as ds: + assert ds.tas.__getattr__(f"{var_attr}") == v # doesn't work when changing all 3 - something to do with how I'm modifying files? 
@@ -130,20 +134,26 @@ def test_agg_fails_diff_var_attrs_change_F1(var_attr, load_esgf_test_data): # ) -def test_agg_behaviour_diff_global_attrs_change_F2(global_attr, load_esgf_test_data): - V = "other" - file_paths = F1, _make_nc_modify_global_attr(F2, global_attr, V), F3 - ds = _open(file_paths) - assert ds.__getattr__(f"{global_attr}") != V - ds.close() +def test_agg_behaviour_diff_global_attrs_change_F2(global_attr, prepare_files, tmpdir): + v = "other" + file_paths = ( + prepare_files[0], + _make_nc_modify_global_attr(prepare_files[1], global_attr, v, path=tmpdir), + prepare_files[2], + ) + with _open(file_paths) as ds: + assert ds.__getattr__(f"{global_attr}") != v -def test_agg_behaviour_diff_global_attrs_change_F1(global_attr, load_esgf_test_data): - V = "other" - file_paths = _make_nc_modify_global_attr(F1, global_attr, V), F2, F3 - ds = _open(file_paths) - assert ds.__getattr__(f"{global_attr}") == V - ds.close() +def test_agg_behaviour_diff_global_attrs_change_F1(global_attr, prepare_files, tmpdir): + v = "other" + file_paths = ( + _make_nc_modify_global_attr(prepare_files[0], global_attr, v, path=tmpdir), + prepare_files[1], + prepare_files[2], + ) + with _open(file_paths) as ds: + assert ds.__getattr__(f"{global_attr}") == v # failure not relevant to what is being tested @@ -167,57 +177,74 @@ def test_agg_behaviour_diff_global_attrs_change_F1(global_attr, load_esgf_test_d # both new_var_id and old_var_id are in ds.variables no matter which file is changed -def test_agg_fails_diff_var_id_change_F1(load_esgf_test_data): +def test_agg_fails_diff_var_id_change_F1(prepare_files, tmpdir): new_var_id = "blah" old_var_id = "tas" - file_paths = _make_nc_modify_var_id(F1, old_var_id, new_var_id), F2, F3 - ds = _open(file_paths) - assert new_var_id, old_var_id in ds.variables - ds.close() + file_paths = ( + _make_nc_modify_var_id(prepare_files[0], old_var_id, new_var_id, path=tmpdir), + prepare_files[1], + prepare_files[2], + ) + with _open(file_paths) as ds: + assert new_var_id in ds.variables and old_var_id in ds.variables -def test_agg_fails_diff_var_id_change_F2(load_esgf_test_data): +def test_agg_fails_diff_var_id_change_F2(prepare_files, tmpdir): new_var_id = "blah" old_var_id = "tas" - file_paths = F1, _make_nc_modify_var_id(F2, old_var_id, new_var_id), F3 - ds = _open(file_paths) - assert new_var_id, old_var_id in ds.variables - ds.close() + file_paths = ( + prepare_files[0], + _make_nc_modify_var_id(prepare_files[1], old_var_id, new_var_id, path=tmpdir), + prepare_files[2], + ) + with _open(file_paths) as ds: + assert new_var_id in ds.variables and old_var_id in ds.variables -def test_agg_fails_diff_fill_value_change_F2(load_esgf_test_data): +def test_agg_fails_diff_fill_value_change_F2(prepare_files, tmpdir): var_id = "tas" fill_value = np.float32(-1e20) - file_paths = F1, _make_nc_modify_fill_value(F2, var_id, fill_value=fill_value), F3 - ds = _open(file_paths) - assert ds[var_id].encoding["_FillValue"] != fill_value - ds.close() - - -def test_agg_fails_diff_fill_value_change_F1(load_esgf_test_data): + file_paths = ( + prepare_files[0], + _make_nc_modify_fill_value( + prepare_files[1], var_id, fill_value=fill_value, path=tmpdir + ), + prepare_files[2], + ) + with _open(file_paths) as ds: + assert ds[var_id].encoding["_FillValue"] != fill_value + + +def test_agg_fails_diff_fill_value_change_F1(prepare_files, tmpdir): var_id = "tas" fill_value = np.float32(-1e20) - file_paths = _make_nc_modify_fill_value(F1, var_id, fill_value=fill_value), F2, F3 - ds = _open(file_paths) - assert ds[var_id].encoding["_FillValue"] ==
fill_value - ds.close() - - -def test_agg_affected_by_order(load_esgf_test_data): + file_paths = ( + _make_nc_modify_fill_value( + prepare_files[0], var_id, fill_value=fill_value, path=tmpdir + ), + prepare_files[1], + prepare_files[2], + ) + with _open(file_paths) as ds: + assert ds[var_id].encoding["_FillValue"] == fill_value + + +def test_agg_affected_by_order(prepare_files, tmpdir): # Apply a breaking change to different files in the sequence and # assert that the same exception is raised regardless of which # file is modified - file_orders = itertools.permutations([F1, F2, F3]) + file_orders = itertools.permutations(prepare_files) for _f1, _f2, _f3 in file_orders: - file_paths = _f1, _make_nc_modify_var_attr(_f2, "tas", "units", "bad"), _f3 - if _f2 == F1: - ds = _open(file_paths) - assert "bad" in ds.tas.units - ds.close() - else: - ds = _open(file_paths) - assert "K" in ds.tas.units - ds.close() + file_paths = ( + _f1, + _make_nc_modify_var_attr(_f2, "tas", "units", "bad", path=tmpdir), + _f3, + ) + with _open(file_paths) as ds: + if _f2 == prepare_files[0]: + assert "bad" in ds.tas.units + else: + assert "K" in ds.tas.units # opens with incorrect change when change is in first file (earliest time) # otherwise no change (except in the case of var_id) diff --git a/tox.ini b/tox.ini index fba4d97..7cf4806 100644 --- a/tox.ini +++ b/tox.ini @@ -1,50 +1,45 @@ [tox] -envlist = py37, py38, black, docs -requires = pip >= 20.0 -opts = -v +min_version = 4.18.1 +envlist = + py{39,310,311,312} + lint +requires = + pip >=25.0 +opts = + --verbose -[travis] -python = - 3.8: py38 - 3.7: py37 - 3.7: docs - 3.7: black - - -[testenv:black] +[testenv:lint] skip_install = True basepython = python deps = - flake8 - black + black >=25.1.0 + ruff >=0.9.0 commands = - flake8 daops tests - black --check --target-version py37 daops tests --exclude tests/mini-esgf-data + black --check src/daops tests + ruff check src/daops [testenv:docs] extras = docs deps = commands = - make --directory=docs clean html + make --directory=docs clean html whitelist_externals = - make + make [testenv] setenv = HOME = {envtmpdir} PYTHONPATH = {toxinidir} - GDAL_VERSION = 3.0.0 - COV_CORE_SOURCE= -passenv = CI TRAVIS TRAVIS_* PROJ_DIR LD_LIBRARY_PATH GDAL_VERSION GDAL_DATA PATH + COV_CORE_SOURCE = +passenv = + CI extras = dev install_command = python -m pip install --no-user {opts} {packages} download = True deps = ; If you want to make tox run the tests with the same versions, create a ; requirements.txt with the pinned versions and uncomment the following line: - coveralls - pytest-cov - pip + coveralls >=4.0.1 commands = - py.test -m "not online" --cov daops --basetemp={envtmpdir} + pytest -m "not online" --cov daops --basetemp={envtmpdir} - coveralls
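
Note on fixtures: throughout this change set the old `load_esgf_test_data` fixture and the `tests/_common.py` constants (`MINI_ESGF_MASTER_DIR`, `CMIP5_DAY`, `CMIP6_DECADAL`, ...) are replaced by `stratus`, `mini_esgf_data` and `load_test_data` fixtures that are defined outside the files shown here, presumably in the suite's conftest. A minimal sketch of what such fixtures could provide is given below; the cache location, the `SimpleNamespace` shape and the placeholder paths are illustrative assumptions, not the project's actual implementation.

# Hypothetical conftest.py sketch; illustrative only, not part of this diff.
from pathlib import Path
from types import SimpleNamespace

import pytest

# Assumed location of a local mirror of the mini-esgf-data test files.
TEST_DATA_ROOT = Path.home() / ".cache" / "mini-esgf-data" / "test_data"


@pytest.fixture(scope="session")
def stratus():
    # Exposes the root of the test-data tree via a ``path`` attribute, matching
    # how the tests build paths: f"{stratus.path}/badc/cmip5/...".
    return SimpleNamespace(path=str(TEST_DATA_ROOT))


@pytest.fixture(scope="session")
def mini_esgf_data(stratus):
    # Maps short dataset keys to file paths or glob patterns, matching usages
    # such as mini_esgf_data["CMIP5_DAY"] and mini_esgf_data["CMIP6_DECADAL"].
    # The right-hand sides are placeholders, not the real dataset paths.
    base = stratus.path
    return {
        "CMIP5_TAS_FPATH": f"{base}/badc/cmip5/...",
        "CMIP5_DAY": f"{base}/badc/cmip5/...",
        "CMIP6_MONTH": f"{base}/badc/cmip6/...",
        "CMIP6_DAY": f"{base}/badc/cmip6/...",
        "CMIP6_DECADAL": f"{base}/badc/cmip6/...",
    }


@pytest.fixture(scope="session")
def load_test_data(stratus):
    # Lightweight replacement for load_esgf_test_data: it only guarantees the
    # data tree exists before a test runs.
    if not Path(stratus.path).is_dir():
        pytest.skip("mini-esgf-data files are not available locally")
    return stratus

A session scope keeps path resolution cheap across the many tests that request these fixtures.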
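One further detail from tests/test_func_chainer.py: the old `setup_module(module)` hook is converted into an autouse, module-scoped fixture but keeps a `module` argument, and pytest has no built-in fixture of that name, so the fixture would fail to set up if the currently skipped tests in that file were re-enabled. A sketch of the same pattern written against pytest's built-in `request` fixture, with the glob patterns copied from the diff; this is a suggested variant, not part of the change set.

# Sketch of the autouse fixture pattern for tests/test_func_chainer.py,
# using pytest's built-in `request` fixture; illustrative only.
import pytest

from daops import utils


@pytest.fixture(scope="module", autouse=True)
def setup_module(request, stratus):
    # Point the Fixer at the local test fixes and publish the CMIP5 glob
    # patterns as module globals, mirroring the old setup_module(module) hook.
    utils.fixer.Fixer.FIX_DIR = "tests/test_fixes"
    request.module.CMIP5_FPATHS = [
        f"{stratus.path}/badc/cmip5/data/cmip5/output1/INM/inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/*.nc",
        f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc",
        f"{stratus.path}/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/historical/mon/land/Lmon/r1i1p1/latest/rh/*.nc",
    ]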