Merge pull request #23 from ihmeuw/feature/modernize

Feature/modernize
ihmeuw · Dec 30, 2024 · 0d70363 · 0d70363
2 parents f29bb87 + d2929d7
commit 0d70363
Show file tree

Hide file tree

Showing 23 changed files with 409 additions and 331 deletions.
diff --git a/.cruft.json b/.cruft.json
@@ -1,6 +1,6 @@
 {
   "template": "https://github.com/collijk/python-package-cookiecutter",
-  "commit": "7e9285f84cc6b52165dbc97b9a0d4f059d0f6818",
+  "commit": "cd59edc69d51f9485bca69eb940e0706bc65e9ba",
   "checkout": null,
   "context": {
     "cookiecutter": {
@@ -11,7 +11,8 @@
       "project_slug": "climate-data",
       "package_name": "climate_data",
       "project_short_description": "Pipelines to extract, format, and downscale ERA5 and CMIP6 data.",
-      "_template": "https://github.com/collijk/python-package-cookiecutter"
+      "_template": "https://github.com/collijk/python-package-cookiecutter",
+      "_commit": "cd59edc69d51f9485bca69eb940e0706bc65e9ba"
     }
   },
   "directory": null

diff --git a/.github/alternative_workflows/build_docs.yml b/.github/alternative_workflows/build_docs.yml
@@ -0,0 +1,21 @@
+name: Build and Deploy Docs
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - main
+    types:
+      - closed
+
+jobs:
+  build-and-deploy-docs:  # Builds the docs and pushes them with `mkdocs gh-deploy`.
+    if: ${{ github.event.pull_request.merged == true || github.event_name == 'workflow_dispatch' }}  # Keep this as ONE expression: text outside ${{ }} (e.g. " or ") makes the value a non-empty string, which is always truthy.
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          token: ${{ secrets.GH_TOKEN }}
+      - uses: ./.github/actions/python-poetry-env  # action defined in this repository
+      - name: Deploy docs
+        run: poetry run mkdocs gh-deploy --force
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.0.1
+    rev: v5.0.0
     hooks:
       - id: check-ast
       - id: check-added-large-files

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -41,7 +41,7 @@ xarray = "^2024.11.0"
 cdsapi = "^0.7.5"
 matplotlib = "^3.8.4"
 scikit-learn = "^1.4.2"
-rra-tools = "^1.0.10"
+rra-tools = "^1.0.17"
 netcdf4 = "^1.7.2"
 pyarrow = "^16.0.0"
 gcsfs = "^2024.6.0"
@@ -52,24 +52,24 @@ pydantic = "^2.10.4"
 
 
 [tool.poetry.group.dev.dependencies]
-mkdocstrings = {version = ">=0.23", extras = ["python"]}
+mkdocstrings = {version = "*", extras = ["python"]}
 mkdocs-material = "*"
 mkdocs-table-reader-plugin = "*"
-mkdocs-gen-files = "^0.5.0"
+mkdocs-gen-files = "*"
 mypy = "*"
 pre-commit = "*"
 pymdown-extensions = "*"
 pytest = "*"
 pytest-github-actions-annotate-failures = "*"
 pytest-cov = "*"
 python-kacl = "*"
-ruff = ">=0.2.0"
-pandas-stubs = "^2.2.3.241009"
-types-pyyaml = "^6.0.12.20240311"
-types-requests = "^2.31.0.20240406"
-types-tqdm = "^4.66.0.20240417"
-mkdocs-literate-nav = "^0.6.1"
-mkdocs-section-index = "^0.3.9"
+ruff = "*"
+pandas-stubs = "*"
+types-pyyaml = "*"
+types-requests = "*"
+types-tqdm = "*"
+mkdocs-literate-nav = "*"
+mkdocs-section-index = "*"
 
 
 [build-system]
@@ -111,6 +111,7 @@ ignore = [
     "FBT002",   # Boolean positional args are super common in clis
     "PD901",    # Generic df names are fine
     "S311",     # Not using random numbers for crypto purposes
+    "S101",     # Use of `assert` detected
 ]
 
 [tool.ruff.lint.per-file-ignores]

diff --git a/scripts/gen_data_pages.py b/scripts/gen_data_pages.py
@@ -186,7 +186,7 @@
 
     - Daily Storage Root: `{cdata.daily_results}`
     - Naming Convention: `{{SCENARIO}}/{{DAILY_VARIABLE}}/{{YEAR}}.nc` (historical data only)
-        - `{{SCENARIO}}`: Generally, only historical data is available at the daily level, so this will be `historical`. 
+        - `{{SCENARIO}}`: Generally, only historical data is available at the daily level, so this will be `historical`.
         - `{{DAILY_VARIABLE}}`: The name of the variable being stored.
         - `{{YEAR}}`: The year of the data being stored.
     - Annual Storage Root: `{cdata.results}`

diff --git a/scripts/scenario_inclusion_metadata.parquet b/scripts/scenario_inclusion_metadata.parquet
diff --git a/src/climate_data/cli_options.py b/src/climate_data/cli_options.py
@@ -7,9 +7,9 @@
 It also provides global variables representing the full space of valid values for these options.
 """
 
+from collections.abc import Collection
 from typing import ParamSpec, TypeVar
 
-import click
 from rra_tools.cli_tools import (
     RUN_ALL,
     ClickOption,
@@ -18,20 +18,20 @@
     with_input_directory,
     with_num_cores,
     with_output_directory,
+    with_overwrite,
     with_progress_bar,
     with_queue,
     with_verbose,
 )
 
 from climate_data import constants as cdc
 
-
 _T = TypeVar("_T")
 _P = ParamSpec("_P")
 
 
 def with_year(
-    years: list[str],
+    years: Collection[str],
     *,
     allow_all: bool = False,
 ) -> ClickOption[_P, _T]:
@@ -42,6 +42,7 @@ def with_year(
         allow_all=allow_all,
         choices=years,
         help="Year to extract data for.",
+        convert=allow_all,
     )
 
 
@@ -55,6 +56,7 @@ def with_month(
         allow_all=allow_all,
         choices=cdc.MONTHS,
         help="Month to extract data for.",
+        convert=allow_all,
     )
 
 
@@ -68,6 +70,7 @@ def with_era5_variable(
         allow_all=allow_all,
         choices=cdc.ERA5_VARIABLES,
         help="Variable to extract.",
+        convert=allow_all,
     )
 
 
@@ -81,6 +84,7 @@ def with_era5_dataset(
         allow_all=allow_all,
         choices=cdc.ERA5_DATASETS,
         help="Dataset to extract.",
+        convert=allow_all,
     )
 
 
@@ -94,6 +98,7 @@ def with_cmip6_source(
         allow_all=allow_all,
         choices=cdc.CMIP6_SOURCES,
         help="CMIP6 source to extract.",
+        convert=allow_all,
     )
 
 
@@ -107,8 +112,10 @@ def with_cmip6_experiment(
         allow_all=allow_all,
         choices=cdc.CMIP6_EXPERIMENTS,
         help="CMIP6 experiment to extract.",
+        convert=allow_all,
     )
 
+
 def with_cmip6_variable(
     *,
     allow_all: bool = False,
@@ -119,11 +126,12 @@ def with_cmip6_variable(
         allow_all=allow_all,
         choices=[v.name for v in cdc.CMIP6_VARIABLES],
         help="CMIP6 variable to extract.",
+        convert=allow_all,
     )
 
 
 def with_target_variable(
-    variable_names: list[str],
+    variable_names: Collection[str],
     *,
     allow_all: bool = False,
 ) -> ClickOption[_P, _T]:
@@ -133,6 +141,7 @@ def with_target_variable(
         allow_all=allow_all,
         choices=variable_names,
         help="Variable to generate.",
+        convert=allow_all,
     )
 
 
@@ -145,8 +154,10 @@ def with_draw(
         allow_all=allow_all,
         choices=cdc.DRAWS,
         help="Draw to process.",
+        convert=allow_all,
     )
 
+
 def with_scenario(
     *,
     allow_all: bool = False,
@@ -156,36 +167,29 @@ def with_scenario(
         allow_all=allow_all,
         choices=cdc.SCENARIOS,
         help="Scenario to process.",
-    )
-
-
-def with_overwrite() -> ClickOption[_P, _T]:
-    return click.option(
-        "--overwrite",
-        is_flag=True,
-        help="Overwrite existing files.",
+        convert=allow_all,
     )
 
 
 __all__ = [
-    "with_year",
-    "with_month",
-    "with_era5_variable",
-    "with_era5_dataset",
-    "with_cmip6_source",
+    "RUN_ALL",
+    "ClickOption",
+    "with_choice",
     "with_cmip6_experiment",
-    "with_target_variable",
-    "with_draw",
-    "with_scenario",
-    "with_overwrite",
-    "with_output_directory",
-    "with_queue",
-    "with_verbose",
+    "with_cmip6_source",
     "with_debugger",
+    "with_draw",
+    "with_era5_dataset",
+    "with_era5_variable",
     "with_input_directory",
+    "with_month",
     "with_num_cores",
+    "with_output_directory",
+    "with_overwrite",
     "with_progress_bar",
-    "RUN_ALL",
-    "ClickOption",
-    "with_choice",
+    "with_queue",
+    "with_scenario",
+    "with_target_variable",
+    "with_verbose",
+    "with_year",
 ]
diff --git a/src/climate_data/constants.py b/src/climate_data/constants.py
@@ -1,10 +1,9 @@
 from pathlib import Path
-from typing import NamedTuple, Literal
+from typing import Literal, NamedTuple
 
-from pydantic import BaseModel
 import numpy as np
 import xarray as xr
-
+from pydantic import BaseModel
 
 ##############
 # File roots #
@@ -40,13 +39,16 @@
 
 # Extraction Constants
 
+
 class _ERA5Datasets(NamedTuple):
     # Use named tuple so that we can access the dataset names as attributes
     reanalysis_era5_land: str = "reanalysis-era5-land"
     reanalysis_era5_single_levels: str = "reanalysis-era5-single-levels"
 
+
 ERA5_DATASETS = _ERA5Datasets()
 
+
 class _ERA5Variables(NamedTuple):
     u_component_of_wind: str = "10m_u_component_of_wind"
     v_component_of_wind: str = "10m_v_component_of_wind"
@@ -56,6 +58,7 @@ class _ERA5Variables(NamedTuple):
     total_precipitation: str = "total_precipitation"
     sea_surface_temperature: str = "sea_surface_temperature"
 
+
 ERA5_VARIABLES = _ERA5Variables()
 
 CMIP6_SOURCES = [
@@ -83,11 +86,13 @@ class _ERA5Variables(NamedTuple):
     "NorESM2-MM",
 ]
 
+
 class _CMIP6Experiments(NamedTuple):
     ssp126: str = "ssp126"
     ssp245: str = "ssp245"
     ssp585: str = "ssp585"
 
+
 CMIP6_EXPERIMENTS = _CMIP6Experiments()
 
 
@@ -161,7 +166,7 @@ def names(self) -> list[str]:
         return [v.name for v in self]
 
     def get(self, name: str) -> CMIP6Variable:
-        return getattr(self, name)
+        return getattr(self, name)  # type: ignore[no-any-return]
 
     def to_dict(self) -> dict[str, CMIP6Variable]:
         return {v.name: v for v in self}
@@ -181,10 +186,12 @@ def to_dict(self) -> dict[str, CMIP6Variable]:
 
 DRAWS = [str(d) for d in range(100)]
 
+
 class _Scenarios(NamedTuple):
     historical: str = "historical"
     ssp126: str = "ssp126"
     ssp245: str = "ssp245"
     ssp585: str = "ssp585"
 
+
 SCENARIOS = _Scenarios()
diff --git a/src/climate_data/data.py b/src/climate_data/data.py
@@ -187,7 +187,7 @@ def load_scenario_metadata(self) -> pd.DataFrame:
 
     def save_scenario_inclusion_metadata(self, df: pd.DataFrame) -> None:
         # Need to save to our scripts directory for doc building
-        scripts_root = Path(__file__).parent.parent / "scripts"
+        scripts_root = Path(__file__).parent.parent.parent / "scripts"
         for root_dir in [self.results_metadata, scripts_root]:
             path = root_dir / "scenario_inclusion_metadata.parquet"
             if path.exists():