Skip to content

Commit

Permalink
Formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
collijk committed May 13, 2024
2 parents 735e120 + 2cb94cc commit 4f18280
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 73 deletions.
11 changes: 5 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,11 @@ exclude = [
# If you need to ignore something for some specific module,
# add overrides for them. Avoid changing the global config!
# For example:
# [[tool.mypy.overrides]]
# module = [
# "my_unpyted_dependency1.*",
# "my_unpyted_dependency2.*"
# ]
# ignore_missing_imports = true
[[tool.mypy.overrides]]
module = [
"cdsapi.*",
]
ignore_missing_imports = true

# [[tool.mypy.overrides]]
# module = [
Expand Down
12 changes: 5 additions & 7 deletions src/climate_downscale/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,24 @@
@click.group()
def cdrun() -> None:
    """Entry point for running climate downscale workflows."""
    # NOTE: click uses this docstring as the group's help text, so its wording is
    # user-facing. The docstring alone is a valid function body; no `pass` needed.


@click.group()
def cdtask() -> None:
    """Entry point for running climate downscale tasks."""
    # NOTE: click uses this docstring as the group's help text, so its wording is
    # user-facing. The docstring alone is a valid function body; no `pass` needed.


for module in [extract]:
runners = getattr(module, 'RUNNERS', {})
task_runners = getattr(module, 'TASK_RUNNERS', {})
runners = getattr(module, "RUNNERS", {})
task_runners = getattr(module, "TASK_RUNNERS", {})

if not runners or not task_runners:
continue

command_name = module.__name__.split('.')[-1]
command_name = module.__name__.split(".")[-1]

@click.group(name=command_name)
def workflow_runner():
def workflow_runner() -> None:
pass

for name, runner in runners.items():
Expand All @@ -34,7 +32,7 @@ def workflow_runner():
cdrun.add_command(workflow_runner)

@click.group(name=command_name)
def task_runner():
def task_runner() -> None:
pass

for name, runner in task_runners.items():
Expand Down
2 changes: 0 additions & 2 deletions src/climate_downscale/data.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
from pathlib import Path


DEFAULT_ROOT = "/mnt/share/erf/ERA5/"


class ClimateDownscaleData:

def __init__(self, root: str | Path) -> None:
self._root = Path(root)
self._credentials_root = self._root / ".credentials"
Expand Down
49 changes: 24 additions & 25 deletions src/climate_downscale/extract/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,36 @@

### [ERA5](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land?tab=overview)

ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land
variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land
has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis.
Reanalysis combines model data with observations from across the world into a globally
complete and consistent dataset using the laws of physics. Reanalysis produces data
that goes several decades back in time, providing an accurate description of the
ERA5-Land is a reanalysis dataset providing a consistent view of the evolution of land
variables over several decades at an enhanced resolution compared to ERA5. ERA5-Land
has been produced by replaying the land component of the ECMWF ERA5 climate reanalysis.
Reanalysis combines model data with observations from across the world into a globally
complete and consistent dataset using the laws of physics. Reanalysis produces data
that goes several decades back in time, providing an accurate description of the
climate of the past.

To control the simulated land fields, ERA5-Land uses ERA5 atmospheric variables, such
as air temperature and air humidity, as input. This is called the atmospheric
forcing. Without the constraint of the atmospheric forcing, the model-based estimates
can rapidly deviate from reality. Therefore, while observations are not directly used
in the production of ERA5-Land, they have an indirect influence through the atmospheric
forcing used to run the simulation. In addition, the input air temperature, air
humidity and pressure used to run ERA5-Land are corrected to account for the altitude
difference between the grid of the forcing and the higher resolution grid of ERA5-Land.
To control the simulated land fields, ERA5-Land uses ERA5 atmospheric variables, such
as air temperature and air humidity, as input. This is called the atmospheric
forcing. Without the constraint of the atmospheric forcing, the model-based estimates
can rapidly deviate from reality. Therefore, while observations are not directly used
in the production of ERA5-Land, they have an indirect influence through the atmospheric
forcing used to run the simulation. In addition, the input air temperature, air
humidity and pressure used to run ERA5-Land are corrected to account for the altitude
difference between the grid of the forcing and the higher resolution grid of ERA5-Land.
This correction is called 'lapse rate correction'.

The ERA5-Land dataset, like any other simulation, provides estimates which have some
degree of uncertainty. Numerical models can only provide a more or less accurate
representation of the real physical processes governing different components of the
Earth System. In general, the uncertainty of model estimates grows as we go back in
time, because the number of observations available to create a good-quality atmospheric
forcing is lower. ERA5-Land parameter fields can currently be used in combination with
The ERA5-Land dataset, like any other simulation, provides estimates which have some
degree of uncertainty. Numerical models can only provide a more or less accurate
representation of the real physical processes governing different components of the
Earth System. In general, the uncertainty of model estimates grows as we go back in
time, because the number of observations available to create a good-quality atmospheric
forcing is lower. ERA5-Land parameter fields can currently be used in combination with
the uncertainty of the equivalent ERA5 fields.

The temporal and spatial resolutions of ERA5-Land make this dataset very useful for
all kinds of land surface applications such as flood or drought forecasting. The
temporal and spatial resolution of this dataset, the period covered in time, as well as
the fixed grid used for the data distribution at any period enable decision makers,
The temporal and spatial resolutions of ERA5-Land make this dataset very useful for
all kinds of land surface applications such as flood or drought forecasting. The
temporal and spatial resolution of this dataset, the period covered in time, as well as
the fixed grid used for the data distribution at any period enable decision makers,
businesses and individuals to access and use more accurate information on land states.

### [CMIP6](https://pcmdi.llnl.gov/CMIP6/)
Expand All @@ -53,4 +53,3 @@ GSOD dataset is produced by the National Centers for Environmental Information
## Downscaling Predictors

### [Local Climate Zones](https://lcz-generator.rub.de/global-lcz-map)

10 changes: 5 additions & 5 deletions src/climate_downscale/extract/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from climate_downscale.extract.ncei_climate_stations import (
extract_ncei_climate_stations,
)
from climate_downscale.extract.era5 import (
extract_era5,
extract_era5_task,
)
from climate_downscale.extract.ncei_climate_stations import (
extract_ncei_climate_stations,
)

RUNNERS = {
'ncei': extract_ncei_climate_stations,
"ncei": extract_ncei_climate_stations,
"era5": extract_era5,
}

TASK_RUNNERS = {
'ncei': extract_ncei_climate_stations,
"ncei": extract_ncei_climate_stations,
"era5": extract_era5_task,
}
27 changes: 16 additions & 11 deletions src/climate_downscale/extract/era5.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from typing import TypeVar, ParamSpec
from pathlib import Path
from typing import ParamSpec, TypeVar

from climate_downscale.data import ClimateDownscaleData, DEFAULT_ROOT
from rra_tools.cli_tools import with_output_directory, with_queue, with_choice, ClickOption, RUN_ALL
import cdsapi
import click
from rra_tools import jobmon
from rra_tools.cli_tools import (
RUN_ALL,
ClickOption,
with_choice,
with_output_directory,
with_queue,
)

import click
import cdsapi
from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData

VALID_YEARS = [str(y) for y in range(1990, 2024)]
VALID_MONTHS = [f"{i:02d}" for i in range(1, 13)]
Expand Down Expand Up @@ -65,8 +71,8 @@ def extract_era5_main(
variable: str,
) -> None:
cddata = ClimateDownscaleData(output_dir)
cred_path = cddata.credentials_root / 'copernicus.txt'
url, key = cred_path.read_text().strip().split('\n')
cred_path = cddata.credentials_root / "copernicus.txt"
url, key = cred_path.read_text().strip().split("\n")

copernicus = cdsapi.Client(url=url, key=key)
kwargs = {
Expand All @@ -79,7 +85,7 @@ def extract_era5_main(
"time_zone": "UTC+00:00",
"frequency": "1-hourly",
"grid": "0.1/0.1",
"area": {"lat": [-90, 90], "lon": [-180, 180]}
"area": {"lat": [-90, 90], "lon": [-180, 180]},
}
result = copernicus.service(
"tool.toolbox.orchestrator.workflow",
Expand All @@ -96,7 +102,7 @@ def extract_era5_main(
copernicus.download(result, [out_path])


@click.command()
@click.command() # type: ignore[arg-type]
@with_output_directory(DEFAULT_ROOT)
@with_year()
@with_month()
Expand All @@ -105,7 +111,7 @@ def extract_era5_task(year: str, month: str, variable: str) -> None:
extract_era5_main(DEFAULT_ROOT, year, month, variable)


@click.command()
@click.command() # type: ignore[arg-type]
@with_output_directory(DEFAULT_ROOT)
@with_year(allow_all=True)
@with_variable(allow_all=True)
Expand Down Expand Up @@ -135,4 +141,3 @@ def extract_era5(
},
runner="cdtask",
)

33 changes: 16 additions & 17 deletions src/climate_downscale/extract/ncei_climate_stations.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,41 @@
from rra_tools.shell_tools import wget
from rra_tools.cli_tools import (
with_output_directory
)
import shutil
import tempfile
from pathlib import Path
import pandas as pd

import click
import pandas as pd
from rra_tools.cli_tools import with_output_directory
from rra_tools.shell_tools import wget

from climate_downscale.data import ClimateDownscaleData, DEFAULT_ROOT

from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData

EXTRACTION_YEARS = list(range(1990, 1992))
URL_TEMPLATE = 'https://www.ncei.noaa.gov/data/global-summary-of-the-day/archive/{year}.tar.gz'
URL_TEMPLATE = (
"https://www.ncei.noaa.gov/data/global-summary-of-the-day/archive/{year}.tar.gz"
)


def extract_ncei_climate_stations_main(output_dir: str | Path):
def extract_ncei_climate_stations_main(output_dir: str | Path) -> None:
cd_data = ClimateDownscaleData(output_dir)

data = []
dfs = []
for year in EXTRACTION_YEARS:
with tempfile.NamedTemporaryFile(suffix='.tar.gz') as f:
with tempfile.NamedTemporaryFile(suffix=".tar.gz") as f:
url = URL_TEMPLATE.format(year=year)

wget(url, f.name)

with tempfile.TemporaryDirectory() as outdir:
shutil.unpack_archive(f.name, outdir)
data.append(
pd.concat([pd.read_csv(f) for f in Path(outdir).glob('*.csv')])
dfs.append(
pd.concat([pd.read_csv(f) for f in Path(outdir).glob("*.csv")])
)
data = pd.concat(data)
data = pd.concat(dfs)

data.to_parquet(cd_data.ncei_climate_stations / 'climate_stations.parquet')
data.to_parquet(cd_data.ncei_climate_stations / "climate_stations.parquet")


@click.command()
@click.command() # type: ignore[arg-type]
@with_output_directory(DEFAULT_ROOT)
def extract_ncei_climate_stations(output_dir: str):
def extract_ncei_climate_stations(output_dir: str) -> None:
extract_ncei_climate_stations_main(output_dir)
1 change: 1 addition & 0 deletions tests/test_climate_downscale.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
def test_climate_downscale() -> None:
    """Smoke test: the ``climate_downscale.cli`` module can be imported."""
    # Import inside the test so an import-time failure is reported as a test
    # failure rather than a collection error.
    import climate_downscale.cli as cli_module

    # The assertion mainly confirms the import above produced a module object.
    assert cli_module

0 comments on commit 4f18280

Please sign in to comment.