Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functions to import the datasets #3

Merged
merged 20 commits into from
Nov 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 86 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,89 @@
# ignore these files
# Distribution / packaging
.Python
build/
c
develop-eggs/
dist/
downloads/
eggs/
.eggs/
env/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
*.orig
*.tmp
MANIFEST

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Created by editors
*~
\#*
\.\#*
*.swp

# Created by PyCharm
.idea/

# eclipse/pydev
.project
.pydevproject
.settings

# Created by VSCode
.vscode

# pytest
.cache
.pytest_cache

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Jupyter Notebook
.ipynb_checkpoints

*.tmp
*.orig
/c
/tests/data/**
test-reports/
/test_bash.sh
/python_test_out.txt

# Build folder
doc/sphinx/build

# esgf-pyclient cache
*.sqlite
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Ship the dataset configuration that the package reads at import time.
include esmvaltool_sample_data/datasets.yml
recursive-include esmvaltool_sample_data/data/ *.nc
106 changes: 106 additions & 0 deletions esmvaltool_sample_data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
from pathlib import Path

import cf_units
import iris
import yaml

# Directory of this package; `datasets.yml` and the `data` folder are looked
# up relative to it.
base_dir = Path(__file__).parent

# When True, the loader functions print progress information to stdout.
VERBOSE = False

# Read the dataset configuration. The encoding is given explicitly so that
# the result does not depend on the platform default.
with open(base_dir / 'datasets.yml', 'r', encoding='utf-8') as f:
    # An empty file makes `safe_load` return None; fall back to an empty dict.
    config = yaml.safe_load(f) or {}

# Entries under `ignore` are dot-separated dataset identifiers. They are
# converted to '/'-separated relative paths so they can be compared with
# directory paths. A missing or empty `ignore` section yields an empty list.
ignore_list = [fn.replace('.', '/') for fn in (config.get('ignore') or [])]


def strip_attributes(cube: 'iris.cube.Cube') -> None:
    """Remove attributes in-place that hinder merging and concatenation.

    Parameters
    ----------
    cube: iris.cube.Cube
        Cube whose ``attributes`` mapping is modified. The keys
        ``creation_date``, ``tracking_id`` and ``history`` are removed when
        present; all other attributes are left untouched.
    """
    for attr in ('creation_date', 'tracking_id', 'history'):
        # `pop` with a default is a no-op when the attribute is absent.
        cube.attributes.pop(attr, None)


def simplify_time(cube: 'iris.cube.Cube') -> None:
    """Convert the time coordinate in-place to a common reference date.

    The units of the ``time`` coordinate become
    ``days since 1850-1-1 00:00:00``; the calendar already used by the
    coordinate is kept.

    Parameters
    ----------
    cube: iris.cube.Cube
        Cube with a coordinate named ``time``; that coordinate is modified.
    """
    coord = cube.coord('time')
    # Re-use the coordinate's own calendar so only the reference date changes.
    common_units = cf_units.Unit('days since 1850-1-1 00:00:00',
                                 calendar=coord.units.calendar)
    coord.convert_units(common_units)


def load_cubes_from_input_dirs(
        input_dirs: list) -> 'Iterator[iris.cube.Cube]':
    """Yield one concatenated cube per input directory.

    The directories are processed in sorted order. For each directory all
    ``*.nc`` files are loaded, every loaded cube is passed through
    `strip_attributes` and `simplify_time`, and the results are concatenated
    into a single cube.

    Parameters
    ----------
    input_dirs: list of pathlib.Path
        Directories containing the netCDF files of one dataset each.

    Yields
    ------
    iris.cube.Cube
        The concatenated cube for one input directory.

    Notes
    -----
    ``concatenate_cube`` is expected to raise if the files of a directory
    cannot be combined into a single cube (datasets.yml lists a dataset
    ignored for an ``iris.exceptions.ConcatenateError``).
    """
    for i, input_dir in enumerate(sorted(input_dirs)):
        if VERBOSE:
            print(f'Loading #{i:02d}:', input_dir)

        filenames = [str(file) for file in input_dir.glob('*.nc')]
        cubes = iris.load(filenames)
        for part in cubes:
            # Harmonize metadata so the parts can be concatenated.
            strip_attributes(part)
            simplify_time(part)

        cube = cubes.concatenate_cube()

        if VERBOSE:
            print('   ', cube.shape, cube.coord('time').units.calendar)

        yield cube


def filter_ignored_datasets(dirs, root):
    """Yield the directories that are not listed in the ignore list.

    Parameters
    ----------
    dirs: iterable of pathlib.Path
        Candidate dataset directories located below `root`.
    root: pathlib.Path
        Directory that the entries of the module-level `ignore_list` are
        relative to.

    Yields
    ------
    pathlib.Path
        Every directory from `dirs` whose path relative to `root` is not in
        `ignore_list`. Ignored directories are reported on stdout when
        `VERBOSE` is set.
    """
    for drc in dirs:
        # `ignore_list` entries always use '/' as separator, so compare in
        # POSIX form; `str()` would use backslashes on Windows.
        test_drc = drc.relative_to(root).as_posix()
        if test_drc not in ignore_list:
            yield drc
        elif VERBOSE:
            print('Ignored:', test_drc)


def load_timeseries_cubes(mip_table: str = 'Amon') -> list:
    """Return the bundled timeseries data as a list of iris cubes.

    The data are: ta / Amon / historical / r1i1p1f1, any grid, 1950 - onwards.
    All dimensions were reduced to a few steps except for the time dimension.

    Parameters
    ----------
    mip_table: str
        select monthly (`Amon`) or daily (`day`) data.

    Returns
    -------
    list of iris.cube
        One cube per dataset directory that is not on the ignore list.
    """
    timeseries_dir = base_dir / 'data' / 'timeseries'

    # Every directory holding at least one matching netCDF file is a dataset.
    nc_files = timeseries_dir.glob(f'**/{mip_table}/**/*.nc')
    dataset_dirs = {nc_file.parent for nc_file in nc_files}

    kept_dirs = list(filter_ignored_datasets(dataset_dirs, timeseries_dir))

    return list(load_cubes_from_input_dirs(kept_dirs))


if __name__ == '__main__':
    # Manual check: load the bundled datasets for each MIP table and regrid
    # every cube onto the grid of the first one.
    VERBOSE = True

    for mip_table in (
            'Amon',
            'day',
    ):
        print()
        print(f'Loading `{mip_table}`')
        ts = load_timeseries_cubes(mip_table)

        first_cube = ts[0]
        for i, cube in enumerate(ts):
            print(i)
            # The regridded result is not kept; presumably the point is only
            # to verify that regridding succeeds for every dataset.
            cube.regrid(grid=first_cube, scheme=iris.analysis.Linear())
4 changes: 4 additions & 0 deletions esmvaltool_sample_data/datasets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,7 @@ ignore:
- CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3-Veg-LR.historical.r1i1p1f1.Amon.ta.gr.v20200217
# something wrong with lon coord
- CMIP6.CMIP.UA.MCM-UA-1-0.historical.r1i1p1f1.Amon.ta.gn.v20190731
  # iris.exceptions.ConcatenateError: failed to concatenate into a single cube.
  - CMIP6.CMIP.NCC.NorCPM1.historical.r1i1p1f1.Amon.ta.gn.v20190914
# Regridding -> ValueError: Cube 'air_temperature' must contain a single 1D y coordinate.
- CMIP6.CMIP.FIO-QLNM.FIO-ESM-2-0.historical.r1i1p1f1.Amon.ta.gn.v20191204
54 changes: 54 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Packaging configuration for the ESMValTool sample data."""
from setuptools import setup

# The README is used verbatim as the long description of the package.
with open('README.md', encoding='utf-8') as readme_file:
    readme = readme_file.read()

PACKAGES = [
    'esmvaltool_sample_data',
]

setup(
    name='ESMValTool sample data',
    version='0.0.1',
    description="ESMValTool sample data",
    long_description=readme + '\n\n',
    # README.md is Markdown; declare it so it is rendered as such.
    long_description_content_type='text/markdown',
    author="",
    author_email='',
    url='https://github.com/ESMValGroup/ESMValTool_sample_data',
    packages=PACKAGES,
    include_package_data=True,
    # datasets.yml is opened by the package at import time, so it has to be
    # installed together with the Python sources.
    package_data={
        'esmvaltool_sample_data': ['datasets.yml'],
    },
    license="",
    zip_safe=False,
    keywords='ESMValTool',
    classifiers=[
        'Development Status :: 2 - Pre-Alpha',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: Apache Software License',
        'Natural Language :: English',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
    ],
    test_suite='tests',
    # The package imports `cf_units`, `iris` and `yaml` directly.
    install_requires=[
        'cf-units',
        'pyyaml',
        'scitools-iris>=2.2',
    ],
    # tests_require=[
    #     'pytest',
    #     'pytest-cov',
    #     'pycodestyle',
    # ],
    extras_require={
        'develop': [
            'codespell',
            'docformatter',
            'esgf-pyclient',
            'isort',
            'myproxyclient',
            'pre-commit',
            'prospector[with_pyroma]!=1.1.6.3,!=1.1.6.4',
            'yamllint',
            'yapf',
        ],
    },
)