Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functions to import the datasets #3

Merged
merged 20 commits into from
Nov 19, 2020
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 86 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,89 @@
# ignore these files
# Distribution / packaging
.Python
build/
c
develop-eggs/
dist/
downloads/
eggs/
.eggs/
env/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
*.orig
*.tmp
MANIFEST

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Created by editors
*~
\#*
\.\#*
*.swp

# Created by PyCharm
.idea/

# eclipse/pydev
.project
.pydevproject
.settings

# Created by VSCode
.vscode

#pytest
.cache
.pytest_cache

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Jupyter Notebook
.ipynb_checkpoints

*.tmp
*.orig
/c
/tests/data/**
test-reports/
/test_bash.sh
/python_test_out.txt

# Build folder
doc/sphinx/build

# esgf-pyclient cache
*.sqlite
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
recursive-include esmvaltool_sample_data/data/ *.nc
1 change: 1 addition & 0 deletions esmvaltool_sample_data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .loader import load_map_cubes, load_profile_cubes, load_timeseries_cubes
81 changes: 81 additions & 0 deletions esmvaltool_sample_data/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from pathlib import Path

import cf_units
import iris

# Directory containing this module; the bundled sample data is looked up
# relative to it.
base_dir = Path(__file__).parent

# Dataset directories that are skipped when loading because they are known
# not to load cleanly. Each entry is preceded by the error or warning that
# was observed for it.
problematic = [
    # iris.exceptions.ConcatenateError:
    #     failed to concatenate into a single cube.
    'esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/NCC/NorCPM1/historical/r1i1p1f1/Amon/ta/gn/v20190914',
    # UserWarning:
    #     Gracefully filling 'lat' dimension coordinate masked points
    'esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120',
    'esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/NCAR/CESM2-WACCM-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120',
]


def strip_attributes(cube):
    """Remove attributes that cause issues with merging and concatenation.

    The ``creation_date``, ``tracking_id`` and ``history`` entries are
    deleted from ``cube.attributes`` in place when present; all other
    attributes are left untouched.
    """
    for name in ('creation_date', 'tracking_id', 'history'):
        # A default of None makes the removal a no-op for absent keys.
        cube.attributes.pop(name, None)


def simplify_time(cube):
    """Convert the cube's ``time`` coordinate to a fixed reference epoch.

    The coordinate units become ``days since 1850-1-1 00:00:00``; the
    calendar already attached to the coordinate is kept.
    """
    time_coord = cube.coord('time')
    target_units = cf_units.Unit(
        'days since 1850-1-1 00:00:00',
        calendar=time_coord.units.calendar,
    )
    time_coord.convert_units(target_units)


def load_cubes_from_input_dirs(input_dirs):
    """Load all ``*.nc`` files from each input directory into one cube.

    Directories listed in the module-level ``problematic`` list are skipped.
    For every other directory the files are loaded with iris, the attributes
    and time units of the resulting cubes are harmonised, and the cubes are
    concatenated into a single cube.

    Parameters
    ----------
    input_dirs : iterable of pathlib.Path or str
        Directories that each hold the NetCDF files of one dataset.

    Yields
    ------
    iris.cube.Cube
        One concatenated cube per directory that is not skipped.
    """
    for input_dir in input_dirs:
        input_dir = Path(input_dir)

        # Entries in `problematic` are posix-style paths that start at the
        # package directory, whereas `input_dir` is usually an absolute
        # path. Match on the path suffix so the skip also works when the
        # package is installed or imported from another working directory.
        posix_dir = input_dir.as_posix()
        if any(posix_dir == entry or posix_dir.endswith('/' + entry)
               for entry in problematic):
            continue

        filenames = sorted(str(file) for file in input_dir.glob('*.nc'))
        cubes = iris.load(filenames)
        for cube in cubes:
            strip_attributes(cube)
            simplify_time(cube)

        yield cubes.concatenate_cube()


def load_timeseries_cubes(mip_table='Amon'):
    """Load the sample timeseries datasets for one MIP table.

    Every directory below ``<package>/data/timeseries`` that contains
    ``*.nc`` files under a ``mip_table`` path component is loaded into one
    cube.

    Data: ta / Amon / historical / r1i1p1f1, any grid, 1950 - onwards.
    All dimensions reduced to a few steps except for the time dimension.
    Some other variable / ocean, probably a different frequency,
    similar number of timesteps, other dimensions reduced.

    Parameters
    ----------
    mip_table : str
        Name of the MIP table directory to select, ``'Amon'`` by default.

    Returns
    -------
    list
        The loaded cubes, ordered by dataset directory path.
    """
    timeseries_dir = base_dir / 'data' / 'timeseries'

    paths = timeseries_dir.glob(f'**/{mip_table}/**/*.nc')
    # De-duplicate the parent directories, then sort them: plain set
    # iteration order depends on string hashing and can differ between
    # interpreter runs, which would make the order of the returned cubes
    # unpredictable.
    input_dirs = sorted({path.parent for path in paths})

    return list(load_cubes_from_input_dirs(input_dirs))


def load_map_cubes():
    """Load the sample map datasets (not implemented yet).

    Intended data: a 4D atmospheric variable with all dimensions reduced to
    a few steps except the horizontal dimension(s), and the same for an
    ocean variable.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError()


def load_profile_cubes():
    """Load the sample profile datasets (not implemented yet).

    Intended data: a 4D atmospheric variable with all dimensions reduced to
    a few steps except the vertical dimension(s), and the same for an
    ocean variable.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError()


if __name__ == '__main__':
    # Manual smoke test: load the sample data and report how much was found.
    # (Previously this dropped into the debugger via a leftover breakpoint().)
    ts = load_timeseries_cubes()
    print(f'Loaded {len(ts)} timeseries cubes')
stefsmeets marked this conversation as resolved.
Show resolved Hide resolved
54 changes: 54 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from setuptools import setup

# The README doubles as the long description shown on the package index.
# Read it with an explicit encoding so the result does not depend on the
# locale of the machine that builds the package.
with open('README.md', encoding='utf-8') as readme_file:
    readme = readme_file.read()

PACKAGES = [
    'esmvaltool_sample_data',
]

setup(
    name='ESMValTool sample data',
    version='0.0.1',
    description="ESMValTool sample data",
    long_description=readme + '\n\n',
    # README.md is Markdown; without this the index treats it as reST.
    long_description_content_type='text/markdown',
    author="",
    author_email='',
    url='https://github.com/ESMValGroup/ESMValTool_sample_data',
    packages=PACKAGES,
    # Ship the data files selected by MANIFEST.in together with the package.
    include_package_data=True,
    license="",
    zip_safe=False,
    keywords='ESMValTool',
    classifiers=[
        'Development Status :: 2 - Pre-Alpha',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: Apache Software License',
        'Natural Language :: English',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
    ],
    test_suite='tests',
    install_requires=[
        'scitools-iris>=2.2',
    ],
    # tests_require=[
    #     'pytest',
    #     'pytest-cov',
    #     'pycodestyle',
    # ],
    extras_require={
        'develop': [
            'codespell',
            'docformatter',
            'esgf-pyclient',
            'isort',
            'myproxyclient',
            'pre-commit',
            'prospector[with_pyroma]!=1.1.6.3,!=1.1.6.4',
            'yamllint',
            'yapf',
        ],
    },
)