Skip to content

Commit

Permalink
Merge pull request #26 from ScottWales/handling-args
Browse files Browse the repository at this point in the history
Add support for CMIP6
  • Loading branch information
ScottWales authored Aug 24, 2018
2 parents 16ec6ac + 3904b35 commit 1fa7371
Show file tree
Hide file tree
Showing 23 changed files with 15,070 additions and 213 deletions.
337 changes: 214 additions & 123 deletions arccssive2/cli.py

Large diffs are not rendered by default.

118 changes: 53 additions & 65 deletions arccssive2/esgf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,108 +15,96 @@
# limitations under the License.
from __future__ import print_function
import requests
import json
from sqlalchemy.sql import column, label
from sqlalchemy.orm import aliased
from sqlalchemy import String, Float, Integer, or_, func
from .pgvalues import values
from .model import Path, Checksum, metadata_dataset_link

def esgf_query(query, fields, limit=1000, offset=0, distrib=True, replica=False, latest=None,
cf_standard_name=None,
ensemble=None,
experiment=None,
experiment_family=None,
institute=None,
cmor_table=None,
model=None,
project=None,
product=None,
realm=None,
time_frequency=None,
variable=None,
variable_long_name=None,
source_id=None,
):
from .model import Path, Checksum, c5_metadata_dataset_link, c6_metadata_dataset_link

def define_facets(project, facets_file='../db/facets.json'):
    """
    Define available search facets based on project value: CMIP5 or CMIP6

    The facets file is a JSON object. Its keys are used as the CMIP6 facet
    names and its values as the CMIP5 facet names; entries whose value is the
    string 'None' are left out of the CMIP5 facets (presumably CMIP6 facets
    with no CMIP5 equivalent -- confirm against the facets file).

    :param project: 'CMIP5' or 'CMIP6' (exact match)
    :param facets_file: path of the facets JSON file. The default is relative
        to the current working directory, not to this module, so callers
        running from elsewhere should pass an explicit path.
    :returns: dict mapping each facet name to None, plus a 'project' entry
        set to the requested project
    :raises ValueError: if project is neither 'CMIP5' nor 'CMIP6'
    """
    # Reject unknown projects up front instead of failing later with an
    # UnboundLocalError on the return statement.
    if project not in ('CMIP5', 'CMIP6'):
        raise ValueError(
            "Unknown project %r, expected 'CMIP5' or 'CMIP6'" % (project,))

    with open(facets_file, 'r') as f:
        fdict = json.load(f)

    if project == 'CMIP5':
        facets = {v: None for v in fdict.values() if v != 'None'}
    else:
        facets = {k: None for k in fdict}
    facets['project'] = project
    return facets

def esgf_query(query, fields, limit=1000, offset=0, distrib=True, replica=False, latest=None, **kwargs):
    """
    Search the ESGF

    Sends a 'File' search to the LLNL ESGF search service and returns the
    decoded JSON response.

    :param query: free text query; an empty query is sent as no query
    :param fields: value of the 'fields' search parameter
    :param limit: value of the 'limit' search parameter
    :param offset: value of the 'offset' search parameter
    :param distrib: value of the 'distrib' search parameter
    :param replica: value of the 'replica' search parameter
    :param latest: value of the 'latest' search parameter; 'all' is
        translated to None
    :param kwargs: extra search constraints (facets), added to the request
        parameters as-is. They are applied last, so they take precedence over
        the fixed 'type' and 'format' entries.
    :returns: the response body decoded from JSON
    :raises requests.HTTPError: if the service answers with an error status
    """
    if latest == 'all':
        latest = None

    if query is not None and len(query) == 0:
        query = None

    params = {
        'query': query,
        'fields': fields,
        'offset': offset,
        'limit': limit,
        'distrib': distrib,
        'replica': replica,
        'latest': latest,
        'type': 'File',
        'format': 'application/solr+json',
    }
    params.update(kwargs)

    r = requests.get('https://esgf-node.llnl.gov/esg-search/search',
                     params=params)

    r.raise_for_status()

    return r.json()

def link_to_esgf(query, **kwargs):
    """
    Build the URL of the ESGF web search page for a query

    :param query: free text query
    :param kwargs: the same keyword arguments given to the search: 'fields',
        'offset', 'limit', 'distrib', 'replica' and 'latest' are handled
        explicitly, everything else is treated as a search constraint.
        Constraints equal to the empty tuple are dropped.
    :returns: the prepared URL as a string; the CMIP6 search page is used
        when the 'project' constraint names CMIP6 (case-insensitive),
        otherwise the CMIP5 page
    """
    # Keys set explicitly below. They are kept out of the constraints so the
    # 'on' toggles are not overwritten again by the caller's raw values.
    reserved = ('fields', 'offset', 'limit', 'distrib', 'replica', 'latest')
    constraints = {k: v for k, v in kwargs.items()
                   if v != () and k not in reserved}

    params = {
        'query': query,
        'fields': kwargs.get('fields', None),
        'offset': kwargs.get('offset', None),
        'limit': kwargs.get('limit', None),
        'distrib': 'on' if kwargs.get('distrib', True) else None,
        'replica': 'on' if kwargs.get('replica', False) else None,
        'latest': 'on' if kwargs.get('latest', None) else None,
    }
    params.update(constraints)

    # 'project' may be missing, None, a single name or a sequence of names
    project = params.get('project') or ()
    if not isinstance(project, (list, tuple)):
        project = (project,)

    endpoint = 'cmip5'
    if any(str(name).lower() == 'cmip6' for name in project):
        endpoint = 'cmip6'

    r = requests.Request('GET', 'https://esgf-node.llnl.gov/search/%s' % endpoint,
                         params=params)
    return r.prepare().url


def find_checksum_id(query, **kwargs):
def find_checksum_id(query, project, **kwargs):
"""
Returns a sqlalchemy selectable containing the ESGF id and checksum for
each query match
"""
response = esgf_query(query, 'checksum,id,dataset_id,title,version', **kwargs)
constraints = {k: v for k,v in kwargs.items() if v != ()}
constraints['project'] = project
response = esgf_query(query, 'checksum,id,dataset_id,title,version', **constraints)

if response['response']['numFound'] == 0:
raise Exception('No matches found on ESGF, check at %s'%link_to_esgf(query, **kwargs))
raise Exception('No matches found on ESGF, check at %s'%link_to_esgf(query, **constraints))

if response['response']['numFound'] > int(response['responseHeader']['params']['rows']):
raise Exception('Too many results (%d), try limiting your search %s'%(response['response']['numFound'], link_to_esgf(query, **kwargs)))
raise Exception('Too many results (%d), try limiting your search %s'%(response['response']['numFound'], link_to_esgf(query, **constraints)))

table = values([
column('checksum', String),
Expand Down
48 changes: 42 additions & 6 deletions arccssive2/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,28 @@ def expr(self, model):
expr = super(pg_json_property, self).expr(model)
return expr.astext.cast(self.cast_type)

def _dataset_link_table(name, dataset_table):
    """
    Create the association table between file ids and one dataset table

    :param name: name of the association table
    :param dataset_table: name of the table whose 'dataset_id' column the
        'dataset_id' foreign key points to
    :returns: the Table, registered on Base.metadata
    """
    return Table(name, Base.metadata,
            # file_id carries a foreign key to each per-file table
            Column('file_id',
                ForeignKey('esgf_paths.file_id'),
                ForeignKey('metadata.md_hash'),
                ForeignKey('checksums.ch_hash')),
            Column('dataset_id', ForeignKey('%s.dataset_id' % dataset_table)))

# One association table per project, differing only in table names
c5_metadata_dataset_link = _dataset_link_table('c5_metadata_dataset_link', 'cmip5_dataset')

c6_metadata_dataset_link = _dataset_link_table('c6_metadata_dataset_link', 'cmip6_dataset')

class Path(Base):
__tablename__ = 'esgf_paths'

id = Column('file_id', UUID, primary_key=True)
path = Column('path', Text)

dataset = relationship('Dataset', secondary=metadata_dataset_link, viewonly=True)
c5dataset = relationship('C5Dataset', secondary=c5_metadata_dataset_link, viewonly=True)
c6dataset = relationship('C6Dataset', secondary=c6_metadata_dataset_link, viewonly=True)
netcdf = relationship('Netcdf', viewonly=True)
checksum = relationship('Checksum', viewonly=True)
extended = relationship('ExtendedMetadata', viewonly=True)
Expand Down Expand Up @@ -108,11 +116,11 @@ class ExtendedMetadata(Base):
variable = Column(Text)
period = Column(INT4RANGE)

class Dataset(Base):
class C5Dataset(Base):
"""
An ESGF dataset
A CMIP5 dataset
"""
__tablename__ = 'esgf_dataset'
__tablename__ = 'cmip5_dataset'

dataset_id = Column(Text, primary_key=True)
project = Column(Text)
Expand All @@ -126,3 +134,31 @@ class Dataset(Base):
p = Column(Integer)
ensemble = Column(Text)
cmor_table = Column(Text)

class C6Dataset(Base):
    """
    A CMIP6 dataset, stored in the 'cmip6_dataset' table

    Every column takes its database name from the attribute name.
    """
    __tablename__ = 'cmip6_dataset'

    # Identity
    dataset_id = Column(Text, primary_key=True)
    project = Column(Text)

    # Text facets describing the dataset
    activity_id = Column(Text)
    institution_id = Column(Text)
    source_id = Column(Text)
    source_type = Column(Text)
    experiment_id = Column(Text)
    sub_experiment_id = Column(Text)
    frequency = Column(Text)
    realm = Column(Text)

    # Integer indices; presumably the r/i/p/f components of variant_label
    # -- confirm against the database schema
    r = Column(Integer)
    i = Column(Integer)
    p = Column(Integer)
    f = Column(Integer)
    variant_label = Column(Text)
    member_id = Column(Text)

    # Variable, grid and table facets
    variable_id = Column(Text)
    grid_label = Column(Text)
    nominal_resolution = Column(Text)
    table_id = Column(Text)

38 changes: 38 additions & 0 deletions db/CMIP6_activity_id.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"activity_id":{
"AerChemMIP":"Aerosols and Chemistry Model Intercomparison Project",
"C4MIP":"Coupled Climate Carbon Cycle Model Intercomparison Project",
"CDRMIP":"Carbon Dioxide Removal Model Intercomparison Project",
"CFMIP":"Cloud Feedback Model Intercomparison Project",
"CMIP":"CMIP DECK: 1pctCO2, abrupt4xCO2, amip, esm-piControl, esm-historical, historical, and piControl experiments",
"CORDEX":"Coordinated Regional Climate Downscaling Experiment",
"DAMIP":"Detection and Attribution Model Intercomparison Project",
"DCPP":"Decadal Climate Prediction Project",
"DynVarMIP":"Dynamics and Variability Model Intercomparison Project",
"FAFMIP":"Flux-Anomaly-Forced Model Intercomparison Project",
"GMMIP":"Global Monsoons Model Intercomparison Project",
"GeoMIP":"Geoengineering Model Intercomparison Project",
"HighResMIP":"High-Resolution Model Intercomparison Project",
"ISMIP6":"Ice Sheet Model Intercomparison Project for CMIP6",
"LS3MIP":"Land Surface, Snow and Soil Moisture",
"LUMIP":"Land-Use Model Intercomparison Project",
"OMIP":"Ocean Model Intercomparison Project",
"PAMIP":"Polar Amplification Model Intercomparison Project",
"PMIP":"Palaeoclimate Modelling Intercomparison Project",
"RFMIP":"Radiative Forcing Model Intercomparison Project",
"SIMIP":"Sea Ice Model Intercomparison Project",
"ScenarioMIP":"Scenario Model Intercomparison Project",
"VIACSAB":"Vulnerability, Impacts, Adaptation and Climate Services Advisory Board",
"VolMIP":"Volcanic Forcings Model Intercomparison Project"
},
"version_metadata":{
"CV_collection_modified":"Tue Aug 7 07:31:46 2018 -0700",
"CV_collection_version":"6.2.12.0",
"activity_id_CV_modified":"Mon Mar 5 16:39:09 2018 -0800",
"activity_id_CV_note":"Update activity_id to include CDRMIP and PAMIP",
"author":"Paul J. Durack <[email protected]>",
"institution_id":"PCMDI",
"previous_commit":"b27ce275adcf31632cbafb9b9f92af4235b3fff2",
"specs_doc":"v6.2.6 (20th December 2017; https://goo.gl/v1drZl)"
}
}
Loading

0 comments on commit 1fa7371

Please sign in to comment.